@@ -401,139 +401,147 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
401401 description = "Processing... " ,
402402 disable = self .disable_progressbar ,
403403 ):
404- ncount += 1
405- if ncount <= self .skip :
406- continue
407- elif ncount == self .skip + 1 :
408- _LOGGER .info (f"Skipped { self .skip } accessions. Starting now." )
409-
410- if not self .just_object or not self .acc_anno :
411- _LOGGER .info (
412- f"\033 [38;5;200mProcessing accession { ncount } of { nkeys } : '{ acc_GSE } '\033 [0m"
413- )
414-
415- if len (re .findall (GSE_PATTERN , acc_GSE )) != 1 :
416- _LOGGER .debug (len (re .findall (GSE_PATTERN , acc_GSE )))
417- _LOGGER .warning (
418- "This does not appear to be a correctly formatted GSE accession! "
419- "Continue anyway..."
420- )
404+ try :
405+ ncount += 1
406+ if ncount <= self .skip :
407+ continue
408+ elif ncount == self .skip + 1 :
409+ _LOGGER .info (f"Skipped { self .skip } accessions. Starting now." )
421410
422- if len (acc_GSE_list [acc_GSE ]) > 0 :
423- _LOGGER .info (
424- f"Limit to: { list (acc_GSE_list [acc_GSE ])} "
425- ) # a list of GSM#s
426-
427- # For each GSE acc, produce a series of metadata files
428- file_gse = os .path .join (self .metadata_expanded , acc_GSE + "_GSE.soft" )
429- file_gsm = os .path .join (self .metadata_expanded , acc_GSE + "_GSM.soft" )
430- file_sra = os .path .join (self .metadata_expanded , acc_GSE + "_SRA.csv" )
431-
432- if not os .path .isfile (file_gse ) or self .refresh_metadata :
433- file_gse_content = Accession (acc_GSE ).fetch_metadata (
434- file_gse ,
435- clean = self .discard_soft ,
436- max_soft_size = self .max_soft_size ,
437- )
438- else :
439- _LOGGER .info (f"Found previous GSE file: { file_gse } " )
440- gse_file_obj = open (file_gse , "r" )
441- file_gse_content = gse_file_obj .read ().split ("\n " )
442- file_gse_content = [elem for elem in file_gse_content if len (elem ) > 0 ]
443-
444- file_gse_content_dict = gse_content_to_dict (file_gse_content )
445-
446- if not os .path .isfile (file_gsm ) or self .refresh_metadata :
447- file_gsm_content = Accession (acc_GSE ).fetch_metadata (
448- file_gsm ,
449- typename = "GSM" ,
450- clean = self .discard_soft ,
451- max_soft_size = self .max_soft_size ,
452- )
453- else :
454- _LOGGER .info (f"Found previous GSM file: { file_gsm } " )
455- gsm_file_obj = open (file_gsm , "r" )
456- file_gsm_content = gsm_file_obj .read ().split ("\n " )
457- file_gsm_content = [elem for elem in file_gsm_content if len (elem ) > 0 ]
458-
459- gsm_enter_dict = acc_GSE_list [acc_GSE ]
460-
461- # download processed data
462- if self .processed :
463- (
464- meta_processed_samples ,
465- meta_processed_series ,
466- ) = self .fetch_processed_one (
467- gse_file_content = file_gse_content ,
468- gsm_file_content = file_gsm_content ,
469- gsm_filter_list = gsm_enter_dict ,
470- )
411+ if not self .just_object or not self .acc_anno :
412+ _LOGGER .info (
413+ f"\033 [38;5;200mProcessing accession { ncount } of { nkeys } : '{ acc_GSE } '\033 [0m"
414+ )
471415
472- # download processed files:
473- if not self .just_metadata :
474- self ._download_processed_data (
475- acc_gse = acc_GSE ,
476- meta_processed_samples = meta_processed_samples ,
477- meta_processed_series = meta_processed_series ,
416+ if len (re .findall (GSE_PATTERN , acc_GSE )) != 1 :
417+ _LOGGER .debug (len (re .findall (GSE_PATTERN , acc_GSE )))
418+ _LOGGER .warning (
419+ "This does not appear to be a correctly formatted GSE accession! "
420+ "Continue anyway..."
478421 )
479422
480- # generating PEPs for processed files:
481- if self .acc_anno :
482- self ._generate_processed_meta (
483- acc_GSE ,
423+ if len (acc_GSE_list [acc_GSE ]) > 0 :
424+ _LOGGER .info (
425+ f"Limit to: { list (acc_GSE_list [acc_GSE ])} "
426+ ) # a list of GSM#s
427+
428+ # For each GSE acc, produce a series of metadata files
429+ file_gse = os .path .join (self .metadata_expanded , acc_GSE + "_GSE.soft" )
430+ file_gsm = os .path .join (self .metadata_expanded , acc_GSE + "_GSM.soft" )
431+ file_sra = os .path .join (self .metadata_expanded , acc_GSE + "_SRA.csv" )
432+
433+ if not os .path .isfile (file_gse ) or self .refresh_metadata :
434+ file_gse_content = Accession (acc_GSE ).fetch_metadata (
435+ file_gse ,
436+ clean = self .discard_soft ,
437+ max_soft_size = self .max_soft_size ,
438+ )
439+ else :
440+ _LOGGER .info (f"Found previous GSE file: { file_gse } " )
441+ with open (file_gse , "r" ) as gse_file_obj :
442+ file_gse_content = gse_file_obj .read ().split ("\n " )
443+ file_gse_content = [
444+ elem for elem in file_gse_content if len (elem ) > 0
445+ ]
446+
447+ file_gse_content_dict = gse_content_to_dict (file_gse_content )
448+
449+ if not os .path .isfile (file_gsm ) or self .refresh_metadata :
450+ file_gsm_content = Accession (acc_GSE ).fetch_metadata (
451+ file_gsm ,
452+ typename = "GSM" ,
453+ clean = self .discard_soft ,
454+ max_soft_size = self .max_soft_size ,
455+ )
456+ else :
457+ _LOGGER .info (f"Found previous GSM file: { file_gsm } " )
458+ with open (file_gsm , "r" ) as gsm_file_obj :
459+ file_gsm_content = gsm_file_obj .read ().split ("\n " )
460+ file_gsm_content = [
461+ elem for elem in file_gsm_content if len (elem ) > 0
462+ ]
463+
464+ gsm_enter_dict = acc_GSE_list [acc_GSE ]
465+
466+ # download processed data
467+ if self .processed :
468+ (
484469 meta_processed_samples ,
485470 meta_processed_series ,
486- gse_meta_dict = file_gse_content_dict ,
471+ ) = self .fetch_processed_one (
472+ gse_file_content = file_gse_content ,
473+ gsm_file_content = file_gsm_content ,
474+ gsm_filter_list = gsm_enter_dict ,
487475 )
488476
489- else :
490- # adding metadata from current experiment to the project
491- processed_metadata_samples .extend (meta_processed_samples )
492- processed_metadata_series .extend (meta_processed_series )
477+ # download processed files:
478+ if not self .just_metadata :
479+ self ._download_processed_data (
480+ acc_gse = acc_GSE ,
481+ meta_processed_samples = meta_processed_samples ,
482+ meta_processed_series = meta_processed_series ,
483+ )
493484
494- else :
495- # read gsm metadata
496- gsm_metadata = self ._read_gsm_metadata (
497- acc_GSE , acc_GSE_list , file_gsm_content
498- )
485+ # generating PEPs for processed files:
486+ if self .acc_anno :
487+ self ._generate_processed_meta (
488+ acc_GSE ,
489+ meta_processed_samples ,
490+ meta_processed_series ,
491+ gse_meta_dict = file_gse_content_dict ,
492+ )
499493
500- # download sra metadata
501- srp_list_result = self ._get_SRA_meta (
502- file_gse_content , gsm_metadata , file_sra
503- )
504- if not srp_list_result :
505- _LOGGER .info ("No SRP data, continuing ...." )
506- _LOGGER .warning ("No raw pep will be created! ...." )
507- # delete current acc if no raw data was found
508- # del metadata_dict[acc_GSE]
509- pass
510- else :
511- _LOGGER .info ("Parsing SRA file to download SRR records" )
512- gsm_multi_table , gsm_metadata , runs = self ._process_sra_meta (
513- srp_list_result , gsm_enter_dict , gsm_metadata
514- )
494+ else :
495+ # adding metadata from current experiment to the project
496+ processed_metadata_samples .extend (meta_processed_samples )
497+ processed_metadata_series .extend (meta_processed_series )
515498
516- # download raw data:
517- if not self .just_metadata :
518- for run in runs :
519- # download raw data
520- _LOGGER .info (f"Getting SRR: { run } in ({ acc_GSE } )" )
521- self ._download_raw_data (run )
522499 else :
523- _LOGGER .info ("Dry run, no data will be downloaded" )
524-
525- # save one project
526- if self .acc_anno and nkeys > 1 :
527- self ._write_raw_annotation_new (
528- name = acc_GSE ,
529- metadata_dict = gsm_metadata ,
530- subannot_dict = gsm_multi_table ,
531- gse_meta_dict = file_gse_content_dict ,
500+ # read gsm metadata
501+ gsm_metadata = self ._read_gsm_metadata (
502+ acc_GSE , acc_GSE_list , file_gsm_content
532503 )
533504
534- else :
535- metadata_dict_combined .update (gsm_metadata )
536- subannotation_dict_combined .update (gsm_multi_table )
505+ # download sra metadata
506+ srp_list_result = self ._get_SRA_meta (
507+ file_gse_content , gsm_metadata , file_sra
508+ )
509+ if not srp_list_result :
510+ _LOGGER .info ("No SRP data, continuing ...." )
511+ _LOGGER .warning ("No raw pep will be created! ...." )
512+ # delete current acc if no raw data was found
513+ # del metadata_dict[acc_GSE]
514+ pass
515+ else :
516+ _LOGGER .info ("Parsing SRA file to download SRR records" )
517+ gsm_multi_table , gsm_metadata , runs = self ._process_sra_meta (
518+ srp_list_result , gsm_enter_dict , gsm_metadata
519+ )
520+
521+ # download raw data:
522+ if not self .just_metadata :
523+ for run in runs :
524+ # download raw data
525+ _LOGGER .info (f"Getting SRR: { run } in ({ acc_GSE } )" )
526+ self ._download_raw_data (run )
527+ else :
528+ _LOGGER .info ("Dry run, no data will be downloaded" )
529+
530+ # save one project
531+ if self .acc_anno and nkeys > 1 :
532+ self ._write_raw_annotation_new (
533+ name = acc_GSE ,
534+ metadata_dict = gsm_metadata ,
535+ subannot_dict = gsm_multi_table ,
536+ gse_meta_dict = file_gse_content_dict ,
537+ )
538+
539+ else :
540+ metadata_dict_combined .update (gsm_metadata )
541+ subannotation_dict_combined .update (gsm_multi_table )
542+ except Exception as e :
543+ _LOGGER .warning (f"Couldn't process { acc_GSE } : { e } " , exc_info = True )
544+ continue
537545
538546 _LOGGER .info (f"Finished processing { len (acc_GSE_list )} accession(s)" )
539547
@@ -802,7 +810,7 @@ def _generate_processed_meta(
802810 f"{ name } _series" ,
803811 name + EXP_SUPP_METADATA_FILE ,
804812 )
805- self ._write_processed_annotation (
813+ return_objects [ f" { name } _series" ] = self ._write_processed_annotation (
806814 meta_processed_series ,
807815 pep_acc_path_exp ,
808816 just_object = self .just_object ,
0 commit comments