Skip to content

Commit ffbd29c

Browse files
Merge pull request #148 from pepkit/dev
Release 0.12.9
2 parents 58bb467 + edbcca2 commit ffbd29c

File tree

3 files changed

+130
-122
lines changed

3 files changed

+130
-122
lines changed

.github/workflows/run-pytest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
python-version: ["3.8", "3.12"]
14+
python-version: ["3.9", "3.13"]
1515
os: [ubuntu-latest]
1616

1717
steps:

geofetch/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.12.8"
1+
__version__ = "0.12.9"

geofetch/geofetch.py

Lines changed: 128 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -401,139 +401,147 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
401401
description="Processing... ",
402402
disable=self.disable_progressbar,
403403
):
404-
ncount += 1
405-
if ncount <= self.skip:
406-
continue
407-
elif ncount == self.skip + 1:
408-
_LOGGER.info(f"Skipped {self.skip} accessions. Starting now.")
409-
410-
if not self.just_object or not self.acc_anno:
411-
_LOGGER.info(
412-
f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m"
413-
)
414-
415-
if len(re.findall(GSE_PATTERN, acc_GSE)) != 1:
416-
_LOGGER.debug(len(re.findall(GSE_PATTERN, acc_GSE)))
417-
_LOGGER.warning(
418-
"This does not appear to be a correctly formatted GSE accession! "
419-
"Continue anyway..."
420-
)
404+
try:
405+
ncount += 1
406+
if ncount <= self.skip:
407+
continue
408+
elif ncount == self.skip + 1:
409+
_LOGGER.info(f"Skipped {self.skip} accessions. Starting now.")
421410

422-
if len(acc_GSE_list[acc_GSE]) > 0:
423-
_LOGGER.info(
424-
f"Limit to: {list(acc_GSE_list[acc_GSE])}"
425-
) # a list of GSM#s
426-
427-
# For each GSE acc, produce a series of metadata files
428-
file_gse = os.path.join(self.metadata_expanded, acc_GSE + "_GSE.soft")
429-
file_gsm = os.path.join(self.metadata_expanded, acc_GSE + "_GSM.soft")
430-
file_sra = os.path.join(self.metadata_expanded, acc_GSE + "_SRA.csv")
431-
432-
if not os.path.isfile(file_gse) or self.refresh_metadata:
433-
file_gse_content = Accession(acc_GSE).fetch_metadata(
434-
file_gse,
435-
clean=self.discard_soft,
436-
max_soft_size=self.max_soft_size,
437-
)
438-
else:
439-
_LOGGER.info(f"Found previous GSE file: {file_gse}")
440-
gse_file_obj = open(file_gse, "r")
441-
file_gse_content = gse_file_obj.read().split("\n")
442-
file_gse_content = [elem for elem in file_gse_content if len(elem) > 0]
443-
444-
file_gse_content_dict = gse_content_to_dict(file_gse_content)
445-
446-
if not os.path.isfile(file_gsm) or self.refresh_metadata:
447-
file_gsm_content = Accession(acc_GSE).fetch_metadata(
448-
file_gsm,
449-
typename="GSM",
450-
clean=self.discard_soft,
451-
max_soft_size=self.max_soft_size,
452-
)
453-
else:
454-
_LOGGER.info(f"Found previous GSM file: {file_gsm}")
455-
gsm_file_obj = open(file_gsm, "r")
456-
file_gsm_content = gsm_file_obj.read().split("\n")
457-
file_gsm_content = [elem for elem in file_gsm_content if len(elem) > 0]
458-
459-
gsm_enter_dict = acc_GSE_list[acc_GSE]
460-
461-
# download processed data
462-
if self.processed:
463-
(
464-
meta_processed_samples,
465-
meta_processed_series,
466-
) = self.fetch_processed_one(
467-
gse_file_content=file_gse_content,
468-
gsm_file_content=file_gsm_content,
469-
gsm_filter_list=gsm_enter_dict,
470-
)
411+
if not self.just_object or not self.acc_anno:
412+
_LOGGER.info(
413+
f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m"
414+
)
471415

472-
# download processed files:
473-
if not self.just_metadata:
474-
self._download_processed_data(
475-
acc_gse=acc_GSE,
476-
meta_processed_samples=meta_processed_samples,
477-
meta_processed_series=meta_processed_series,
416+
if len(re.findall(GSE_PATTERN, acc_GSE)) != 1:
417+
_LOGGER.debug(len(re.findall(GSE_PATTERN, acc_GSE)))
418+
_LOGGER.warning(
419+
"This does not appear to be a correctly formatted GSE accession! "
420+
"Continue anyway..."
478421
)
479422

480-
# generating PEPs for processed files:
481-
if self.acc_anno:
482-
self._generate_processed_meta(
483-
acc_GSE,
423+
if len(acc_GSE_list[acc_GSE]) > 0:
424+
_LOGGER.info(
425+
f"Limit to: {list(acc_GSE_list[acc_GSE])}"
426+
) # a list of GSM#s
427+
428+
# For each GSE acc, produce a series of metadata files
429+
file_gse = os.path.join(self.metadata_expanded, acc_GSE + "_GSE.soft")
430+
file_gsm = os.path.join(self.metadata_expanded, acc_GSE + "_GSM.soft")
431+
file_sra = os.path.join(self.metadata_expanded, acc_GSE + "_SRA.csv")
432+
433+
if not os.path.isfile(file_gse) or self.refresh_metadata:
434+
file_gse_content = Accession(acc_GSE).fetch_metadata(
435+
file_gse,
436+
clean=self.discard_soft,
437+
max_soft_size=self.max_soft_size,
438+
)
439+
else:
440+
_LOGGER.info(f"Found previous GSE file: {file_gse}")
441+
with open(file_gse, "r") as gse_file_obj:
442+
file_gse_content = gse_file_obj.read().split("\n")
443+
file_gse_content = [
444+
elem for elem in file_gse_content if len(elem) > 0
445+
]
446+
447+
file_gse_content_dict = gse_content_to_dict(file_gse_content)
448+
449+
if not os.path.isfile(file_gsm) or self.refresh_metadata:
450+
file_gsm_content = Accession(acc_GSE).fetch_metadata(
451+
file_gsm,
452+
typename="GSM",
453+
clean=self.discard_soft,
454+
max_soft_size=self.max_soft_size,
455+
)
456+
else:
457+
_LOGGER.info(f"Found previous GSM file: {file_gsm}")
458+
with open(file_gsm, "r") as gsm_file_obj:
459+
file_gsm_content = gsm_file_obj.read().split("\n")
460+
file_gsm_content = [
461+
elem for elem in file_gsm_content if len(elem) > 0
462+
]
463+
464+
gsm_enter_dict = acc_GSE_list[acc_GSE]
465+
466+
# download processed data
467+
if self.processed:
468+
(
484469
meta_processed_samples,
485470
meta_processed_series,
486-
gse_meta_dict=file_gse_content_dict,
471+
) = self.fetch_processed_one(
472+
gse_file_content=file_gse_content,
473+
gsm_file_content=file_gsm_content,
474+
gsm_filter_list=gsm_enter_dict,
487475
)
488476

489-
else:
490-
# adding metadata from current experiment to the project
491-
processed_metadata_samples.extend(meta_processed_samples)
492-
processed_metadata_series.extend(meta_processed_series)
477+
# download processed files:
478+
if not self.just_metadata:
479+
self._download_processed_data(
480+
acc_gse=acc_GSE,
481+
meta_processed_samples=meta_processed_samples,
482+
meta_processed_series=meta_processed_series,
483+
)
493484

494-
else:
495-
# read gsm metadata
496-
gsm_metadata = self._read_gsm_metadata(
497-
acc_GSE, acc_GSE_list, file_gsm_content
498-
)
485+
# generating PEPs for processed files:
486+
if self.acc_anno:
487+
self._generate_processed_meta(
488+
acc_GSE,
489+
meta_processed_samples,
490+
meta_processed_series,
491+
gse_meta_dict=file_gse_content_dict,
492+
)
499493

500-
# download sra metadata
501-
srp_list_result = self._get_SRA_meta(
502-
file_gse_content, gsm_metadata, file_sra
503-
)
504-
if not srp_list_result:
505-
_LOGGER.info("No SRP data, continuing ....")
506-
_LOGGER.warning("No raw pep will be created! ....")
507-
# delete current acc if no raw data was found
508-
# del metadata_dict[acc_GSE]
509-
pass
510-
else:
511-
_LOGGER.info("Parsing SRA file to download SRR records")
512-
gsm_multi_table, gsm_metadata, runs = self._process_sra_meta(
513-
srp_list_result, gsm_enter_dict, gsm_metadata
514-
)
494+
else:
495+
# adding metadata from current experiment to the project
496+
processed_metadata_samples.extend(meta_processed_samples)
497+
processed_metadata_series.extend(meta_processed_series)
515498

516-
# download raw data:
517-
if not self.just_metadata:
518-
for run in runs:
519-
# download raw data
520-
_LOGGER.info(f"Getting SRR: {run} in ({acc_GSE})")
521-
self._download_raw_data(run)
522499
else:
523-
_LOGGER.info("Dry run, no data will be downloaded")
524-
525-
# save one project
526-
if self.acc_anno and nkeys > 1:
527-
self._write_raw_annotation_new(
528-
name=acc_GSE,
529-
metadata_dict=gsm_metadata,
530-
subannot_dict=gsm_multi_table,
531-
gse_meta_dict=file_gse_content_dict,
500+
# read gsm metadata
501+
gsm_metadata = self._read_gsm_metadata(
502+
acc_GSE, acc_GSE_list, file_gsm_content
532503
)
533504

534-
else:
535-
metadata_dict_combined.update(gsm_metadata)
536-
subannotation_dict_combined.update(gsm_multi_table)
505+
# download sra metadata
506+
srp_list_result = self._get_SRA_meta(
507+
file_gse_content, gsm_metadata, file_sra
508+
)
509+
if not srp_list_result:
510+
_LOGGER.info("No SRP data, continuing ....")
511+
_LOGGER.warning("No raw pep will be created! ....")
512+
# delete current acc if no raw data was found
513+
# del metadata_dict[acc_GSE]
514+
pass
515+
else:
516+
_LOGGER.info("Parsing SRA file to download SRR records")
517+
gsm_multi_table, gsm_metadata, runs = self._process_sra_meta(
518+
srp_list_result, gsm_enter_dict, gsm_metadata
519+
)
520+
521+
# download raw data:
522+
if not self.just_metadata:
523+
for run in runs:
524+
# download raw data
525+
_LOGGER.info(f"Getting SRR: {run} in ({acc_GSE})")
526+
self._download_raw_data(run)
527+
else:
528+
_LOGGER.info("Dry run, no data will be downloaded")
529+
530+
# save one project
531+
if self.acc_anno and nkeys > 1:
532+
self._write_raw_annotation_new(
533+
name=acc_GSE,
534+
metadata_dict=gsm_metadata,
535+
subannot_dict=gsm_multi_table,
536+
gse_meta_dict=file_gse_content_dict,
537+
)
538+
539+
else:
540+
metadata_dict_combined.update(gsm_metadata)
541+
subannotation_dict_combined.update(gsm_multi_table)
542+
except Exception as e:
543+
_LOGGER.warning(f"Couldn't process {acc_GSE}: {e}", exc_info=True)
544+
continue
537545

538546
_LOGGER.info(f"Finished processing {len(acc_GSE_list)} accession(s)")
539547

@@ -802,7 +810,7 @@ def _generate_processed_meta(
802810
f"{name}_series",
803811
name + EXP_SUPP_METADATA_FILE,
804812
)
805-
self._write_processed_annotation(
813+
return_objects[f"{name}_series"] = self._write_processed_annotation(
806814
meta_processed_series,
807815
pep_acc_path_exp,
808816
just_object=self.just_object,

0 commit comments

Comments (0)