Skip to content

Commit f2dff6d

Browse files
committed
Fixed SRA convert functionality
1 parent 9ee1df2 commit f2dff6d

15 files changed

+99
-71
lines changed

.looper.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pep_config: /home/bnt4me/virginia/repos/geofetch/red_algae/GSE67303_PEP/GSE67303_PEP.yaml
2+
output_dir: /home/bnt4me/virginia/repos/geofetch/red_algae/GSE67303_PEP
3+
pipeline_interfaces:
4+
- /home/bnt4me/virginia/repos/geofetch/pipeline_interface_convert.yaml

MANIFEST.in

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
include requirements/*
22
include README.md
33
include docs/img/geofetch_logo.svg
4-
include geofetch/config_template.yaml
5-
include geofetch/config_processed_template.yaml
6-
include geofetch/looper_sra_convert.yaml
4+
include geofetch/templates/*
5+
include geofetch/templates/config_template.yaml
6+
include geofetch/templates/config_processed_template.yaml
7+
include geofetch/templates/looper_sra_convert.yaml
8+
include geofetch/templates/looper_config_template.yaml
9+
include geofetch/templates/pipeline_interface_convert.yaml

geofetch/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.12.7"
1+
__version__ = "0.12.8"

geofetch/const.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,14 @@
4444

4545
NEW_GENOME_COL_NAME = "ref_genome"
4646

47+
TEMPLATES_DIR = "templates"
4748
CONFIG_PROCESSED_TEMPLATE_NAME = "config_processed_template.yaml"
4849
CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml"
49-
CONFIG_SRA_TEMPLATE = "looper_sra_convert.yaml"
50+
CONFIG_SRA_TEMPLATE_NAME = "looper_sra_convert.yaml"
51+
PIPELINE_INTERFACE_CONVERT_TEMPLATE_NAME = "pipeline_interface_convert.yaml"
52+
LOOPER_SRA_CONVERT = "looper_config_template.yaml"
53+
# SRA_CONVERT_SCHEMA_NAME = "sra_convert_schema.yaml"
54+
# RESOURCES_NAME = "resources.tsv"
5055

5156
# const for Finder:
5257
RETMAX = 10000000 # should be increased at some point
@@ -63,3 +68,5 @@
6368
'+AND+("{start_date}"[Publication%20Date]%20:%20"{end_date}"[Publication%20Date])'
6469
)
6570
THREE_MONTH_FILTER = '+AND+"published+last+3+months"[Filter]'
71+
72+
LOOPER_CONFIG_FILE_NAME = "looper_config.yaml"

geofetch/geofetch.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from geofetch.const import (
2121
CONFIG_PROCESSED_TEMPLATE_NAME,
2222
CONFIG_RAW_TEMPLATE_NAME,
23-
CONFIG_SRA_TEMPLATE,
23+
CONFIG_SRA_TEMPLATE_NAME,
2424
EXP_SUPP_METADATA_FILE,
2525
EXPERIMENT_PATTERN,
2626
FILE_RAW_NAME_SAMPLE_PATTERN,
@@ -34,6 +34,10 @@
3434
SAMPLE_SUPP_METADATA_FILE,
3535
SER_SUPP_FILE_PATTERN,
3636
SUPP_FILE_PATTERN,
37+
TEMPLATES_DIR,
38+
PIPELINE_INTERFACE_CONVERT_TEMPLATE_NAME,
39+
LOOPER_SRA_CONVERT,
40+
LOOPER_CONFIG_FILE_NAME,
3741
)
3842
from geofetch.utils import (
3943
Accession,
@@ -1171,11 +1175,43 @@ def _write_raw_annotation_new(
11711175
if len(subannot_dict) > 0:
11721176
self._write_subannotation(subannot_dict, proj_root_subsample)
11731177

1174-
self._write(proj_root_yaml, template, msg_pre=" Config file: ")
1178+
self._write(proj_root_yaml, template, msg_pre="Config file: ")
11751179

11761180
if self.add_dotfile:
11771181
_create_dot_yaml(dot_yaml_path, yaml_name)
11781182

1183+
if self.add_convert_modifier:
1184+
geofetchdir = os.path.dirname(__file__)
1185+
pipeline_interface_convert_path = os.path.join(
1186+
geofetchdir, TEMPLATES_DIR, PIPELINE_INTERFACE_CONVERT_TEMPLATE_NAME
1187+
)
1188+
1189+
looper_config_template_path = os.path.join(
1190+
geofetchdir, TEMPLATES_DIR, LOOPER_SRA_CONVERT
1191+
)
1192+
1193+
with open(looper_config_template_path, "r") as template_file:
1194+
template_looper = template_file.read()
1195+
1196+
template_values = {
1197+
"pep_config": proj_root_yaml,
1198+
"output_dir": os.path.join(self.metadata_root_full, "output_dir"),
1199+
"pipeline_interface_convert": pipeline_interface_convert_path,
1200+
}
1201+
1202+
for k, v in template_values.items():
1203+
placeholder = "{" + str(k) + "}"
1204+
template_looper = template_looper.replace(placeholder, str(v))
1205+
1206+
looper_config_file = os.path.join(
1207+
self.metadata_root_full,
1208+
LOOPER_CONFIG_FILE_NAME,
1209+
)
1210+
1211+
self._write(
1212+
looper_config_file, template_looper, msg_pre="Looper config file: "
1213+
)
1214+
11791215
else:
11801216
meta_df = pd.DataFrame.from_dict(metadata_dict, orient="index")
11811217

@@ -1214,8 +1250,11 @@ def _create_config_processed(
12141250
:param meta_in_series:
12151251
:return: generated, complete config file content
12161252
"""
1253+
12171254
geofetchdir = os.path.dirname(__file__)
1218-
config_template = os.path.join(geofetchdir, CONFIG_PROCESSED_TEMPLATE_NAME)
1255+
config_template = os.path.join(
1256+
geofetchdir, TEMPLATES_DIR, CONFIG_PROCESSED_TEMPLATE_NAME
1257+
)
12191258
with open(config_template, "r") as template_file:
12201259
template = template_file.read()
12211260
meta_list_str = [
@@ -1270,9 +1309,13 @@ def _create_config_raw(
12701309
else:
12711310
sample_modifier_str = ""
12721311
if not self.config_template:
1273-
self.config_template = os.path.join(geofetchdir, CONFIG_RAW_TEMPLATE_NAME)
1312+
self.config_template = os.path.join(
1313+
geofetchdir, TEMPLATES_DIR, CONFIG_RAW_TEMPLATE_NAME
1314+
)
12741315
if self.add_convert_modifier:
1275-
sra_convert_path = os.path.join(geofetchdir, CONFIG_SRA_TEMPLATE)
1316+
sra_convert_path = os.path.join(
1317+
geofetchdir, TEMPLATES_DIR, CONFIG_SRA_TEMPLATE_NAME
1318+
)
12761319
with open(sra_convert_path, "r") as template_file:
12771320
sra_convert_template = template_file.read()
12781321
else:
@@ -1301,6 +1344,7 @@ def _create_config_raw(
13011344
for k, v in template_values.items():
13021345
placeholder = "{" + str(k) + "}"
13031346
template = template.replace(placeholder, str(v))
1347+
13041348
return template
13051349

13061350
@staticmethod
File renamed without changes.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pep_config: {pep_config}
2+
output_dir: {output_dir}
3+
pipeline_interfaces:
4+
- {pipeline_interface_convert}

geofetch/looper_sra_convert.yaml renamed to geofetch/templates/looper_sra_convert.yaml

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
derive:
55
attributes: [read1, read2, SRR_files]
66
sources:
7-
SRA: "${SRABAM}/{srr}.bam"
7+
SRA: "${SRARAW}/{srr}/{srr}.sra"
88
FQ: "${SRAFQ}/{srr}.fastq.gz"
99
FQ1: "${SRAFQ}/{srr}_1.fastq.gz"
1010
FQ2: "${SRAFQ}/{srr}_2.fastq.gz"
@@ -26,20 +26,3 @@
2626
read_type: "SINGLE"
2727
then:
2828
read1: FQ1
29-
30-
project_modifiers:
31-
amend:
32-
sra_convert:
33-
looper:
34-
results_subdir: sra_convert_results
35-
sample_modifiers:
36-
append:
37-
SRR_files: SRA
38-
pipeline_interfaces: ${CODE}/geofetch/pipeline_interface_convert.yaml
39-
derive:
40-
attributes: [read1, read2, SRR_files]
41-
sources:
42-
SRA: "${SRARAW}/{srr}/{srr}.sra"
43-
FQ: "${SRAFQ}/{srr}.fastq.gz"
44-
FQ1: "${SRAFQ}/{srr}_1.fastq.gz"
45-
FQ2: "${SRAFQ}/{srr}_2.fastq.gz"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
pipeline_name: sra_convert
2+
path: sraconvert
3+
input_schema: ./sra_convert_schema.yaml
4+
sample_interface:
5+
command_template: >
6+
{pipeline.path} --srr {sample.SRR_files}
7+
{% if sample.SRX is defined %} --sample-name {sample.SRX} {% endif %}
8+
{% if project.fqfolder is defined %} --fqfolder {project.fqfolder} {% endif %}
9+
-O {looper.results_subdir}
10+
compute:
11+
bulker_crate: databio/sra_convert
12+
size_dependent_variables: ./resources.tsv

0 commit comments

Comments
 (0)