def get_fast5_path(wildcards):
    """Input function: look up the fast5 parent folder for one sample.

    Args:
        wildcards: Snakemake wildcards object; only ``wildcards.sample`` is read.

    Returns:
        The ``path_to_fast5_parent_folder`` entry stored for that sample in
        the module-level ``SAMPLESDF``.

    NOTE(review): SAMPLESDF is defined outside this section; presumably it is
    a sample table keyed by sample name -- confirm against its definition.
    """
    return SAMPLESDF["path_to_fast5_parent_folder"][wildcards.sample]
3+
4+
# Basecall one sample's fast5 folder with guppy, then publish two artifacts:
#   * every per-run *.fastq written by guppy, concatenated and gzipped
#   * guppy's sequencing_summary.txt, compressed with pigz
# Intermediate files are written under /lscratch/$SLURM_JOBID/<sample>.
rule guppy:
    input:
        # Resolved per sample by the get_fast5_path input function.
        fast5path = get_fast5_path
    output:
        outfastq = join(WORKDIR, "fastqs", "{sample}.fastq.gz"),
        sequencing_summary = join(WORKDIR, "fastqs", "{sample}.sequencing_summary.txt.gz")
    params:
        flowcell = config["flowcell"],
        kit = config["kit"],
        sample = "{sample}"
    envmodules: TOOLS["guppy"]["version"], TOOLS["pigz"]["version"]
    log: join(WORKDIR, "logs", "{sample}.guppy.log")
    # Raw string: backslash continuations and the `\;` terminator of
    # `find -exec` must reach bash untouched by Python escape processing.
    shell: r"""
guppy_basecaller --print_workflows 2>&1 | tee -a {log}
guppy_basecaller \
    --input_path {input.fast5path} \
    --recursive \
    --flowcell {params.flowcell} \
    --kit {params.kit} \
    -x cuda:all \
    --records_per_fastq 0 \
    --save_path /lscratch/$SLURM_JOBID/{params.sample} 2>&1 | tee -a {log}
# gzip's stdout is the fastq.gz itself, so only its stderr may go to the log.
# (`> file 2>&1` would write the error text into the compressed output.)
find /lscratch/$SLURM_JOBID/{params.sample} -name "*.fastq" -exec cat {{}} \; \
    | gzip -n - > {output.outfastq} 2>> {log}
pigz -p $(nproc) /lscratch/$SLURM_JOBID/{params.sample}/sequencing_summary.txt && cp /lscratch/$SLURM_JOBID/{params.sample}/sequencing_summary.txt.gz {output.sequencing_summary} 2>&1 | tee -a {log}
"""
## Files created by guppy look like this:
# -rw-r--r-- 1 kopardevn CCBR 3.0K Nov 19 14:20 sequencing_summary.txt
# -rw-r--r-- 1 kopardevn CCBR 3.7K Nov 19 14:20 fastq_runid_d531634aaf7cba4fd8f7a1fba1d8ed9f0f81be2a_0_0.fastq
# -rw-r--r-- 1 kopardevn CCBR 3.3K Nov 19 14:20 fastq_runid_84d34f66eed213a95bd9b6aff2d24aac498555ff_0_0.fastq
# -rw-r--r-- 1 kopardevn CCBR 11K Nov 19 14:20 fastq_runid_013ea2ec6aebadbd80826ad673b152e04460f452_0_0.fastq
# -rw-r--r-- 1 kopardevn CCBR 64K Nov 19 14:20 sequencing_telemetry.js
# -rw-r--r-- 1 kopardevn CCBR 7.4K Nov 19 14:20 guppy_basecaller_log-2020-11-19_14-20-41.log
38+
# Run FastQC once over the gzipped fastq of every sample in SAMPLES and
# write the per-sample *_fastqc.zip reports into a single output folder.
rule fastqc:
    input:
        expand(join(WORKDIR, "fastqs", "{sample}.fastq.gz"), sample=SAMPLES)
    output:
        expand(join(WORKDIR, "qc", "fastqc", "{sample}_fastqc.zip"), sample=SAMPLES)
    params:
        outdir = join(WORKDIR, "qc", "fastqc")
    threads: 16
    envmodules: TOOLS["fastqc"]["version"]
    log: join(WORKDIR, "logs", "fastqc.log")
    # The rule declares a log file, so capture FastQC's output in it the same
    # way the other rules in this file do.
    shell: """
fastqc {input} -t {threads} -o {params.outdir} 2>&1 | tee -a {log}
"""
52+
# Build the pycoQC HTML and JSON reports for one sample from the
# sequencing summary file produced by rule guppy.
rule pycoqc:
    input:
        # sequencing_summary=join(WORKDIR,"fastqs","{sample}.sequencing_summary.txt")
        sequencing_summary = rules.guppy.output.sequencing_summary
    output:
        pycoQChtml = join(WORKDIR, "qc", "pycoQC", "{sample}.pycoQC.html"),
        pycoQCjson = join(WORKDIR, "qc", "pycoQC", "{sample}.pycoQC.json"),
    params:
        # Not referenced by the shell command below; kept for interface
        # compatibility, but now names this rule's own report folder instead
        # of the fastqc folder.
        outdir = join(WORKDIR, "qc", "pycoQC")
    conda: "envs/pycoqc.yaml"
    container: "docker://nciccbr/ccbr_pycoqc_v2.5.0.23:latest"
    log: join(WORKDIR, "logs", "{sample}.pycoQC.log")
    shell: """
pycoQC -f {input.sequencing_summary} -o {output.pycoQChtml} -j {output.pycoQCjson} 2>&1 | tee -a {log}
"""
0 commit comments