diff --git a/workflow/Snakefile b/workflow/Snakefile index 7bc435a..c1c8f8f 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -210,26 +210,26 @@ rule cutadapt: if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi if [ "{params.peorse}" == "PE" ];then ## Paired-end - cutadapt --pair-filter=any \ - --nextseq-trim=2 \ - --trim-n \ - -n 5 -O 5 \ - -q 10,10 -m {params.cutadapt_min_length}:{params.cutadapt_min_length} \ - -b file:{params.adapters} \ - -B file:{params.adapters} \ - -j {threads} \ - -o {output.of1} -p {output.of2} \ + cutadapt --pair-filter=any \\ + --nextseq-trim=2 \\ + --trim-n \\ + -n 5 -O 5 \\ + -q 10,10 -m {params.cutadapt_min_length}:{params.cutadapt_min_length} \\ + -b file:{params.adapters} \\ + -B file:{params.adapters} \\ + -j {threads} \\ + -o {output.of1} -p {output.of2} \\ {input.R1} {input.R2} else ## Single-end - cutadapt \ - --nextseq-trim=2 \ - --trim-n \ - -n 5 -O 5 \ - -q 10,10 -m {params.cutadapt_min_length} \ - -b file:{params.adapters} \ - -j {threads} \ - -o {output.of1} \ + cutadapt \\ + --nextseq-trim=2 \\ + --trim-n \\ + -n 5 -O 5 \\ + -q 10,10 -m {params.cutadapt_min_length} \\ + -b file:{params.adapters} \\ + -j {threads} \\ + -o {output.of1} \\ {input.R1} touch {output.of2} fi @@ -284,29 +284,29 @@ if [ "{params.peorse}" == "PE" ];then overhang=$(zcat {input} | awk -v maxlen=100 'NR%4==2 {{if (length($1) > maxlen+0) maxlen=length($1)}}; END {{print maxlen-1}}') echo "sjdbOverhang for STAR: ${{overhang}}" cd {params.outdir} - STAR --genomeDir {params.starindexdir} \ - --outSAMstrandField None \ - --outFilterMultimapNmax 20 \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFilterMismatchNmax 999 \ - --outFilterMismatchNoverLmax 0.3 \ - --alignIntronMin 20 \ - --alignIntronMax 1000000 \ - --alignMatesGapMax 1000000 \ - --readFilesIn {input.R1} {input.R2} \ - --readFilesCommand zcat \ - --runThreadN {threads} \ - --outFileNamePrefix {params.sample}_p1. \ - --chimSegmentMin 20 \ - --chimMultimapNmax 10 \ - --chimOutType Junctions \ - --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \ - --outSAMtype None \ - --alignEndsProtrude 10 ConcordantPair \ - --outFilterIntronMotifs None \ - --sjdbGTFfile {params.gtf} \ - --outTmpDir=${{TMPDIR}} \ + STAR --genomeDir {params.starindexdir} \\ + --outSAMstrandField None \\ + --outFilterMultimapNmax 20 \\ + --alignSJoverhangMin 8 \\ + --alignSJDBoverhangMin 1 \\ + --outFilterMismatchNmax 999 \\ + --outFilterMismatchNoverLmax 0.3 \\ + --alignIntronMin 20 \\ + --alignIntronMax 1000000 \\ + --alignMatesGapMax 1000000 \\ + --readFilesIn {input.R1} {input.R2} \\ + --readFilesCommand zcat \\ + --runThreadN {threads} \\ + --outFileNamePrefix {params.sample}_p1. \\ + --chimSegmentMin 20 \\ + --chimMultimapNmax 10 \\ + --chimOutType Junctions \\ + --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \\ + --outSAMtype None \\ + --alignEndsProtrude 10 ConcordantPair \\ + --outFilterIntronMotifs None \\ + --sjdbGTFfile {params.gtf} \\ + --outTmpDir=${{TMPDIR}} \\ --sjdbOverhang $overhang else @@ -315,29 +315,29 @@ else overhang=$(zcat {input.R1} | awk -v maxlen=100 'NR%4==2 {{if (length($1) > maxlen+0) maxlen=length($1)}}; END {{print maxlen-1}}') echo "sjdbOverhang for STAR: ${{overhang}}" cd {params.outdir} - STAR --genomeDir {params.starindexdir} \ - --outSAMstrandField None \ - --outFilterMultimapNmax 20 \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFilterMismatchNmax 999 \ - --outFilterMismatchNoverLmax 0.3 \ - --alignIntronMin 20 \ - --alignIntronMax 1000000 \ - --alignMatesGapMax 1000000 \ - --readFilesIn {input.R1} \ - --readFilesCommand zcat \ - --runThreadN {threads} \ - --outFileNamePrefix {params.sample}_p1. \ - --chimSegmentMin 20 \ - --chimMultimapNmax 10 \ - --chimOutType Junctions \ - --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \ - --outSAMtype None \ - --alignEndsProtrude 10 ConcordantPair \ - --outFilterIntronMotifs None \ - --sjdbGTFfile {params.gtf} \ - --outTmpDir=${{TMPDIR}} \ + STAR --genomeDir {params.starindexdir} \\ + --outSAMstrandField None \\ + --outFilterMultimapNmax 20 \\ + --alignSJoverhangMin 8 \\ + --alignSJDBoverhangMin 1 \\ + --outFilterMismatchNmax 999 \\ + --outFilterMismatchNoverLmax 0.3 \\ + --alignIntronMin 20 \\ + --alignIntronMax 1000000 \\ + --alignMatesGapMax 1000000 \\ + --readFilesIn {input.R1} \\ + --readFilesCommand zcat \\ + --runThreadN {threads} \\ + --outFileNamePrefix {params.sample}_p1. \\ + --chimSegmentMin 20 \\ + --chimMultimapNmax 10 \\ + --chimOutType Junctions \\ + --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \\ + --outSAMtype None \\ + --alignEndsProtrude 10 ConcordantPair \\ + --outFilterIntronMotifs None \\ + --sjdbGTFfile {params.gtf} \\ + --outTmpDir=${{TMPDIR}} \\ --sjdbOverhang $overhang fi @@ -359,14 +359,14 @@ rule merge_SJ_tabs: filter2_unannotated=config['star1passfilter2_unannotated'] threads: 1 shell:""" -cat {input} | \ -python {params.script1} \ - --regions {params.regions} \ - --filter1regions {params.filter1regions} \ - --filter1_noncanonical {params.filter1_noncanonical} \ - --filter1_unannotated {params.filter1_unannotated} \ - --filter2_noncanonical {params.filter2_noncanonical} \ - --filter2_unannotated {params.filter1_unannotated} | \ +cat {input} | \\ +python {params.script1} \\ + --regions {params.regions} \\ + --filter1regions {params.filter1regions} \\ + --filter1_noncanonical {params.filter1_noncanonical} \\ + --filter1_unannotated {params.filter1_unannotated} \\ + --filter2_noncanonical {params.filter2_noncanonical} \\ + --filter2_unannotated {params.filter1_unannotated} | \\ cut -f1-4 | sort -k1,1 -k2,2n | uniq > {output.pass1sjtab} """ @@ -405,33 +405,33 @@ if [ "{params.peorse}" == "PE" ];then overhang=$(zcat {input.R1} {input.R2} | awk -v maxlen=100 'NR%4==2 {{if (length($1) > maxlen+0) maxlen=length($1)}}; END {{print maxlen-1}}') echo "sjdbOverhang for STAR: ${{overhang}}" cd {params.outdir} - STAR --genomeDir {params.starindexdir} \ - --outSAMstrandField None \ - --outFilterType BySJout \ - --outFilterMultimapNmax 20 \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFilterMismatchNmax 999 \ - --outFilterMismatchNoverLmax 0.3 \ - --alignIntronMin 20 \ - --alignIntronMax 2000000 \ - --alignMatesGapMax 2000000 \ - --readFilesIn {input.R1} {input.R2} \ - --readFilesCommand zcat \ - --runThreadN 56 \ - --outFileNamePrefix {params.sample}_p2. \ - --sjdbFileChrStartEnd {input.pass1sjtab} \ - --chimSegmentMin 20 \ - --chimOutType Junctions WithinBAM \ - --chimMultimapNmax 10 \ - --limitSjdbInsertNsj $limitSjdbInsertNsj \ - --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \ - --outSAMtype BAM SortedByCoordinate \ - --alignEndsProtrude 10 ConcordantPair \ - --outFilterIntronMotifs None \ - --sjdbGTFfile {params.gtf} \ - --quantMode GeneCounts \ - --outTmpDir=${{TMPDIR}} \ + STAR --genomeDir {params.starindexdir} \\ + --outSAMstrandField None \\ + --outFilterType BySJout \\ + --outFilterMultimapNmax 20 \\ + --alignSJoverhangMin 8 \\ + --alignSJDBoverhangMin 1 \\ + --outFilterMismatchNmax 999 \\ + --outFilterMismatchNoverLmax 0.3 \\ + --alignIntronMin 20 \\ + --alignIntronMax 2000000 \\ + --alignMatesGapMax 2000000 \\ + --readFilesIn {input.R1} {input.R2} \\ + --readFilesCommand zcat \\ + --runThreadN 56 \\ + --outFileNamePrefix {params.sample}_p2. \\ + --sjdbFileChrStartEnd {input.pass1sjtab} \\ + --chimSegmentMin 20 \\ + --chimOutType Junctions WithinBAM \\ + --chimMultimapNmax 10 \\ + --limitSjdbInsertNsj $limitSjdbInsertNsj \\ + --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \\ + --outSAMtype BAM SortedByCoordinate \\ + --alignEndsProtrude 10 ConcordantPair \\ + --outFilterIntronMotifs None \\ + --sjdbGTFfile {params.gtf} \\ + --quantMode GeneCounts \\ + --outTmpDir=${{TMPDIR}} \\ --sjdbOverhang $overhang else @@ -439,33 +439,33 @@ else overhang=$(zcat {input.R1} | awk -v maxlen=100 'NR%4==2 {{if (length($1) > maxlen+0) maxlen=length($1)}}; END {{print maxlen-1}}') echo "sjdbOverhang for STAR: ${{overhang}}" cd {params.outdir} - STAR --genomeDir {params.starindexdir} \ - --outSAMstrandField None \ - --outFilterType BySJout \ - --outFilterMultimapNmax 20 \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFilterMismatchNmax 999 \ - --outFilterMismatchNoverLmax 0.3 \ - --alignIntronMin 20 \ - --alignIntronMax 2000000 \ - --alignMatesGapMax 2000000 \ - --readFilesIn {input.R1} \ - --readFilesCommand zcat \ - --runThreadN 56 \ - --outFileNamePrefix {params.sample}_p2. \ - --sjdbFileChrStartEnd {input.pass1sjtab} \ - --chimSegmentMin 20 \ - --chimOutType Junctions WithinBAM \ - --chimMultimapNmax 10 \ - --limitSjdbInsertNsj $limitSjdbInsertNsj \ - --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \ - --outSAMtype BAM SortedByCoordinate \ - --alignEndsProtrude 10 ConcordantPair \ - --outFilterIntronMotifs None \ - --sjdbGTFfile {params.gtf} \ - --quantMode GeneCounts \ - --outTmpDir=${{TMPDIR}} \ + STAR --genomeDir {params.starindexdir} \\ + --outSAMstrandField None \\ + --outFilterType BySJout \\ + --outFilterMultimapNmax 20 \\ + --alignSJoverhangMin 8 \\ + --alignSJDBoverhangMin 1 \\ + --outFilterMismatchNmax 999 \\ + --outFilterMismatchNoverLmax 0.3 \\ + --alignIntronMin 20 \\ + --alignIntronMax 2000000 \\ + --alignMatesGapMax 2000000 \\ + --readFilesIn {input.R1} \\ + --readFilesCommand zcat \\ + --runThreadN 56 \\ + --outFileNamePrefix {params.sample}_p2. \\ + --sjdbFileChrStartEnd {input.pass1sjtab} \\ + --chimSegmentMin 20 \\ + --chimOutType Junctions WithinBAM \\ + --chimMultimapNmax 10 \\ + --limitSjdbInsertNsj $limitSjdbInsertNsj \\ + --alignTranscriptsPerReadNmax {params.alignTranscriptsPerReadNmax} \\ + --outSAMtype BAM SortedByCoordinate \\ + --alignEndsProtrude 10 ConcordantPair \\ + --outFilterIntronMotifs None \\ + --sjdbGTFfile {params.gtf} \\ + --quantMode GeneCounts \\ + --outTmpDir=${{TMPDIR}} \\ --sjdbOverhang $overhang fi ## ensure the star2p file is indexed ... is should already be sorted by STAR @@ -712,16 +712,16 @@ if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi cd {params.outdir} mv {input.junctionfile} {input.junctionfile}.original grep -v junction_type {input.junctionfile}.original > {input.junctionfile} -CIRCexplorer2 parse \ - -t STAR \ +CIRCexplorer2 parse \\ + -t STAR \\ {input.junctionfile} > {params.sample}_circexplorer_parse.log 2>&1 mv back_spliced_junction.bed {output.backsplicedjunctions} mv {input.junctionfile}.original {input.junctionfile} -CIRCexplorer2 annotate \ --r {params.genepred} \ --g {params.reffa} \ --b {output.backsplicedjunctions} \ --o $(basename {output.annotations}) \ +CIRCexplorer2 annotate \\ +-r {params.genepred} \\ +-g {params.reffa} \\ +-b {output.backsplicedjunctions} \\ +-o $(basename {output.annotations}) \\ --low-confidence """ @@ -750,22 +750,22 @@ rule ciri: cd {params.outdir} if [ "{params.peorse}" == "PE" ];then ## paired-end - bwa mem -t {threads} -T 19 \ - {params.bwaindex} \ - {input.R1} {input.R2} \ + bwa mem -t {threads} -T 19 \\ + {params.bwaindex} \\ + {input.R1} {input.R2} \\ > {params.sample}.bwa.sam 2> {output.bwalog} else ## single-end - bwa mem -t {threads} -T 19 \ - {params.bwaindex} \ - {input.R1} \ + bwa mem -t {threads} -T 19 \\ + {params.bwaindex} \\ + {input.R1} \\ > {params.sample}.bwa.sam 2> {output.bwalog} fi -perl {params.ciripl} \ --I {params.sample}.bwa.sam \ --O {output.ciriout} \ --F {params.reffa} \ --A {params.gtf} \ +perl {params.ciripl} \\ +-I {params.sample}.bwa.sam \\ +-O {output.ciriout} \\ +-F {params.reffa} \\ +-A {params.gtf} \\ -G {output.cirilog} -T {threads} samtools view -@{threads} -bS {params.sample}.bwa.sam > {output.ciribam} rm -rf {params.sample}.bwa.sam @@ -815,11 +815,11 @@ rule clear: container: "docker://nciccbr/ccbr_clear:latest" threads: 4 shell:""" -circ_quant \ --c {input.circexplorerout} \ --b {input.bam} \ --t \ --r {params.genepred} \ +circ_quant \\ +-c {input.circexplorerout} \\ +-b {input.bam} \\ +-t \\ +-r {params.genepred} \\ -o {output.quantfile} """ @@ -835,10 +835,15 @@ rule annotate_clear_output: shell:""" ## cleanup quant.txt* dirs before annotation find {params.cleardir} -maxdepth 1 -type d -name "quant.txt*" -exec rm -rf {{}} \; +if [[ "$(cat {input.quantfile} | wc -l)" != "0" ]] +then python {params.script} {params.lookup} {input.quantfile} +else +touch {output.annotatedquantfile} +fi """ - +localrules: venn rule venn: input: circexplorerout=rules.annotate_circRNA.output.annotations, @@ -857,16 +862,25 @@ rule venn: shell:""" cut -f1 {input.ciriout}|grep -v circRNA_ID > /dev/shm/{params.sample}.ciri.lst cut -f1-3 {input.circexplorerout}|awk -F"\\t" '{{print $1\":\"$2+1\"|\"$3}}' > /dev/shm/{params.sample}.circExplorer.lst -2set_venn.R \ - -l /dev/shm/{params.sample}.ciri.lst \ - -r /dev/shm/{params.sample}.circExplorer.lst \ - -p {output.png} \ - -m {output.cirionly} \ - -s {output.circexploreronly} \ - -c1 "CIRI2" \ - -c2 "CircExplorer2" \ - -c {output.common} \ - -t {params.sample} +if [[ "$(cat /dev/shm/{params.sample}.ciri.lst | wc -l)" != "0" ]];then + if [[ "$(cat /dev/shm/{params.sample}.circExplorer.lst | wc -l)" != "0" ]];then + 2set_venn.R \\ + -l /dev/shm/{params.sample}.ciri.lst \\ + -r /dev/shm/{params.sample}.circExplorer.lst \\ + -p {output.png} \\ + -m {output.cirionly} \\ + -s {output.circexploreronly} \\ + -c1 "CIRI2" \\ + -c2 "CircExplorer2" \\ + -c {output.common} \\ + -t {params.sample} + else + for o in {output} + do + touch $o + done + fi +fi rm -f /dev/shm{params.sample}* """