diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 47f06cf..1b408a6 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -244,7 +244,7 @@ rule merge_SJ_tabs: """ set -exo pipefail cat {input} | \\ -python {params.script1} \\ +python -E {params.script1} \\ --regions {params.regions} \\ --filter1regions {params.filter1regions} \\ --filter1_noncanonical {params.filter1_noncanonical} \\ diff --git a/workflow/rules/create_index.smk b/workflow/rules/create_index.smk index 419c036..949d6af 100644 --- a/workflow/rules/create_index.smk +++ b/workflow/rules/create_index.smk @@ -33,13 +33,13 @@ samtools faidx {params.reffa} && \\ # bwa index -p ref {params.reffa} > bwa_index.log ... created in a separate rule # NCLscan files -python {params.script3} --ingtf {params.refgtf} --outgtf {output.fixed_gtf} +python -E {params.script3} --ingtf {params.refgtf} --outgtf {output.fixed_gtf} gffread -w {output.transcripts_fa} -g {params.reffa} {output.fixed_gtf} touch {output.lncRNA_transcripts_fa} create_reference.py -c {params.nclscan_config} gtfToGenePred -ignoreGroupsWithoutExons {output.fixed_gtf} ref.genes.genepred && \\ - python {params.script1} {output.fixed_gtf} ref.genes.genepred > {output.genepred_w_geneid} + python -E {params.script1} {output.fixed_gtf} ref.genes.genepred > {output.genepred_w_geneid} stardir=$(dirname {output.sa}) mkdir -p $stardir && \\ @@ -125,4 +125,4 @@ set -exo pipefail refdir=$(dirname {params.reffa}) cd $refdir bowtie-build {params.reffa} ref -""" +""" diff --git a/workflow/rules/findcircrna.smk b/workflow/rules/findcircrna.smk index 7edec8d..84712fe 100644 --- a/workflow/rules/findcircrna.smk +++ b/workflow/rules/findcircrna.smk @@ -141,7 +141,7 @@ def get_per_sample_files_to_merge(wildcards): # 2. parse the back_spliced_junction BED from above along with known splicing annotations to CircExplorer2 'parse' to create # a. circularRNA_known.txt ... circRNAs around known gene exons # b. 
low_conf_circularRNA_known.txt .... circRNAs with low confidence -# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script +# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script # to create an aggregated list of BSJs with following columns: # | # | ColName | # |---|-------------| @@ -305,7 +305,7 @@ perl {params.ciripl} \\ # samtools view -@{threads} -T {params.reffa} -CS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O CRAM -o {output.ciribam} - samtools view -@{threads} -bS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.ciribam} - rm -rf {params.sample}.bwa.sam -python {params.script} \\ +python -E {params.script} \\ --ciriout {output.ciriout} \\ --back_spliced_min_reads {params.bsj_min_nreads} \\ --host "{params.host}" \\ @@ -411,7 +411,7 @@ rule create_ciri_count_matrix: """ set -exo pipefail cd {params.outdir} -python {params.script} {params.lookup} {params.hostID} +python -E {params.script} {params.lookup} {params.hostID} """ @@ -441,8 +441,8 @@ rule create_circexplorer_count_matrix: shell: """ cd {params.outdir} -python {params.script} {params.lookup} {params.hostID} -python {params.script2} {params.lookup} {params.hostID} +python -E {params.script} {params.lookup} {params.hostID} +python -E {params.script2} {params.lookup} {params.hostID} """ @@ -563,7 +563,7 @@ set -exo pipefail find {params.cleardir} -maxdepth 1 -type d -name "quant.txt*" -exec rm -rf {{}} \; if [[ "$(cat {input.quantfile} | wc -l)" != "0" ]] then -python {params.script} {params.lookup} {input.quantfile} {params.hostID} +python -E {params.script} {params.lookup} {input.quantfile} {params.hostID} else touch {output.annotatedquantfile} fi @@ -722,10 +722,10 @@ ls -alrth {params.tmpdir} paste {output.cr} {output.linear} | cut -f1-5,9 > 
{params.tmpdir}/CircRNALinearCount -python {params.script} \\ +python -E {params.script} \\ --CircCoordinates {output.cc} --CircRNALinearCount {params.tmpdir}/CircRNALinearCount -o {output.ct} -python {params.script2} \\ +python -E {params.script2} \\ --in_dcc_counts_table {output.ct} \\ --out_dcc_filtered_counts_table {output.ctf} \\ --back_spliced_min_reads {params.bsj_min_nreads} \\ @@ -843,7 +843,7 @@ R2fn=$(basename {input.R2}) zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}} zcat {input.R2} > {params.tmpdir}/${{R2fn%.*}} -python $MSHOME/mapsplice.py \\ +python -E $MSHOME/mapsplice.py \\ -1 {params.tmpdir}/${{R1fn%.*}} \\ -2 {params.tmpdir}/${{R2fn%.*}} \\ -c {params.separate_fastas} \\ @@ -862,7 +862,7 @@ else R1fn=$(basename {input.R1}) zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}} -python $MSHOME/mapsplice.py \ +python -E $MSHOME/mapsplice.py \ -1 {params.tmpdir}/${{R1fn%.*}} \ -c {params.separate_fastas} \ -p {threads} \ @@ -934,7 +934,7 @@ rule mapsplice_postprocess: """ set -exo pipefail mkdir -p {params.tmpdir} -python {params.script} \\ +python -E {params.script} \\ --circularRNAstxt {input.circRNAs} \\ -o {output.ct} \\ -fo {output.ctf} \\ @@ -1027,7 +1027,7 @@ results_bn=$(basename {output.result}) if [ "{params.peorse}" == "PE" ];then NCLscan.py -c {params.nclscan_config} -pj {params.sample} -o {params.tmpdir} --fq1 {input.R1} --fq2 {input.R2} rsync -az --progress {params.tmpdir}/${{results_bn}} {output.result} -python {params.script} \\ +python -E {params.script} \\ --result {output.result} \\ -o {output.ct} \\ -fo {output.ctf} \\ @@ -1163,7 +1163,7 @@ rule find_circ: shell: """ set -exo pipefail -python --version +python -E --version which python mkdir -p {params.tmpdir} cd {params.tmpdir} @@ -1223,7 +1223,7 @@ grep CIRCULAR {params.tmpdir}/{params.sample}.splice_sites.bed | \\ > {output.find_circ_bsj_bed} echo -ne 
"chrom\\tstart\\tend\\tname\\tn_reads\\tstrand\\tn_uniq\\tuniq_bridges\\tbest_qual_left\\tbest_qual_right\\ttissues\\ttiss_counts\\tedits\\tanchor_overlap\\tbreakpoints\\tsignal\\tstrandmatch\\tcategory\\n" > {output.find_circ_bsj_bed_filtered} -cat {output.find_circ_bsj_bed} | python {params.collapse_script} | awk -F"\\t" -v m={params.min_reads} -v OFS="\\t" '{{if ($5>=m) {{print}}}}' \\ +cat {output.find_circ_bsj_bed} | python -E {params.collapse_script} | awk -F"\\t" -v m={params.min_reads} -v OFS="\\t" '{{if ($5>=m) {{print}}}}' \\ >> {output.find_circ_bsj_bed_filtered} """ @@ -1332,7 +1332,7 @@ for f in {input};do fi done -python {params.script} \\ +python -E {params.script} \\ --counttablelist $infiles \\ -o {output.matrix} \\ --minreads {params.bsj_min_nreads} diff --git a/workflow/scripts/_create_circExplorer_linear_bam.v2.sh b/workflow/scripts/_create_circExplorer_linear_bam.v2.sh index f415ad9..cf44b03 100644 --- a/workflow/scripts/_create_circExplorer_linear_bam.v2.sh +++ b/workflow/scripts/_create_circExplorer_linear_bam.v2.sh @@ -78,10 +78,10 @@ function printtime() { start0=$2 start=$3 msg=$4 - end=$(date +%s.%N) - runtime0=$(python -c "print(${end} - ${start0})") + end=$(date +%s.%N) + runtime0=$(python -E -c "print(${end} - ${start0})") runtime0=${runtime0%.*} - runtime=$(python -c "print(${end} - ${start})") + runtime=$(python -E -c "print(${end} - ${start})") runtime=${runtime%.*} echo "$scriptname | $runtime0 | $runtime | $msg" } @@ -156,7 +156,7 @@ start=$(date +%s.%N) bedtools bamtobed -split -i $filtered_bam > ${tmpdir}/${sample_name}.bed -python ${SCRIPT_DIR}/_process_bamtobed.py \ +python -E ${SCRIPT_DIR}/_process_bamtobed.py \ --inbed ${tmpdir}/${sample_name}.bed \ --outbed ${tmpdir}/${sample_name}.readends.bed \ --linear ${tmpdir}/${sample_name}.linear.readids.gz \ diff --git a/workflow/scripts/_run_circExplorer_bwa.sh b/workflow/scripts/_run_circExplorer_bwa.sh index 1234ff3..4f7951e 100644 --- a/workflow/scripts/_run_circExplorer_bwa.sh 
+++ b/workflow/scripts/_run_circExplorer_bwa.sh @@ -63,12 +63,12 @@ cat back_spliced_junction.filter1.bed|tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREA # 1. both chromosomes are the same # 2. both strands are the same # 3. both coordinates are NOT the same -# as the junction file will be empy for BWA .. this strategy needs to be redone! +# as the junction file will be empty for BWA .. this strategy needs to be redone! # awk '$1==$4' junction |awk '$3==$6' | awk '$2!=$5' > junction.filter1 -# use junctions file to get the true strand (not + as reported in back_spliced_junction.bed) ... this is done +# use junctions file to get the true strand (not + as reported in back_spliced_junction.bed) ... this is done # using _circExplorer_BSJ_get_strand.sh ... and replace it to create new BSJ BED -# while read seq s e score name ostrand;do +# while read seq s e score name ostrand;do # strand=$(bash ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.sh $seq $s $e junction.filter1) # echo -ne "$seq\t$s\t$e\t$score\t.\t$strand\n" # done < back_spliced_junction.filter2.bed > back_spliced_junction.filter2.strand_fixed.bed @@ -89,7 +89,7 @@ cp low_conf_circRNA_known.txt $LOWCONF cat $KNOWNTXT |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDKNOWNTXT cat $LOWCONF |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDLOWCONF -python ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ +python -E ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ --back_spliced_bed $STRANDFIXEDBSJBED \ --back_spliced_min_reads $MINREADS \ --circularRNA_known $FILTEREDKNOWNTXT \ diff --git a/workflow/scripts/_run_circExplorer_star.sh b/workflow/scripts/_run_circExplorer_star.sh index cd4ac9f..0ecd1fb 100644 --- a/workflow/scripts/_run_circExplorer_star.sh +++ b/workflow/scripts/_run_circExplorer_star.sh @@ -56,7 +56,7 @@ CIRCexplorer2 parse -t STAR junction > $PARSELOG 2>&1 # copy back original back_spliced BED file cp back_spliced_junction.bed $ORIGINALBSJBED 
-python ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.py ${JUNCTIONFILE} back_spliced_junction.bed ${MINREADS} > back_spliced_junction.strand_fixed.bed +python -E ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.py ${JUNCTIONFILE} back_spliced_junction.bed ${MINREADS} > back_spliced_junction.strand_fixed.bed # copy back strand_fixed BSJ BED cp back_spliced_junction.strand_fixed.bed $STRANDFIXEDBSJBED @@ -72,7 +72,7 @@ cp low_conf_circRNA_known.txt $LOWCONF cat $KNOWNTXT |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDKNOWNTXT cat $LOWCONF |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDLOWCONF -python ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ +python -E ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ --back_spliced_bed $STRANDFIXEDBSJBED \ --back_spliced_min_reads $MINREADS \ --circularRNA_known $FILTEREDKNOWNTXT \