Skip to content

Commit

Permalink
fix: use python -E to circumvent $PYTHONPATH
Browse files Browse the repository at this point in the history
fixes #120
  • Loading branch information
kelly-sovacool committed Oct 16, 2024
1 parent 9bc97e0 commit 180ba71
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 29 deletions.
2 changes: 1 addition & 1 deletion workflow/rules/align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ rule merge_SJ_tabs:
"""
set -exo pipefail
cat {input} | \\
python {params.script1} \\
python -E {params.script1} \\
--regions {params.regions} \\
--filter1regions {params.filter1regions} \\
--filter1_noncanonical {params.filter1_noncanonical} \\
Expand Down
6 changes: 3 additions & 3 deletions workflow/rules/create_index.smk
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ samtools faidx {params.reffa} && \\
# bwa index -p ref {params.reffa} > bwa_index.log ... created in a separate rule
# NCLscan files
python {params.script3} --ingtf {params.refgtf} --outgtf {output.fixed_gtf}
python -E {params.script3} --ingtf {params.refgtf} --outgtf {output.fixed_gtf}
gffread -w {output.transcripts_fa} -g {params.reffa} {output.fixed_gtf}
touch {output.lncRNA_transcripts_fa}
create_reference.py -c {params.nclscan_config}
gtfToGenePred -ignoreGroupsWithoutExons {output.fixed_gtf} ref.genes.genepred && \\
python {params.script1} {output.fixed_gtf} ref.genes.genepred > {output.genepred_w_geneid}
python -E {params.script1} {output.fixed_gtf} ref.genes.genepred > {output.genepred_w_geneid}
stardir=$(dirname {output.sa})
mkdir -p $stardir && \\
Expand Down Expand Up @@ -125,4 +125,4 @@ set -exo pipefail
refdir=$(dirname {params.reffa})
cd $refdir
bowtie-build {params.reffa} ref
"""
"""
30 changes: 15 additions & 15 deletions workflow/rules/findcircrna.smk
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def get_per_sample_files_to_merge(wildcards):
# 2. parse the back_spliced_junction BED from above along with known splicing annotations to CircExplorer2 'parse' to create
# a. circularRNA_known.txt ... circRNAs around known gene exons
# b. low_conf_circularRNA_known.txt .... circRNAs with low confidence
# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script
# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script
# to create an aggregated list of BSJs with following columns:
# | # | ColName |
# |---|-------------|
Expand Down Expand Up @@ -305,7 +305,7 @@ perl {params.ciripl} \\
# samtools view -@{threads} -T {params.reffa} -CS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O CRAM -o {output.ciribam} -
samtools view -@{threads} -bS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.ciribam} -
rm -rf {params.sample}.bwa.sam
python {params.script} \\
python -E {params.script} \\
--ciriout {output.ciriout} \\
--back_spliced_min_reads {params.bsj_min_nreads} \\
--host "{params.host}" \\
Expand Down Expand Up @@ -411,7 +411,7 @@ rule create_ciri_count_matrix:
"""
set -exo pipefail
cd {params.outdir}
python {params.script} {params.lookup} {params.hostID}
python -E {params.script} {params.lookup} {params.hostID}
"""


Expand Down Expand Up @@ -441,8 +441,8 @@ rule create_circexplorer_count_matrix:
shell:
"""
cd {params.outdir}
python {params.script} {params.lookup} {params.hostID}
python {params.script2} {params.lookup} {params.hostID}
python -E {params.script} {params.lookup} {params.hostID}
python -E {params.script2} {params.lookup} {params.hostID}
"""


Expand Down Expand Up @@ -563,7 +563,7 @@ set -exo pipefail
find {params.cleardir} -maxdepth 1 -type d -name "quant.txt*" -exec rm -rf {{}} \;
if [[ "$(cat {input.quantfile} | wc -l)" != "0" ]]
then
python {params.script} {params.lookup} {input.quantfile} {params.hostID}
python -E {params.script} {params.lookup} {input.quantfile} {params.hostID}
else
touch {output.annotatedquantfile}
fi
Expand Down Expand Up @@ -722,10 +722,10 @@ ls -alrth {params.tmpdir}
paste {output.cr} {output.linear} | cut -f1-5,9 > {params.tmpdir}/CircRNALinearCount
python {params.script} \\
python -E {params.script} \\
--CircCoordinates {output.cc} --CircRNALinearCount {params.tmpdir}/CircRNALinearCount -o {output.ct}
python {params.script2} \\
python -E {params.script2} \\
--in_dcc_counts_table {output.ct} \\
--out_dcc_filtered_counts_table {output.ctf} \\
--back_spliced_min_reads {params.bsj_min_nreads} \\
Expand Down Expand Up @@ -843,7 +843,7 @@ R2fn=$(basename {input.R2})
zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}}
zcat {input.R2} > {params.tmpdir}/${{R2fn%.*}}
python $MSHOME/mapsplice.py \\
python -E $MSHOME/mapsplice.py \\
-1 {params.tmpdir}/${{R1fn%.*}} \\
-2 {params.tmpdir}/${{R2fn%.*}} \\
-c {params.separate_fastas} \\
Expand All @@ -862,7 +862,7 @@ else
R1fn=$(basename {input.R1})
zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}}
python $MSHOME/mapsplice.py \
python -E $MSHOME/mapsplice.py \
-1 {params.tmpdir}/${{R1fn%.*}} \
-c {params.separate_fastas} \
-p {threads} \
Expand Down Expand Up @@ -934,7 +934,7 @@ rule mapsplice_postprocess:
"""
set -exo pipefail
mkdir -p {params.tmpdir}
python {params.script} \\
python -E {params.script} \\
--circularRNAstxt {input.circRNAs} \\
-o {output.ct} \\
-fo {output.ctf} \\
Expand Down Expand Up @@ -1027,7 +1027,7 @@ results_bn=$(basename {output.result})
if [ "{params.peorse}" == "PE" ];then
NCLscan.py -c {params.nclscan_config} -pj {params.sample} -o {params.tmpdir} --fq1 {input.R1} --fq2 {input.R2}
rsync -az --progress {params.tmpdir}/${{results_bn}} {output.result}
python {params.script} \\
python -E {params.script} \\
--result {output.result} \\
-o {output.ct} \\
-fo {output.ctf} \\
Expand Down Expand Up @@ -1163,7 +1163,7 @@ rule find_circ:
shell:
"""
set -exo pipefail
python --version
python -E --version
which python
mkdir -p {params.tmpdir}
cd {params.tmpdir}
Expand Down Expand Up @@ -1223,7 +1223,7 @@ grep CIRCULAR {params.tmpdir}/{params.sample}.splice_sites.bed | \\
> {output.find_circ_bsj_bed}
echo -ne "chrom\\tstart\\tend\\tname\\tn_reads\\tstrand\\tn_uniq\\tuniq_bridges\\tbest_qual_left\\tbest_qual_right\\ttissues\\ttiss_counts\\tedits\\tanchor_overlap\\tbreakpoints\\tsignal\\tstrandmatch\\tcategory\\n" > {output.find_circ_bsj_bed_filtered}
cat {output.find_circ_bsj_bed} | python {params.collapse_script} | awk -F"\\t" -v m={params.min_reads} -v OFS="\\t" '{{if ($5>=m) {{print}}}}' \\
cat {output.find_circ_bsj_bed} | python -E {params.collapse_script} | awk -F"\\t" -v m={params.min_reads} -v OFS="\\t" '{{if ($5>=m) {{print}}}}' \\
>> {output.find_circ_bsj_bed_filtered}
"""

Expand Down Expand Up @@ -1332,7 +1332,7 @@ for f in {input};do
fi
done
python {params.script} \\
python -E {params.script} \\
--counttablelist $infiles \\
-o {output.matrix} \\
--minreads {params.bsj_min_nreads}
Expand Down
8 changes: 4 additions & 4 deletions workflow/scripts/_create_circExplorer_linear_bam.v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ function printtime() {
start0=$2
start=$3
msg=$4
end=$(date +%s.%N)
runtime0=$(python -c "print(${end} - ${start0})")
end=$(date +%s.%N)
runtime0=$(python -E -c "print(${end} - ${start0})")
runtime0=${runtime0%.*}
runtime=$(python -c "print(${end} - ${start})")
runtime=$(python -E -c "print(${end} - ${start})")
runtime=${runtime%.*}
echo "$scriptname | $runtime0 | $runtime | $msg"
}
Expand Down Expand Up @@ -156,7 +156,7 @@ start=$(date +%s.%N)

bedtools bamtobed -split -i $filtered_bam > ${tmpdir}/${sample_name}.bed

python ${SCRIPT_DIR}/_process_bamtobed.py \
python -E ${SCRIPT_DIR}/_process_bamtobed.py \
--inbed ${tmpdir}/${sample_name}.bed \
--outbed ${tmpdir}/${sample_name}.readends.bed \
--linear ${tmpdir}/${sample_name}.linear.readids.gz \
Expand Down
8 changes: 4 additions & 4 deletions workflow/scripts/_run_circExplorer_bwa.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ cat back_spliced_junction.filter1.bed|tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREA
# 1. both chromosomes are the same
# 2. both strands are the same
# 3. both coordinates are NOT the same
# as the junction file will be empy for BWA .. this strategy needs to be redone!
# as the junction file will be empty for BWA .. this strategy needs to be redone!
# awk '$1==$4' junction |awk '$3==$6' | awk '$2!=$5' > junction.filter1

# use junctions file to get the true strand (not + as reported in back_spliced_junction.bed) ... this is done
# use junctions file to get the true strand (not + as reported in back_spliced_junction.bed) ... this is done
# using _circExplorer_BSJ_get_strand.sh ... and replace it to create new BSJ BED
# while read seq s e score name ostrand;do
# while read seq s e score name ostrand;do
# strand=$(bash ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.sh $seq $s $e junction.filter1)
# echo -ne "$seq\t$s\t$e\t$score\t.\t$strand\n"
# done < back_spliced_junction.filter2.bed > back_spliced_junction.filter2.strand_fixed.bed
Expand All @@ -89,7 +89,7 @@ cp low_conf_circRNA_known.txt $LOWCONF
cat $KNOWNTXT |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDKNOWNTXT
cat $LOWCONF |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDLOWCONF

python ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \
python -E ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \
--back_spliced_bed $STRANDFIXEDBSJBED \
--back_spliced_min_reads $MINREADS \
--circularRNA_known $FILTEREDKNOWNTXT \
Expand Down
4 changes: 2 additions & 2 deletions workflow/scripts/_run_circExplorer_star.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ CIRCexplorer2 parse -t STAR junction > $PARSELOG 2>&1
# copy back original back_spliced BED file
cp back_spliced_junction.bed $ORIGINALBSJBED

python ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.py ${JUNCTIONFILE} back_spliced_junction.bed ${MINREADS} > back_spliced_junction.strand_fixed.bed
python -E ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.py ${JUNCTIONFILE} back_spliced_junction.bed ${MINREADS} > back_spliced_junction.strand_fixed.bed

# copy back strand_fixed BSJ BED
cp back_spliced_junction.strand_fixed.bed $STRANDFIXEDBSJBED
Expand All @@ -72,7 +72,7 @@ cp low_conf_circRNA_known.txt $LOWCONF
cat $KNOWNTXT |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDKNOWNTXT
cat $LOWCONF |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDLOWCONF

python ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \
python -E ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \
--back_spliced_bed $STRANDFIXEDBSJBED \
--back_spliced_min_reads $MINREADS \
--circularRNA_known $FILTEREDKNOWNTXT \
Expand Down

0 comments on commit 180ba71

Please sign in to comment.