From 9f3ec243d947c09a8c340b17a412ed3c82e20567 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 5 Nov 2024 13:35:07 -0500 Subject: [PATCH 1/6] fix: increase latency wait to 300 --- charlie | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charlie b/charlie index 7899407..d36629a 100755 --- a/charlie +++ b/charlie @@ -481,7 +481,7 @@ function run() { --use-singularity \ --singularity-args "$SINGULARITY_BINDS" \ --use-envmodules \ - --latency-wait 120 \ + --latency-wait 300 \ --configfile $CONFIGFILE \ --cores all \ --rerun-incomplete \ @@ -521,7 +521,7 @@ snakemake -s $SNAKEFILE \ --singularity-args "$SINGULARITY_BINDS" \ --use-envmodules \ --printshellcmds \ - --latency-wait 120 \ + --latency-wait 300 \ --configfile $CONFIGFILE \ --cluster-config $CLUSTERFILE \ --cluster "$CLUSTER_SBATCH_CMD" \ @@ -562,7 +562,7 @@ EOF --use-singularity \ --singularity-args "$SINGULARITY_BINDS" \ --printshellcmds \ - --latency-wait 120 \ + --latency-wait 300 \ --configfile $CONFIGFILE \ --cluster-config $CLUSTERFILE \ --cluster "$CLUSTER_SBATCH_CMD" \ From 1947120bc3005af95af4f2461ceb7f3ab700f0ea Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 15 Nov 2024 15:12:23 -0500 Subject: [PATCH 2/6] chore: remove unused lint files --- .github/workflows/activeDev_linter.yaml | 29 ------ .github/workflows/main_linter.yaml | 27 ----- config/lint.config.yaml | 127 ------------------------ lintit | 2 - 4 files changed, 185 deletions(-) delete mode 100644 .github/workflows/activeDev_linter.yaml delete mode 100644 .github/workflows/main_linter.yaml delete mode 100644 config/lint.config.yaml delete mode 100755 lintit diff --git a/.github/workflows/activeDev_linter.yaml b/.github/workflows/activeDev_linter.yaml deleted file mode 100644 index 4d99527..0000000 --- a/.github/workflows/activeDev_linter.yaml +++ /dev/null @@ -1,29 +0,0 @@ -run-name: ${{github.actor}} is running LINT on activeDev branch! - -on: - push: - branches: - - activeDev - pull_request: - branches-ignore: [] - -jobs: - Dryrun_Lint: - runs-on: ubuntu-latest - steps: - - name: Linting - uses: actions/checkout@v3 - with: - ref: "activeDev" - - run: echo "repo cloned --> ${{github.repository}}" - - run: echo "github workspace --> ${{github.workspace}}" - - name: Listing the repo - run: | - ls -larth ${{github.workspace}} - - name: Running Lint - uses: snakemake/snakemake-github-action@v1 - with: - directory: '.' - snakefile: 'workflow/Snakefile' - args: '--lint --configfile config/lint.config.yaml' - - run: echo "STATUS --> ${{job.status}}" diff --git a/.github/workflows/main_linter.yaml b/.github/workflows/main_linter.yaml deleted file mode 100644 index ee61de9..0000000 --- a/.github/workflows/main_linter.yaml +++ /dev/null @@ -1,27 +0,0 @@ -run-name: ${{github.actor}} is running LINT on main/master branch - -on: - push: - branches: - - main - - master - pull_request: - branches-ignore: [] - -jobs: - Dryrun_Lint: - runs-on: ubuntu-latest - steps: - - name: Linting - uses: actions/checkout@v3 - - run: | - git branch -a - git log | head - ls -alrth config - ls -alrth workflow - - name: Running Lint - uses: snakemake/snakemake-github-action@v1 - with: - directory: '.' - snakefile: 'workflow/Snakefile' - args: '--lint --configfile config/lint.config.yaml' diff --git a/config/lint.config.yaml b/config/lint.config.yaml deleted file mode 100644 index fde2b7e..0000000 --- a/config/lint.config.yaml +++ /dev/null @@ -1,127 +0,0 @@ -## you probably need to change or comment/uncomment some of these -# -# The working dir... output will be in the results subfolder of the .tests/lint_workdir -workdir: ".tests/lint_workdir" -# -# tab delimited samples file ... should have the following 3 columns -# sampleName path_to_R1_fastq path_to_R2_fastq -# -samples: ".tests/lint/samples.tsv" -# -# Should the CLEAR pipeline be run? True or False WITHOUT quotes -run_clear: True -# -# Should the DCC pipeline be run? True or False WITHOUT quote -run_dcc: True -# -# Should the MapSplice pipeline be run? True or False WITHOUT quotes -run_mapsplice: True -mapsplice_min_map_len: 50 -mapsplice_filtering: 2 # 1=less stringent 2=default -# -# Should the circRNA_finder be run? True or False WITHOUT quotes -run_circRNAFinder: True -# Should the NCLscan pipeline be run? True or False WITHOUT quotes -# This can only be run for PE data -run_nclscan: True -nclscan_config: ".tests/lint_workdir/nclscan.config" -# - -# select references .... host + viruses(comma-separated): -# select host: # options are hg38 or mm39 -host: "host" -additives: "additive" # options are ERCC and BAC16Insert -viruses: "virus" -# select viruses and other (ERCC/BAC): options are -# ERCC -# BAC16Insert -# -# | RefSeq Sequence | RefSeq assembly accession | Notes | -# | ---------------- | ------------------------- | ----------------------------------------------------- | -# | NC_007605.1 | GCF_002402265.1 | Human gammaherpesvirus 4 (Epstein-Barr virus) | -# | NC_000898.1 | GCF_000846365.1 | Human betaherpesvirus 6B | -# | NC_001664.4 | GCF_000845685.2 | Human betaherpesvirus 6A | -# | NC_001716.2 | GCF_000848125.1 | Human betaherpesvirus 7 | -# | NC_006273.2 | GCF_000845245.1 | Human betaherpesvirus 5 | -# | NC_009333.1 | GCF_000838265.1 | Human gammaherpesvirus 8 | -# | NC_045512.2 | GCF_009858895.2 | Severe acute respiratory syndrome-related coronavirus | -# | MN485971.1 | xx | HIV from Belgium ... GTF is hand curated | -# -# | RefSeq Sequence | RefSeq assembly accession | Notes | -# | ---------------- | ------------------------- | ------------------------------------------------------------ | -# | NC_001806.2 | GCF_000859985.2 | [Human alphaherpesvirus 1 (Herpes simplex virus type 1)](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=10298&lvl=3&lin=f&keep=1&srchmode=1&unlock) (strain 17) | -# -# | RefSeq Sequence | RefSeq assembly accession | Notes | -# | ---------------- | ------------------------- | ------------------------------------------------------------ | -# | KT899744 | KT899744 | HSV-1 strain KOS | -# | MH636806.1 | MH636806.1 | MHV68 (Murine herpesvirus 68 strain WUMS) | -# -# comma separated list -# STAR 1-pass junction filtering.... -# 1st pass of STAR generates a list of splice junctions which are filtered to be parsed to the second pass of STAR -# Separate filters can be applied to the "host"+"additives" and "viruses" defined above -# Typically, since "host"+"additives" annotations are much more well-established we filter out noncanonical and unannotated -# while keeping everything for the poorly annotated viruses -star_1pass_filter_host_noncanonical: "True" -star_1pass_filter_host_unannotated: "True" -star_1pass_filter_viruses_noncanonical: "False" -star_1pass_filter_viruses_unannotated: "False" - -# BSJ filters in bp: -minsize_host: 150 -minsize_virus: 150 -maxsize_host: 1000000000 -maxsize_virus: 5000 - -# -## Resources -# -## hg38 vanilla --> hg38 rRNA masked + rRNA appended -## -#ref_fa: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA.fa" -#ref_gtf: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA.gtf" -#regions: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA.fa.regions" -#star_index_dir: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/STAR_index_no_GTF_2.7.6a" -#ref_bwa_index: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA" -#ref_hisat_index: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA" -#ref_bowtie1_index: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA" -#genepred_w_geneid: "/data/CCBR_Pipeliner/Pipelines/resources/CCBR_circRNA_DAQ/hg38/hg38_rRNA_masked_plus_rRNA.genes.genepred_w_geneid" -# -# hg38_rRNA_masked_plus_rRNA_plus_viruses_plus_ERCC -# * hg38 ... rRNA masked -# * rRNA ... 45S and 5S sequences -# * ERCC sequences -# * viruses: - -# ref_fa: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68.fa" -# ref_gtf: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68.gtf" -# regions: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68.regions" -# star_index_dir: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/STAR_index_no_GTF_2.7.6a" -# ref_bwa_index: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68" -# ref_hisat_index: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68" -# ref_bowtie1_index: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68" -# genepred_w_geneid: "/data/Ziegelbauer_lab/resources/mm39_ERCC_HSV1_MHV68/mm39_ERCC_HSV1_MHV68.genes.genepred_w_geneid" - -# - -## you most probably dont need to change these -scriptsdir: "workflow/scripts" -resourcesdir: "resources" -cluster: "config/unknown/cluster.json" -adapters: "resources/TruSeq_and_nextera_adapters.consolidated.fa" -circexplorer_bsj_circRNA_min_reads: 2 # in addition to "known" and "low-conf" circRNAs identified by circexplorer, we also include those found in back_spliced.bed file but not classified as known/low-conf only if the number of reads supporting the BSJ call is greater than this number -minreadcount: 2 # this is used to filter circRNAs while creating the per-sample counts table -ciri_perl_script: "/data/Ziegelbauer_lab/tools/CIRI_v2.0.6/CIRI2.pl" -nclscan_dir: "/data/Ziegelbauer_lab/tools/NCLscan-1.7.0" -circrnafinder_dir: "/data/Ziegelbauer_lab/tools/circRNA_finder-1.2" -dcc_strandedness: "-ss" # "-ss" for stranded library and "--nonstrand" for unstranded -cutadapt_min_length: 15 -cutadapt_n: 5 -cutadapt_max_n: 0.5 -cutadapt_O: 5 -cutadapt_q: 20 -fastas_gtfs_dir: ".tests/lint/fasta_gtf" -annotation_lookups: - hg38: "resources/hg38_2_hg19_lookup.txt" - mm39: "resources/mm39_circBase_annotation_lookup.txt" - host: "resources/dummy" diff --git a/lintit b/lintit deleted file mode 100755 index b4d7e90..0000000 --- a/lintit +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -snakemake --lint --snakefile workflow/Snakefile --configfile config/lint.config.yaml From 7e7d20bcd0a9b26960cf8639da0df54581f26672 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 15 Nov 2024 15:16:10 -0500 Subject: [PATCH 3/6] fix: use realpaths in attempt to fix file latency issue --- charlie | 1 + config/biowulf/config.yaml | 2 +- config/samples.tsv | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/charlie b/charlie index d36629a..24e1fbd 100755 --- a/charlie +++ b/charlie @@ -640,6 +640,7 @@ function main(){ esac done + WORKDIR=$(readlink -f $WORKDIR) echo "Working Dir: $WORKDIR" if [[ -z "$SING_CACHE_DIR" ]]; then diff --git a/config/biowulf/config.yaml b/config/biowulf/config.yaml index c1dfa91..87e534d 100644 --- a/config/biowulf/config.yaml +++ b/config/biowulf/config.yaml @@ -107,7 +107,7 @@ high_confidence_core_callers_plus_n: 1 ciri_perl_script: "/opt2/CIRI_v2.0.6/CIRI2.pl" # path in docker container # change this path to a directory containing fasta and GTF files for all host and virus genomes -fastas_gtfs_dir: "/data/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs" +fastas_gtfs_dir: "/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs" annotation_lookups: hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" diff --git a/config/samples.tsv b/config/samples.tsv index af56753..878645a 100644 --- a/config/samples.tsv +++ b/config/samples.tsv @@ -1,3 +1,3 @@ sampleName path_to_R1_fastq path_to_R2_fastq -GI1_N /data/CCBR_Pipeliner/testdata/circRNA/human/GI1_N_ss.R1.fastq.gz /data/CCBR_Pipeliner/testdata/circRNA/human/GI1_N_ss.R2.fastq.gz -GI1_T /data/CCBR_Pipeliner/testdata/circRNA/human/GI1_T_ss.R1.fastq.gz +GI1_N /gpfs/gsfs10/users/CCBR_Pipeliner/testdata/circRNA/human/GI1_N_ss.R1.fastq.gz /gpfs/gsfs10/users/CCBR_Pipeliner/testdata/circRNA/human/GI1_N_ss.R2.fastq.gz +GI1_T /gpfs/gsfs10/users/CCBR_Pipeliner/testdata/circRNA/human/GI1_T_ss.R1.fastq.gz From 613fb617f1ed426fb8900f98e599ca0497a67cc0 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 15 Nov 2024 15:20:30 -0500 Subject: [PATCH 4/6] fix: add sleep to wait for file creation --- workflow/rules/findcircrna.smk | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/rules/findcircrna.smk b/workflow/rules/findcircrna.smk index e72dfb4..1b931b6 100644 --- a/workflow/rules/findcircrna.smk +++ b/workflow/rules/findcircrna.smk @@ -1104,6 +1104,7 @@ postProcessStarAlignment.pl \\ --starDir ${{starDir}}/ \\ --outDir ${{outDir}}/ +sleep 10 echo -ne "chr\\tstart\\tend\\tstrand\\tread_count\\n" > {output.ctf} awk -F"\\t" -v OFS="\\t" -v minreads={params.bsj_min_nreads} '{{if ($5>=minreads) {{print $1,$2,$3,$6,$5}}}}' {output.bed} >> {output.ctf} From 4ddadaf01ecc56bad06558b59930ab23f240d090 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 15 Nov 2024 15:25:27 -0500 Subject: [PATCH 5/6] chore: update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be4042d..2069b49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Also use `python -E` to ensure the `$PYTHONPATH` is not carried over. (#129, @kelly-sovacool) - Fix `reconfig` to correctly replace variables in the config file. (#121, @kelly-sovacool) - Prevent using excessive memory when copying reference files. (#126, @kelly-sovacool) +- Fix missing output files due to file system latency. (#130, @kelly-sovacool) ## CHARLIE 0.11.0 From b95e4c842b152dd40292fdd4b89af06ded07127c Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 19 Nov 2024 10:13:39 -0500 Subject: [PATCH 6/6] chore: improve changelog description --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2069b49..a244c6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ - Also use `python -E` to ensure the `$PYTHONPATH` is not carried over. (#129, @kelly-sovacool) - Fix `reconfig` to correctly replace variables in the config file. (#121, @kelly-sovacool) - Prevent using excessive memory when copying reference files. (#126, @kelly-sovacool) -- Fix missing output files due to file system latency. (#130, @kelly-sovacool) +- Fix missing output files due to file system latency and use real (absolute) paths where possible. (#130, @kelly-sovacool) ## CHARLIE 0.11.0