-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Make alignment cleaning optional * updates * Update test workflow * re-working testing pipeline * fix workflow structure * update nextflow installation script * updates * update * add conda enabled to config for compatibility * add missing script * updates * Collect outputs when running pipeline * fix sample_id in test
- Loading branch information
Showing
34 changed files
with
286 additions
and
1,129 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
NC000962.3,.github/data/refs/NC_000962.3.fa |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name: art | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- art=2016.06.05 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: check-outputs | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- python=3 | ||
- jsonschema=4.20.0 | ||
- pyyaml=6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import csv | ||
import glob | ||
import json | ||
import os | ||
import urllib.request | ||
|
||
from jsonschema import validate | ||
import yaml | ||
|
||
|
||
def check_provenance_format_valid(provenance_files, schema):
    """
    Check that the provenance files are valid according to the schema.

    :param provenance_files: Paths of the provenance YAML files to check
    :param schema: Parsed JSON schema that every provenance document must satisfy
    :return: True if every file could be read and validated, False as soon as
             one file cannot be read, parsed or validated
    :rtype: bool
    """
    for provenance_file in provenance_files:
        try:
            with open(provenance_file) as f:
                # BaseLoader builds only plain strings, lists and dicts, so no
                # arbitrary Python objects are constructed from the file.
                provenance = yaml.load(f, Loader=yaml.BaseLoader)
            validate(provenance, schema)
        except Exception as e:
            # Deliberately broad: an unreadable file, malformed YAML and a
            # schema violation all mean "this check failed". Return instead of
            # exiting so the caller can still record the result.
            print(f"Error validating {provenance_file}: {e}")
            return False

    return True
|
||
def check_expected_files_exist(output_dir, sample_ids):
    """
    Check that the expected files exist in the output directory.

    Every missing file is reported (not only the first one) so that a single
    run shows the complete list of missing outputs.

    :param output_dir: Path to the output directory
    :param sample_ids: List of sample IDs
    :return: True if all expected files exist, False otherwise
    :rtype: bool
    """
    # Per-sample outputs, each found at <output_dir>/<sample_id>/<sample_id><suffix>
    expected_suffixes = (
        "_fastp.csv",
        "_fastp.json",
        "_short.bam",
        "_short.bam.bai",
        "_short_combined_alignment_qc.csv",
        "_short_depths.tsv",
        "_short_freebayes.vcf",
        "_short_low_coverage_regions.bed",
    )

    all_found = True
    for sample_id in sample_ids:
        for suffix in expected_suffixes:
            expected_file = f"{sample_id}/{sample_id}{suffix}"
            expected_file_path = os.path.join(output_dir, expected_file)
            if not os.path.exists(expected_file_path):
                print(f"Expected file {expected_file_path} not found")
                all_found = False

    return all_found
|
||
|
||
def main(args):
    """
    Run every output check and write one PASS/FAIL row per check to a CSV file.

    Downloads the pipeline provenance JSON schema, finds the
    ``*_provenance.yml`` files under ``args.pipeline_outdir``, derives the
    sample IDs from their file names, runs the checks and writes the results
    to ``args.output``.

    :param args: Parsed command-line arguments; ``args.output`` and
                 ``args.pipeline_outdir`` are read
    :raises SystemExit: With exit code 1 if any check failed
    """
    # dirname() is '' when the output is a bare file name, and makedirs('')
    # raises, so only create the directory when there is one to create.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    provenance_schema_url = "https://raw.githubusercontent.com/BCCDC-PHL/pipeline-provenance-schema/main/schema/pipeline-provenance.json"
    provenance_schema_path = ".github/data/pipeline-provenance.json"
    # Make sure the download target directory exists before fetching.
    os.makedirs(os.path.dirname(provenance_schema_path), exist_ok=True)
    urllib.request.urlretrieve(provenance_schema_url, provenance_schema_path)

    with open(provenance_schema_path) as f:
        provenance_schema = json.load(f)

    provenance_files_glob = f"{args.pipeline_outdir}/**/*_provenance.yml"
    provenance_files = glob.glob(provenance_files_glob, recursive=True)

    # The sample ID is the part of the file name before the first underscore,
    # so sample IDs that themselves contain underscores are truncated here.
    # dict.fromkeys() drops duplicates (several provenance files for the same
    # sample) while keeping the discovery order.
    sample_ids = list(dict.fromkeys(
        os.path.basename(provenance_file).split("_")[0]
        for provenance_file in provenance_files
    ))

    # NOTE(review): when no provenance files are found, both checks pass
    # vacuously — confirm whether an empty output directory should fail.

    # TODO: Add more tests
    tests = [
        {
            "test_name": "provenance_format_valid",
            "test_passed": check_provenance_format_valid(provenance_files, provenance_schema),
        },
        {
            "test_name": "all_expected_files_exist",
            "test_passed": check_expected_files_exist(args.pipeline_outdir, sample_ids),
        },
    ]

    output_fields = [
        "test_name",
        "test_result",
    ]

    output_path = args.output
    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='') as f:
        # extrasaction='ignore' drops the boolean "test_passed" key on write.
        writer = csv.DictWriter(f, fieldnames=output_fields, extrasaction='ignore')
        writer.writeheader()
        for test in tests:
            test["test_result"] = "PASS" if test["test_passed"] else "FAIL"
            writer.writerow(test)

    # Signal failure to the calling shell / CI step only after the results
    # file has been written.
    if not all(test["test_passed"] for test in tests):
        raise SystemExit(1)
|
||
|
||
if __name__ == '__main__':
    # Both arguments are required: without --output there is nowhere to write
    # the results, and without --pipeline-outdir no files would be found and
    # every check would pass vacuously.
    parser = argparse.ArgumentParser(description='Check outputs')
    parser.add_argument('--pipeline-outdir', type=str, required=True, help='Path to the pipeline output directory')
    parser.add_argument('-o', '--output', type=str, required=True, help='Path to the output file')
    args = parser.parse_args()
    main(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/usr/bin/env bash

# Activate the check-outputs conda environment and run the output checks
# against the pipeline test output, writing the results CSV into artifacts/.

set -e -o pipefail

# Quoted so a home directory containing spaces does not split the path.
source "${HOME}/.bashrc"

# Make `conda activate` available in this non-interactive shell.
eval "$(conda shell.bash hook)"

conda activate check-outputs

.github/scripts/check_outputs.py --pipeline-outdir .github/data/test_output -o artifacts/check_outputs_results.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash | ||
# Create a conda environment from the spec in .github/environments/art.yml | ||
|
||
conda env create -f .github/environments/art.yml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash | ||
# Create a conda environment from the spec in .github/environments/check-outputs.yml | ||
|
||
conda env create -f .github/environments/check-outputs.yml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash

# Download the reference genomes as FASTA from NCBI E-utilities into
# .github/data/refs.

# Stop on the first failed download instead of carrying on without the file.
set -eo pipefail

refs_dir=".github/data/refs"

mkdir -p "${refs_dir}"

for accession in NC_000962.3 NC_002973.6; do
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as if it were a FASTA file.
    curl --fail -o "${refs_dir}/${accession}.fa" "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=${accession}&db=nucleotide&rettype=fasta"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,11 @@ | ||
#!/bin/bash | ||
set -eo pipefail | ||
|
||
echo Install Nextflow .. >> artifacts/test_artifact.log | ||
set -eo pipefail | ||
|
||
wget -qO- https://get.nextflow.io | bash | ||
artifacts_dir="artifacts" | ||
|
||
mkdir -p /opt/nextflow/bin | ||
echo Install Nextflow .. >> ${artifacts_dir}/test.log | ||
|
||
mv nextflow /opt/nextflow/bin | ||
wget -qO- https://get.nextflow.io | bash | ||
|
||
echo "export PATH=/opt/nextflow/bin:$PATH" >> ~/.bashrc | ||
sudo mv nextflow /usr/local/bin/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash

# Move the simulated FASTQ files and the pipeline outputs into the artifacts
# directory.
#
# `set -e` is intentionally not enabled: if one of the source directories is
# empty or missing, the remaining artifacts are still moved.

artifacts_dir="artifacts"

# The log file lives inside the artifacts directory, so create it first.
mkdir -p "${artifacts_dir}"

echo "Prepare artifacts .." >> "${artifacts_dir}/test.log"

mkdir -p "${artifacts_dir}/fastq"

mv .github/data/fastq/*.fastq.gz "${artifacts_dir}/fastq"

mkdir -p "${artifacts_dir}/pipeline_outputs"

mv .github/data/test_output/* "${artifacts_dir}/pipeline_outputs"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash

# Lower the resource requests in nextflow.config, then run the pipeline on the
# simulated reads.

set -eo pipefail

# Rewrite every cpus/memory request in a single pass over nextflow.config.
sed -i \
    -e 's/cpus = 8/cpus = 4/g' \
    -e 's/cpus = 12/cpus = 4/g' \
    -e 's/cpus = 16/cpus = 4/g' \
    -e 's/cpus = 24/cpus = 4/g' \
    -e "s/memory = '36G'/memory = '2G'/g" \
    nextflow.config

nextflow run main.nf \
    -profile conda \
    --cache "${HOME}/.conda/envs" \
    --fastq_input .github/data/fastq \
    --outdir .github/data/test_output \
    --min_depth 5 \
    --ref .github/data/refs/NC_000962.3.fa \
    --collect_outputs \
    -with-report .github/data/test_output/nextflow_report.html \
    -with-trace .github/data/test_output/nextflow_trace.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash

# Simulate paired-end reads with art_illumina for every "sample_id,assembly"
# row of .github/data/reads_to_simulate.csv and write them as gzipped FASTQ
# files into .github/data/fastq.

# Same strictness as check_outputs.sh: stop as soon as any step fails.
set -eo pipefail

source "${HOME}/.bashrc"

# Make `conda activate` available in this non-interactive shell.
eval "$(conda shell.bash hook)"

conda activate art

fastq_dir=".github/data/fastq"

mkdir -p "${fastq_dir}"

# `read` returns non-zero on a final line that has no trailing newline; the
# `|| [[ -n ... ]]` guard keeps that last row from being dropped.
while IFS=',' read -r sample_id assembly || [[ -n "${sample_id}" ]]; do
    # Skip blank lines.
    if [[ -z "${sample_id}" ]]; then
        continue
    fi

    art_illumina \
        --paired \
        --in "${assembly}" \
        --fcov 12 \
        --len 150 \
        --mflen 400 \
        --sdev 100 \
        --rndSeed 42 \
        --qShift 0 \
        --qShift2 0 \
        --out "${fastq_dir}/${sample_id}_R"

    # Remove the alignment files written next to the reads.
    rm -f "${fastq_dir}/${sample_id}_R1.aln"
    rm -f "${fastq_dir}/${sample_id}_R2.aln"

    # Rename .fq to .fastq before compressing.
    mv "${fastq_dir}/${sample_id}_R1.fq" "${fastq_dir}/${sample_id}_R1.fastq"
    mv "${fastq_dir}/${sample_id}_R2.fq" "${fastq_dir}/${sample_id}_R2.fastq"

    gzip -f "${fastq_dir}/${sample_id}_R1.fastq"
    gzip -f "${fastq_dir}/${sample_id}_R2.fastq"

done < .github/data/reads_to_simulate.csv
|
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.