From aeceeaac627dd1a7159182d43caa81fdfb2a4d5d Mon Sep 17 00:00:00 2001 From: "Martin O. Pollard" Date: Tue, 22 Aug 2023 15:46:29 +0100 Subject: [PATCH] Create full test without GATK GenomicsDBImport which is not supported on ARM --- setup/config_files/full_test_arm.yml | 141 +++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 setup/config_files/full_test_arm.yml diff --git a/setup/config_files/full_test_arm.yml b/setup/config_files/full_test_arm.yml new file mode 100644 index 0000000..6380282 --- /dev/null +++ b/setup/config_files/full_test_arm.yml @@ -0,0 +1,141 @@ +--- +- general_settings: + install_dir: "/data/tools/" +- type: cpu + benchmarks: + - type: multithreaded + settings: + tag: bwa + command: "${path_to_program} mem -t ${threads} -K 100000000 -o ${result_path}/output.${repeatn}.${processn}.sam ${input_datapath}/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set.fna ${input_datapath}/RMNISTHS_3xdownsample.fastq" + program: "bwa" + programversion: "0.7.17+139f68f" + shell: False + dataset_file: "/benchmarking/setup/bwa_data.txt" + datadir: "/data/datasets/bwa/" + result_dir: "/data/results/runs/" + repeats: 3 + units: 3x WGS + process_thread: + - processes: 2 + threads: $N/2 + - processes: 4 + threads: $N/4 + - processes: 1 + threads: range(2,$N,4) + clear_caches: True + - type: multithreaded + settings: + tag: bwa-mem2 + command: "${path_to_program} mem -t ${threads} -K 100000000 -o ${result_path}/output.${repeatn}.${processn}.sam ${input_datapath}/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set.fna ${input_datapath}/RMNISTHS_3xdownsample.fastq" + program: "bwa-mem2" + programversion: "2.2.1+simde_benchmarking" + shell: False + dataset_file: "/benchmarking/setup/bwa_data.txt" + datadir: "/data/datasets/bwa/" + result_dir: "/data/results/runs/" + repeats: 3 + units: 3x WGS + process_thread: + - processes: 2 + threads: $N/2 + - processes: 4 + threads: $N/4 + - processes: 1 + threads: range(2,$N,4) + clear_caches: True + - type: multithreaded + settings: + tag: salmon + command: "${path_to_program} quant --threads ${threads} -i ${input_datapath}/referenceGenome_index4/ -l A -1 ${input_datapath}/SRR10103759/C1_1.fq.gz -2 ${input_datapath}/SRR10103759/C1_2.fq.gz -o ${result_path}output.${repeatn}.${processn}.quant.sf" + program: "salmon" + programversion: "1.10.1" + shell: False + dataset_file: + file: "/benchmarking/setup/salmon_input_data.txt" + dest: "SRR10103759" + dataset_tag: "default" + datadir: "/data/datasets/salmon/" + result_dir: "/data/results/runs/" + repeats: 3 + units: run + process_thread: + - processes: 1 + threads: $N + - processes: 2 + threads: $N/2 + - processes: 4 + threads: $N/4 + clear_caches: True + - type: mbw + settings: + program: "mbw" + programversion: "1.4a" + arguments: "512MiB" + - type: multithreaded + settings: + tag: bcftools-1.15.1 + command: "${path_to_program} mpileup --threads ${threads} -Ou -f ${input_datapath}/b37_human_decoy_reference.fasta ${input_datapath}/RMNISTHS_30xdownsample_chr22.cram | ${install_path}bcftools call --threads ${threads} -mv -o ${result_path}output.${repeatn}.${processn}.vcf.gz" + program: "bcftools" + programversion: "1.15.1" + shell: True + dataset_file: "/benchmarking/setup/variantcaller_data.txt" + datadir: "/data/datasets/variantcaller/" + result_dir: "/data/results/runs/" + repeats: 3 + units: Chr22 @ 30x from WGS + process_thread: + - processes: 1 + threads: $N + - processes: 2 + threads: $N/2 + - processes: 4 + threads: $N/4 + clear_caches: True + - type: multithreaded + settings: + tag: minimap2 + command: "${path_to_program} -x ava-pb -t ${threads} ${input_datapath}/pacbio_filtered.fastq ${input_datapath}/pacbio_filtered.fastq | gzip -1 > ${result_path}output.${repeatn}.${processn}.paf.gz" + program: "minimap2" + programversion: "2.26+24d0af6" + shell: True + dataset_file: "/benchmarking/setup/minimapasm_data.txt" + datadir: "/data/datasets/minimapasm/" + result_dir: "/data/results/runs/" + repeats: 3 + units: random subset of 30X E.coli from a PacBio RS II P5-C3 single SMRTcell + process_thread: + - processes: 2 + threads: $N/2 + - processes: 1 + threads: $N + - processes: 4 + threads: $N/4 + clear_caches: True + - type: multithreaded + settings: + tag: miniasm + command: "${path_to_program} -f ${input_datapath}/pacbio_filtered.fastq ${input_datapath}/pacbio_filtered.paf.gz > ${result_path}output.${repeatn}.${processn}.gfa" + program: "miniasm" + programversion: "0.3+ce615d1" + shell: True + dataset_file: "/benchmarking/setup/minimapasm_data.txt" + datadir: "/data/datasets/minimapasm/" + result_dir: "/data/results/runs/" + repeats: 3 + units: random subset of 30X E.coli from a PacBio RS II P5-C3 single SMRTcell + process_thread: + - processes: 2 + threads: 1 + - processes: 1 + threads: 1 + - processes: 4 + threads: 1 + clear_caches: True +- type: disk + benchmarks: + - type: iozone + settings: + program: "iozone" + programversion: "3.492" + arguments: "-a" + result_dir: "/data/results/runs"