Skip to content

Commit

Permalink
Added tests to compare md5sums for VCF or SAM records while ignoring variable headers.
Browse files: browse the repository at this point in the history
  • Loading branch information
williamrowell committed Jul 20, 2024
1 parent 5116f47 commit 2ec2e38
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
49 changes: 49 additions & 0 deletions src/wdlci/wdl_tests/calculate_sam_record_md5sum.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
version 1.0

# Compare the md5sums of the SAM records in two alignment files (e.g., BAMs or
# CRAMs) while ignoring their headers.
# Input type: SAM/BAM/CRAM
# The task declares no outputs; the command exits with status 1 when the
# record md5sums differ.
# NOTE(review): CRAM inputs may need their reference sequence to be resolvable
# by samtools inside the container - confirm before relying on CRAM support.
task calculate_sam_record_md5sum {
  input {
    File current_run_output
    File validated_output
  }

  # Size of both inputs (in GB, rounded up) plus 50 GB of headroom
  Int disk_size = ceil(size(current_run_output, "GB") + size(validated_output, "GB") + 50)

  command <<<
    set -euo pipefail

    # Print an [ERROR]-prefixed message to stderr
    err() {
      local message=$1

      echo -e "[ERROR] $message" >&2
    }

    # Compare files
    echo "Comparing SAM record md5sums"
    # samtools view is run without -h, so only the records are hashed.
    # The paths are quoted so a localized path containing whitespace is
    # passed to samtools as a single argument.
    current_run_md5sum=$(samtools view "~{current_run_output}" | md5sum | cut -d ' ' -f 1)
    validated_output_md5sum=$(samtools view "~{validated_output}" | md5sum | cut -d ' ' -f 1)

    # The continuation lines of the message sit at the command's base
    # indentation on purpose: once the common indentation is stripped they
    # carry no leading whitespace in the printed message.
    if [[ "$current_run_md5sum" != "$validated_output_md5sum" ]]; then
      err "SAM record md5sums did not match:
    Expected md5sum: [$validated_output_md5sum]
    Current run md5sum: [$current_run_md5sum]"
      exit 1
    else
      echo "SAM record md5sums matched for file [~{basename(validated_output)}]"
    fi
  >>>

  output {
  }

  # NOTE(review): "disk" and "disks" both carry disk_size; presumably they
  # target different execution backends - confirm.
  runtime {
    docker: "dnastack/dnastack-wdl-ci-tools:0.0.1"
    cpu: 1
    memory: "3.75 GB"
    disk: disk_size + " GB"
    disks: "local-disk " + disk_size + " HDD"
    preemptible: 1
  }
}
49 changes: 49 additions & 0 deletions src/wdlci/wdl_tests/calculate_vcf_record_md5sum.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
version 1.0

# Compare the md5sums of the VCF records in two variant files while ignoring
# their headers.
# Input type: VCF/BCF/VCF.gz/BCF.gz
# The task declares no outputs; the command exits with status 1 when the
# record md5sums differ.
task calculate_vcf_record_md5sum {
  input {
    File current_run_output
    File validated_output
  }

  # Size of both inputs (in GB, rounded up) plus 50 GB of headroom
  Int disk_size = ceil(size(current_run_output, "GB") + size(validated_output, "GB") + 50)

  command <<<
    set -euo pipefail

    # Print an [ERROR]-prefixed message to stderr
    err() {
      local message=$1

      echo -e "[ERROR] $message" >&2
    }

    # Compare files
    echo "Comparing VCF record md5sums"
    # bcftools view -H suppresses the header, so only the records are hashed.
    # The paths are quoted so a localized path containing whitespace is
    # passed to bcftools as a single argument.
    current_run_md5sum=$(bcftools view -H "~{current_run_output}" | md5sum | cut -d ' ' -f 1)
    validated_output_md5sum=$(bcftools view -H "~{validated_output}" | md5sum | cut -d ' ' -f 1)

    # The continuation lines of the message sit at the command's base
    # indentation on purpose: once the common indentation is stripped they
    # carry no leading whitespace in the printed message.
    if [[ "$current_run_md5sum" != "$validated_output_md5sum" ]]; then
      err "VCF record md5sums did not match:
    Expected md5sum: [$validated_output_md5sum]
    Current run md5sum: [$current_run_md5sum]"
      exit 1
    else
      echo "VCF record md5sums matched for file [~{basename(validated_output)}]"
    fi
  >>>

  output {
  }

  # NOTE(review): "disk" and "disks" both carry disk_size; presumably they
  # target different execution backends - confirm.
  runtime {
    docker: "dnastack/dnastack-wdl-ci-tools:0.0.1"
    cpu: 1
    memory: "3.75 GB"
    disk: disk_size + " GB"
    disks: "local-disk " + disk_size + " HDD"
    preemptible: 1
  }
}

0 comments on commit 2ec2e38

Please sign in to comment.