diff --git a/LICENSE b/LICENSE index e3f3cbb..543054b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2017, GATK workflows +Copyright (c) 2017, Broad Institute All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/generic.google-papi.options.json b/generic.google-papi.options.json new file mode 100644 index 0000000..bd8cea6 --- /dev/null +++ b/generic.google-papi.options.json @@ -0,0 +1,6 @@ +{ + "read_from_cache":false, + "default_runtime_attributes": { + "zones": "us-central1-a us-central1-b us-central1-c us-central1-f" + } +} \ No newline at end of file diff --git a/validate-bam.inputs.json b/validate-bam.inputs.json new file mode 100644 index 0000000..05f5245 --- /dev/null +++ b/validate-bam.inputs.json @@ -0,0 +1,11 @@ +{ + "ValidateBamsWf.bam_list": [ + "gs://gatk-test-data/wgs_bam/NA12878_24RG_hg38/NA12878_24RG_small.hg38.bam", + "gs://gatk-test-data/wgs_bam/NA12878_24RG_hg38/NA12878_24RG_med.hg38.bam" + ], + + "ValidateBamsWf.ValidateBAM.validation_mode": "SUMMARY", + + "ValidateBamsWf.ValidateBAM.mem_size": "1 GB", + "ValidateBamsWf.ValidateBAM.disk_size": 100 +} diff --git a/validate-bam.wdl b/validate-bam.wdl new file mode 100644 index 0000000..3d2aca4 --- /dev/null +++ b/validate-bam.wdl @@ -0,0 +1,79 @@ +## Copyright Broad Institute, 2017 +## +## This WDL performs format validation on SAM/BAM files in a list +## +## Requirements/expectations : +## - List of SAM or BAM files to validate +## - Explicit request of either SUMMARY or VERBOSE mode in inputs.json +## +## Outputs: +## - Set of .txt files containing the validation reports, one per input file +## +## Cromwell version support +## - Successfully tested on v24 and v29 +## - Does not work on versions < v23 due to output syntax +## +## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. +## For program versions, see docker containers. 
+##
+## LICENSING :
+## This script is released under the WDL source code license (BSD-3) (see LICENSE in
+## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
+## be subject to different licenses. Users are responsible for checking that they are
+## authorized to run all programs before running this script. Please see the docker
+## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
+## licensing information pertaining to the included programs.
+
+# WORKFLOW DEFINITION
+# Runs ValidateBAM once per entry of bam_list and collects the resulting reports.
+workflow ValidateBamsWf {
+  Array[File] bam_list
+
+  # Process the input files in parallel
+  scatter (input_bam in bam_list) {
+    # Strip the directory and a trailing ".bam"; any other extension is left in place
+    String bam_basename = basename(input_bam, ".bam")
+
+    # Run the validation; the report is named <bam_basename>.validation_<mode>.txt
+    call ValidateBAM {
+      input:
+        bam_file = input_bam,
+        output_basename = bam_basename + ".validation"
+    }
+  }
+
+  # Outputs that will be retained when execution is complete
+  output {
+    Array[File] validation_reports = ValidateBAM.validation_report
+  }
+}
+
+# TASK DEFINITIONS
+# Validate a SAM or BAM using Picard ValidateSamFile and write a text report.
+task ValidateBAM {
+  File bam_file
+  String output_basename
+  String validation_mode
+  Int disk_size
+  String mem_size
+  String? java_heap_size  # JVM -Xmx value (default "3000m"); should not exceed mem_size
+
+  String output_name = "${output_basename}_${validation_mode}.txt"
+
+  command {
+    java -Xmx${default="3000m" java_heap_size} -jar /usr/gitc/picard.jar \
+      ValidateSamFile \
+      I="${bam_file}" \
+      OUTPUT="${output_name}" \
+      MODE=${validation_mode}
+  }
+  runtime {
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018"
+    memory: mem_size
+    cpu: "1"
+    disks: "local-disk " + disk_size + " HDD"
+  }
+  output {
+    File validation_report = "${output_name}"
+  }
+}