From e755a9ca4a1e675fc0a3ce82b0d3cf15d3d26785 Mon Sep 17 00:00:00 2001 From: geneticsjesse Date: Mon, 25 Nov 2024 16:35:11 -0500 Subject: [PATCH 1/5] Add test to compare arrays of arrays of strings --- .../wdl_tests/compare_array_array_strings.wdl | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 src/wdlci/wdl_tests/compare_array_array_strings.wdl diff --git a/src/wdlci/wdl_tests/compare_array_array_strings.wdl b/src/wdlci/wdl_tests/compare_array_array_strings.wdl new file mode 100644 index 0000000..722c99f --- /dev/null +++ b/src/wdlci/wdl_tests/compare_array_array_strings.wdl @@ -0,0 +1,51 @@ +version 1.0 + +# Validate and compare input Array[Array[String]] types +# Input type: Array[Array[String]] + +task validate_and_compare_pbsv_splits { + input { + Array[Array[String]] current_run_output + Array[Array[String]] validated_output + } + + Int disk_size = 10 + + File current_lines_file = write_lines(flatten(current_run_output)) + File validated_lines_file = write_lines(flatten(validated_output)) + + Array[String] current_lines = flatten(current_run_output) + Array[String] validated_lines = flatten(validated_output) + + command <<< + set -euo pipefail + + err() { + message=$1 + echo -e "[ERROR] $message" >&2 + } + + # Compare the flattened arrays + if ! diff -q "~{current_lines_file}" "~{validated_lines_file}"; then + err "Flattened arrays are not identical. Differences found: + Expected output: [~{sep="," current_lines}] + Current run output: [~{sep="," validated_lines}]" + exit 1 + else + echo "Flattened arrays matched: ~{sep="," validated_lines}" + fi + + >>> + + output { + } + + runtime { + docker: "ubuntu:xenial" + cpu: 1 + memory: "3.75 GB" + disk: disk_size + " GB" + disks: "local-disk " + disk_size + " HDD" + preemptible: 1 + } +} \ No newline at end of file From 4b659c783f21e49f186746f0500e6e2d77d8e782 Mon Sep 17 00:00:00 2001 From: geneticsjesse Date: Mon, 25 Nov 2024 16:50:39 -0500 Subject: [PATCH 2/5] Expand on test objective --- src/wdlci/wdl_tests/compare_array_array_strings.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wdlci/wdl_tests/compare_array_array_strings.wdl b/src/wdlci/wdl_tests/compare_array_array_strings.wdl index 722c99f..7b66b73 100644 --- a/src/wdlci/wdl_tests/compare_array_array_strings.wdl +++ b/src/wdlci/wdl_tests/compare_array_array_strings.wdl @@ -1,6 +1,6 @@ version 1.0 -# Validate and compare input Array[Array[String]] types +# Check if two arrays of arrays of strings are identical by comparing the flattened arrays # Input type: Array[Array[String]] task validate_and_compare_pbsv_splits { From 750e36ca518ab0697c512b815ab65f1bf094bf4f Mon Sep 17 00:00:00 2001 From: geneticsjesse Date: Wed, 4 Dec 2024 19:47:32 -0500 Subject: [PATCH 3/5] Compare using diff and write_tsv; avoid flattening --- .../wdl_tests/compare_array_array_strings.wdl | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/wdlci/wdl_tests/compare_array_array_strings.wdl b/src/wdlci/wdl_tests/compare_array_array_strings.wdl index 7b66b73..422d757 100644 --- a/src/wdlci/wdl_tests/compare_array_array_strings.wdl +++ b/src/wdlci/wdl_tests/compare_array_array_strings.wdl @@ -1,9 +1,9 @@ version 1.0 -# Check if two arrays of arrays of strings are identical by comparing the flattened arrays +# Check if two arrays of arrays of strings are identical by writing a TSV file of the data structure # Input type: Array[Array[String]] -task validate_and_compare_pbsv_splits { +task compare_array_array_strings { input { Array[Array[String]] current_run_output Array[Array[String]] validated_output @@ -11,12 +11,6 @@ task validate_and_compare_pbsv_splits { Int disk_size = 10 - File current_lines_file = write_lines(flatten(current_run_output)) - File validated_lines_file = write_lines(flatten(validated_output)) - - Array[String] current_lines = flatten(current_run_output) - Array[String] validated_lines = flatten(validated_output) - command <<< set -euo pipefail @@ -25,16 +19,12 @@ task validate_and_compare_pbsv_splits { echo -e "[ERROR] $message" >&2 } - # Compare the flattened arrays - if ! diff -q "~{current_lines_file}" "~{validated_lines_file}"; then - err "Flattened arrays are not identical. Differences found: - Expected output: [~{sep="," current_lines}] - Current run output: [~{sep="," validated_lines}]" - exit 1 + if diff -q ~{write_tsv(current_run_output)} ~{write_tsv(validated_output)}; then + echo "Nested array of strings are identical." else - echo "Flattened arrays matched: ~{sep="," validated_lines}" + err "Nested array of strings not identical." + exit 1 fi - >>> output { From e820e86e5ed6199bee968e5b6efc09b6e84f3288 Mon Sep 17 00:00:00 2001 From: geneticsjesse Date: Thu, 5 Dec 2024 07:40:19 -0500 Subject: [PATCH 4/5] Rename test --- ...ay_array_strings.wdl => compare_nested_array_of_strings.wdl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/wdlci/wdl_tests/{compare_array_array_strings.wdl => compare_nested_array_of_strings.wdl} (89%) diff --git a/src/wdlci/wdl_tests/compare_array_array_strings.wdl b/src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl similarity index 89% rename from src/wdlci/wdl_tests/compare_array_array_strings.wdl rename to src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl index 422d757..503e7ca 100644 --- a/src/wdlci/wdl_tests/compare_array_array_strings.wdl +++ b/src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl @@ -1,6 +1,6 @@ version 1.0 -# Check if two arrays of arrays of strings are identical by writing a TSV file of the data structure +# Check if two nested arrays of strings are identical by writing a TSV file of the data structure # Input type: Array[Array[String]] task compare_array_array_strings { From c8ae948f8cce9497cc19b7f9ec7fe204a3bca87d Mon Sep 17 00:00:00 2001 From: geneticsjesse Date: Thu, 5 Dec 2024 09:14:22 -0500 Subject: [PATCH 5/5] Add additional check for length of flattened nested arrays --- .../compare_nested_array_of_strings.wdl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl b/src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl index 503e7ca..45e40f4 100644 --- a/src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl +++ b/src/wdlci/wdl_tests/compare_nested_array_of_strings.wdl @@ -11,6 +11,9 @@ task compare_array_array_strings { Int disk_size = 10 + Int current_run_output_length = length(flatten(current_run_output)) + Int validated_output_length = length(flatten(validated_output)) + command <<< set -euo pipefail @@ -19,11 +22,18 @@ task compare_array_array_strings { echo -e "[ERROR] $message" >&2 } - if diff -q ~{write_tsv(current_run_output)} ~{write_tsv(validated_output)}; then - echo "Nested array of strings are identical." - else - err "Nested array of strings not identical." + if [[ ~{current_run_output_length} != ~{validated_output_length} ]]; then + err "Nested array of strings have different flattened lengths. + Current run output length: [~{current_run_output_length}] + Validated output length: [~{validated_output_length}]" exit 1 + else + if diff -q ~{write_tsv(current_run_output)} ~{write_tsv(validated_output)}; then + echo "Nested array of strings are identical." + else + err "Nested array of strings are of the same length but are not identical." + exit 1 + fi fi >>>