Skip to content

Commit

Permalink
Merge branch 'telatin-tests'
Browse files Browse the repository at this point in the history
  • Loading branch information
lskatz committed Feb 5, 2024
2 parents 444058f + dd076c1 commit 4201a65
Show file tree
Hide file tree
Showing 14 changed files with 216 additions and 8 deletions.
Binary file modified paper/sample.json.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions tests/10_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ set -e
thisDir=$(dirname $0);
export PATH=$thisDir/../target/release:$PATH

# Stop here when hyperfine is not on PATH; the benchmarks below are driven by it.
# The bare `exit` carries the status of the echo that precedes it.
command -v hyperfine > /dev/null 2>&1 || {
  echo "hyperfine could not be found. It is required for benchmarking."
  exit
}

# Hyperfine parameters
# Locally, just run one time per test but in the cloud, boost it to ten
num_runs=10
Expand Down
5 changes: 3 additions & 2 deletions tests/benchmark_sample.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ export PATH=$thisDir/../target/release:$PATH
hyperfine --export-json=$reportsDir/sample.json --warmup 2 --shell $SHELL --runs $num_runs \
-n "Fasten sample" "cat $large_R1 | fasten_sample --frequency 0.1" \
-n "seqkit sample" "cat $large_R1 | seqkit sample --proportion 0.1" \
-n "Seqtk sample" "seqtk seq -f 0.1 $large_R1";
-n "Seqtk sample" "seqtk seq -f 0.1 $large_R1" \
-n "Seqfu sample" "seqfu cat --skip 10 $large_R1";

plot_whisker.py --title "subsample reads (reps=$num_runs)" --labels "fasten sample,seqkit sample,seqtk sample" --output $reportsDir/sample.json.png $reportsDir/sample.json
plot_whisker.py --title "subsample reads (reps=$num_runs)" --labels "fasten sample,seqkit sample,seqtk sample,seqfu cat" --output $reportsDir/sample.json.png $reportsDir/sample.json

6 changes: 3 additions & 3 deletions tests/fasten_combine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ if [ "$reads_not_collapsed" != "$original_reads" ]; then
fi

reads_collapsed=$(cat $INPUT $INPUT $INPUT | ./target/debug/fasten_combine | ./target/debug/fasten_metrics --each-read)
total_quals=$(echo "$reads_collapsed" | cut -f 3 | tail -n +2 | paste -sd+ | bc -l)
total_quals=$(echo "$reads_collapsed" | cut -f 3 | tail -n +2 | awk '{if(NR>1) printf "+"; printf $1} END{print "\n";}' | bc -l)
if [ "$total_quals" != "259.31" ]; then
echo "Test failed for total expected quality when collapsing three sets of reads"
exit 1
fi

pe_collapsed=$(cat $INPUT $INPUT $INPUT | ./target/debug/fasten_combine --paired-end | ./target/debug/fasten_metrics --each-read)
IDs=$(echo "$pe_collapsed" | cut -f 1 | tail -n +2 | paste -sd+)
pe_quals=$(echo "$pe_collapsed" | cut -f 3 | tail -n +2 | paste -sd+ | bc -l)
IDs=$(echo "$pe_collapsed" | cut -f 1 | tail -n +2 | awk '{if(NR>1) printf "+"; printf $1}')
pe_quals=$(echo "$pe_collapsed" | cut -f 3 | tail -n +2 | awk '{if(NR>1) printf "+"; printf $1} END{print "\n";}' | bc -l)
if [ "$IDs" != "1/1+1/2+2/1+2/2+3/1+3/2+4/1+4/2" ]; then
echo "Test failed for total expected quality when collapsing three sets of reads using --paired-end"
exit 1
Expand Down
23 changes: 23 additions & 0 deletions tests/fasten_convert.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Minimal test suite for fasten_convert (telatin 2024)
#
# Round-trips testdata/four_reads.fastq through the binary under test
# (FASTQ -> FASTA, then FASTA -> FASTQ) and counts header lines after each
# step. $BIN, $TEST_TMP_FILE, equal and done_testing come from
# test_functions.sh.

THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
THIS_SCRIPT_NAME=$(basename "$0" | sed 's/\.sh//g')
source "${THIS_SCRIPT_DIR}/test_functions.sh"

IN_FILE="${THIS_SCRIPT_DIR}/../testdata/four_reads.fastq"

# Step 1: FASTQ -> FASTA. Expect four ">" headers and no "@" headers.
"$BIN" --out-format FASTA < "$IN_FILE" > "$TEST_TMP_FILE"
FASTA_COUNT=$(grep -c ">" "$TEST_TMP_FILE")
FASTQ_COUNT=$(grep -c "^@" "$TEST_TMP_FILE")
equal "$FASTA_COUNT" "4" "Testing that the output is in FASTA format"
equal "$FASTQ_COUNT" "0" "Testing that the output is in not FASTQ format"

# Step 2: FASTA -> FASTQ, written next to the first temp file.
"$BIN" --out-format FASTQ --in-format FASTA < "$TEST_TMP_FILE" > "$TEST_TMP_FILE.2"

# "^@r" rather than "^@" so that only header lines are counted.
# NOTE(review): the "^>" count assumes no quality line of the regenerated
# FASTQ starts with ">" - confirm against the tool's placeholder qualities.
FASTA_COUNT=$(grep -c "^>" "$TEST_TMP_FILE.2")
FASTQ_COUNT=$(grep -c "^@r" "$TEST_TMP_FILE.2")

# The "not FASTA" assertion must look at the FASTA header count (expected 0);
# it used to re-check FASTQ_COUNT and so never tested what its label says.
equal "$FASTA_COUNT" "0" "Testing that the output is in not FASTA format"
equal "$FASTQ_COUNT" "4" "Testing that the output is in FASTQ format"
rm "$TEST_TMP_FILE.2"

# Print the summary, remove $TEST_TMP_FILE and exit non-zero if any
# assertion failed; without this call failures never reach the exit status.
done_testing
6 changes: 6 additions & 0 deletions tests/fasten_inspect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Minimal test suite for fasten_inspect (telatin 2024)
#
# There are no inspect-specific assertions yet: sourcing test_functions.sh
# registers its baseline checks for the binary named after this script.

THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
THIS_SCRIPT_NAME=$(basename "$0" | sed 's/\.sh//g')
source "${THIS_SCRIPT_DIR}/test_functions.sh"

# Print the summary, remove the temp file created while sourcing and exit
# with a status that reflects the recorded failures.
done_testing
18 changes: 18 additions & 0 deletions tests/fasten_mutate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Minimal test suite for fasten mutate (telatin 2024)
#
# Runs the binary under test with --snps 1 on testdata/four_reads.fastq and
# compares MD5 digests: the input must still match its known digest and the
# output must differ from the input. $BIN, $TEST_TMP_FILE, getmd5, equal,
# different and done_testing come from test_functions.sh.

THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
THIS_SCRIPT_NAME=$(basename "$0" | sed 's/\.sh//g')
source "${THIS_SCRIPT_DIR}/test_functions.sh"

INFILE="${THIS_SCRIPT_DIR}/../testdata/four_reads.fastq"


"$BIN" --snps 1 < "$INFILE" > "$TEST_TMP_FILE"

# Digest the input file is expected to have.
MD5_IN_EXPECTED="8a08ae75226dfacd60f6fe2a1000f100"
MD5=$(getmd5 "$TEST_TMP_FILE" | cut -f 1 -d " ")
MD5_IN=$(getmd5 "$INFILE" | cut -f 1 -d " ")

equal "$MD5_IN" "$MD5_IN_EXPECTED" "Testing that the input file wasnt changed"
different "$MD5" "$MD5_IN" "Testing that the output is different from the input"

# Print the summary, remove $TEST_TMP_FILE and exit non-zero if any
# assertion failed; without this call failures never reach the exit status.
done_testing
6 changes: 6 additions & 0 deletions tests/fasten_normalize.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Minimal test suite for fasten_normalize (telatin 2024)
#
# There are no normalize-specific assertions yet: sourcing test_functions.sh
# registers its baseline checks for the binary named after this script.

THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
THIS_SCRIPT_NAME=$(basename "$0" | sed 's/\.sh//g')
source "${THIS_SCRIPT_DIR}/test_functions.sh"

# Print the summary, remove the temp file created while sourcing and exit
# with a status that reflects the recorded failures.
done_testing
39 changes: 39 additions & 0 deletions tests/fasten_progress.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Minimal test suite for fasten progress (telatin 2024)
#
# Feeds 1000 generated four-line FASTQ records into the binary under test and
# checks (1) that --print forwards the reads to STDOUT and (2) the final
# "Finished" message on STDERR. Perl generates the input; the suite is skipped
# when perl is unavailable. $BIN, $TEST_TMP_FILE, equal and done_testing come
# from test_functions.sh.

THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
THIS_SCRIPT_NAME=$(basename "$0" | sed 's/\.sh//g')
source "${THIS_SCRIPT_DIR}/test_functions.sh"

# command -v is the portable way to locate a program; it prints nothing when
# the program is missing, which is what the emptiness check below relies on.
GOT_PERL=$(command -v perl)

if [ -z "$GOT_PERL" ]; then
  echo "Perl not found, skipping test"
  # Skipping bypasses done_testing, so remove the temp file here.
  rm -f -- "$TEST_TMP_FILE"
  exit 0
fi


## Here we test that STDOUT is passed using --print

# The trailing `grep -w 1000` leaves TOT empty unless exactly 1000 lines matched.
# NOTE(review): core Perl sleep takes whole seconds, so `sleep 0.1` is
# presumably a no-op here - confirm whether a real delay was intended.
# shellcheck disable=SC2016
TOT=$("$GOT_PERL" -e 'my $c=0;for (1..1000) {
$c++;
print "\@fasten_test$c\nAAA\n+\nIII\n";
sleep 0.1;
}' | "$BIN" --id "test-suite" --print | grep -c 'fasten_test' | grep -w 1000)

equal "$TOT" "1000" "Testing that --print passes 1000 reads to STDOUT"


## Here we test the final message
# shellcheck disable=SC2016
"$GOT_PERL" -e 'my $c=0;for (1..1000) {
$c++;
print "\@fasten_test$c\nAAA\n+\nIII\n";
sleep 0.1;
}' | "$BIN" --id "test-suite" 2> "$TEST_TMP_FILE"

# Third ":"-separated field of the "Finished" line captured from STDERR.
# NOTE(review): 1000 records are fed in but 4000 is expected - presumably the
# tool counts lines (4 per record); confirm.
END=$(grep "Finished" "$TEST_TMP_FILE" | cut -f 3 -d ":")
equal "$END" " Finished progress on 4000 reads" "Testing progress output"

done_testing
4 changes: 2 additions & 2 deletions tests/fasten_regex.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ if [ "$(wc -l <<< "$pe_filtered")" -ne 24 ]; then
exit 1
fi

if [ "$(echo "$pe_filtered" | ./target/debug/fasten_metrics --each-read | tail -n +2 | cut -f 1 | paste -sd+)" != "read0/1+read0/2+read1/1+read1/2+read2/1+read2/2" ]; then
if [ "$(echo "$pe_filtered" | ./target/debug/fasten_metrics --each-read | tail -n +2 | cut -f 1 | awk '{if(NR>1) printf "+"; printf $1}' )" != "read0/1+read0/2+read1/1+read1/2+read2/1+read2/2" ]; then
echo "ERROR filtering for the right read names"
exit 1
fi

if [ "$(echo "$r1_filtered" | ./target/debug/fasten_regex --regex read1 --which ID | ./target/debug/fasten_metrics --each-read | tail -n +2 | cut -f 1 | paste -sd+)" != "read1/1+read1/2" ]; then
if [ "$(echo "$r1_filtered" | ./target/debug/fasten_regex --regex read1 --which ID | ./target/debug/fasten_metrics --each-read | tail -n +2 | cut -f 1 | awk '{if(NR>1) printf "+"; printf $1}' )" != "read1/1+read1/2" ]; then
echo "ERROR running regex on IDs for read1"
exit 1
fi
Expand Down
12 changes: 12 additions & 0 deletions tests/fasten_sort.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Minimal test suite for fasten sort (telatin 2024)
#
# Pipes two reads (@ciao, then @andrea) through the binary under test and
# expects @andrea to be the first line of the output. $BIN, equal and
# done_testing come from test_functions.sh.

THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
THIS_SCRIPT_NAME=$(basename "$0" | sed 's/\.sh//g')
source "${THIS_SCRIPT_DIR}/test_functions.sh"

# Test fasten_sort with two reads
# "$BIN" is quoted so a checkout path containing spaces is not word-split;
# printf emits the same bytes as the former `echo -e`, portably.
FIRST=$(printf '@ciao\nAAA\n+\nIII\n@andrea\nCCC\n+\nEEE\n' | "$BIN" | head -n 1)
equal "$FIRST" "@andrea" "Testing sort order of two reads"

done_testing
2 changes: 1 addition & 1 deletion tests/fasten_trim.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if [ "$reads_not_trimmed" != "$original_reads" ]; then
exit 1
fi

onebase=$(./target/debug/fasten_trim --first-base 3 --last-base 4 < testdata/four_reads.pe.fastq | perl -lane 'print if($i++ % 4 == 1);' | paste -sd'_')
onebase=$(./target/debug/fasten_trim --first-base 3 --last-base 4 < testdata/four_reads.pe.fastq | perl -lane 'print if($i++ % 4 == 1);' |awk 'NR > 1 { printf "_"; } { printf $1; } END { printf "\n"; }')
shouldbe="T_T_G_A_C_A_C_A"
if [ "$onebase" != "$shouldbe" ]; then
echo "ERROR trimming to the third base"
Expand Down
5 changes: 5 additions & 0 deletions tests/lib/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ zcat $large_interleaved | fasten_sort --sort-by GC --paired-end | gzip -c > $lar

which bbnorm.sh
which fasten_clean
which seqfu

# Version information
seqtk 2>&1 | grep -i version | sed 's/^/seqtk /'
seqkit version | grep -m 1 v
fasten_clean --version
fastq_to_fasta -h | grep "Part of FASTX"
bbnorm.sh version 2>&1 | grep 'BBMap version'
seqfu --version

# hyperfine
which hyperfine
which plot_whisker.py || echo "WARNING: plot_whisker.py from hyperfine not found in path: will not be able to plot graphs."

91 changes: 91 additions & 0 deletions tests/test_functions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env bash

# This should be sourced by other test scripts: die if not
# The guard keys on THIS_SCRIPT_DIR, which the sourcing scripts assign before
# sourcing this file; when it is empty or unset the shell exits with status 1.
if [ -z "$THIS_SCRIPT_DIR" ]; then
echo "ERROR: test_functions.sh should be sourced by other test scripts"
exit 1
fi
# Shared state for every suite that sources this file.

# Scratch file the suites write tool output to; abort if it cannot be created.
TEST_TMP_FILE=$(mktemp) || {
  echo "ERROR: could not create a temporary file" >&2
  exit 1
}
# Remove the scratch file on every exit path, so suites that end without
# calling done_testing (or exit early) do not leave it behind.
trap 'rm -f -- "$TEST_TMP_FILE"' EXIT

# Counters updated by test / equal / different: checks run and checks failed.
NUM=0
FAIL=0

# ANSI colour escapes, interpreted by `echo -e` in the reporting functions.
RED='\033[0;31m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Binary under test: the release and debug builds named after the sourcing
# script (THIS_SCRIPT_NAME), resolved relative to the tests directory.
BIN=$(readlink -f "${THIS_SCRIPT_DIR}/../target/release/${THIS_SCRIPT_NAME}")
DEB_BIN=$(readlink -f "${THIS_SCRIPT_DIR}/../target/debug/${THIS_SCRIPT_NAME}")


echo -e " *** ${GREEN}Testing $THIS_SCRIPT_NAME${NC} (test: $TEST_TMP_FILE)*** "



#######################################
# Run one named check and record the outcome.
# Globals:   NUM, FAIL (updated); GREEN, RED, NC (read)
# Arguments: $1 - description printed next to OK/FAIL
#            $2 - the check, as ONE string that is split on whitespace
#                 (so paths containing spaces are not supported):
#                 * first word starts with "-" or is "!": evaluated as a
#                   `[ ... ]` expression, e.g. "-e /path/to/file"
#                 * anything else: run as a command with stdin, stdout and
#                   stderr detached; passes when it exits with status 0
#                 An empty string, or a lone operator such as "-e", fails.
# Outputs:   one OK/FAIL line on stdout
#######################################
function test {
  NUM=$((NUM+1))
  local msg="$1"
  local condition=$2
  local -a words=()
  local passed=0

  # The previous `[ "$condition" ]` was a one-argument test, which only asks
  # "is this string non-empty?" and therefore reported OK for every check.
  read -r -a words <<< "$condition"
  case "${words[0]:-}" in
    "")
      ;;
    -* | '!')
      # Require operator plus operand: a single word would again be the
      # always-true non-empty-string test (e.g. "-e" with an empty path).
      if [ "${#words[@]}" -ge 2 ] && [ "${words[@]}" ] 2> /dev/null; then
        passed=1
      fi
      ;;
    *)
      # stdin comes from /dev/null so a tool that reads input cannot hang.
      if "${words[@]}" < /dev/null > /dev/null 2>&1; then
        passed=1
      fi
      ;;
  esac

  if [ "$passed" -eq 1 ]; then
    echo -e "${GREEN}OK${NC}\t$NUM: $msg"
  else
    FAIL=$((FAIL+1))
    echo -e "${RED}FAIL${NC}\t$NUM: $msg"
  fi
}

#######################################
# Assert that two strings are identical and record the outcome.
# Globals:   NUM, FAIL (updated); GREEN, RED, NC (read)
# Arguments: $1 - value obtained
#            $2 - value expected
#            $3 - description of the check
# Outputs:   an OK line, or a FAIL line followed by both values, on stdout
#######################################
function equal {
  local actual="$1" wanted="$2" label="$3"
  NUM=$((NUM + 1))
  if [[ "$actual" != "$wanted" ]]; then
    FAIL=$((FAIL + 1))
    echo -e "${RED}FAIL${NC}\t$NUM: $label"
    echo -e "\tGot: $actual"
    echo -e "\tExpected: $wanted"
  else
    echo -e "${GREEN}OK${NC}\t$NUM: $label [$actual]"
  fi
}

#######################################
# Assert that two strings are NOT identical and record the outcome.
# Globals:   NUM, FAIL (updated); GREEN, RED, NC (read)
# Arguments: $1 - value obtained
#            $2 - value it must differ from
#            $3 - description of the check
# Outputs:   an OK line, or a FAIL line followed by both values, on stdout
#######################################
function different {
  local actual="$1" other="$2" label="$3"
  NUM=$((NUM + 1))
  if [[ "$actual" == "$other" ]]; then
    FAIL=$((FAIL + 1))
    echo -e "${RED}FAIL${NC}\t$NUM: $label"
    echo -e "\tGot: $actual"
    echo -e "\tequals to: $other"
  else
    echo -e "${GREEN}OK${NC}\t$NUM: $label [$actual != $other]"
  fi
}

function getmd5 {
# use md5sum on Linux, md5 on OSX
if [ "$(uname)" == "Darwin" ]; then
md5 -q "$1"
else
md5sum "$1" | cut -f 1 -d " "
fi
}
#######################################
# Finish a suite: delete the temp file, print the summary and exit.
# Globals:   TEST_TMP_FILE, NUM, FAIL, GREEN, RED, NC (read)
# Outputs:   one summary line on stdout
# Exits:     0 when FAIL is 0, otherwise 1 (this function does not return)
#######################################
function done_testing {
  [ -e "$TEST_TMP_FILE" ] && rm "$TEST_TMP_FILE"

  if ! [ "$FAIL" -eq 0 ]; then
    echo -e "${RED}$FAIL/$NUM errors${NC}\ttests failed${NC}"
    exit 1
  fi

  echo -e "${GREEN}OK!${NC}\tAll $NUM tests passed${NC}"
  exit 0
}

# Baseline checks registered for every suite that sources this file: for the
# release build ($BIN) and the debug build ($DEB_BIN), one check each for
# existence, --help and --version. The "Release" rows previously passed
# $DEB_BIN, so the release build was never the one referred to.
test "Release binary $BIN" "-e $BIN"
test "Debug binary $DEB_BIN" "-e $DEB_BIN"
test "Release binary --help" "$BIN --help"
test "Debug binary --help" "$DEB_BIN --help"

test "Release binary --version" "$BIN --version"
test "Debug binary --version" "$DEB_BIN --version"

0 comments on commit 4201a65

Please sign in to comment.