From 4aa26f6d1d0daf70833a79c48be9578f76f1719b Mon Sep 17 00:00:00 2001 From: Ethan Williams Date: Thu, 24 Oct 2024 14:18:00 -0400 Subject: [PATCH] Just covid mts (#27) * work on covid-mts * add covid-mts to CI --- .github/workflows/tests.yml | 2 +- covid-mts/cleanup.sh | 10 ++-- covid-mts/deps.sh | 14 ++++++ covid-mts/hashes/outputs.md5sum | 4 ++ covid-mts/hashes/outputs_small.md5sum | 4 ++ covid-mts/hashes/small/1.hash | 1 - covid-mts/hashes/small/2.hash | 1 - covid-mts/hashes/small/3.hash | 1 - covid-mts/hashes/small/4.hash | 1 - covid-mts/input.sh | 11 +++++ covid-mts/input/.gitignore | 1 + covid-mts/inputs.sh | 15 ------ covid-mts/run.sh | 57 +++++++--------------- covid-mts/verify.sh | 69 +++++---------------------- 14 files changed, 71 insertions(+), 120 deletions(-) create mode 100755 covid-mts/deps.sh create mode 100644 covid-mts/hashes/outputs.md5sum create mode 100644 covid-mts/hashes/outputs_small.md5sum delete mode 100644 covid-mts/hashes/small/1.hash delete mode 100644 covid-mts/hashes/small/2.hash delete mode 100644 covid-mts/hashes/small/3.hash delete mode 100644 covid-mts/hashes/small/4.hash create mode 100755 covid-mts/input.sh create mode 100644 covid-mts/input/.gitignore delete mode 100755 covid-mts/inputs.sh diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d3e3dca..dbf5a5d3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - benchmark: [nlp, file-enc, unix50, log-analysis, max-temp, uniq-ips, media-conv, sklearn] + benchmark: [nlp, file-enc, unix50, log-analysis, max-temp, uniq-ips, media-conv, sklearn, covid-mts] steps: - name: Checkout code diff --git a/covid-mts/cleanup.sh b/covid-mts/cleanup.sh index 2e2e12f3..d8680650 100755 --- a/covid-mts/cleanup.sh +++ b/covid-mts/cleanup.sh @@ -1,5 +1,9 @@ #!/bin/bash -cd "$(realpath $(dirname "$0"))" -rm -rf ./inputs -rm -rf ./outputs +REPO_TOP=$(git rev-parse --show-toplevel) +eval_dir="${REPO_TOP}/covid-mts" +outputs_dir="${eval_dir}/outputs" +input_dir="${eval_dir}/input" + +rm -rf "$outputs_dir" +rm -rf "$input_dir" diff --git a/covid-mts/deps.sh b/covid-mts/deps.sh new file mode 100755 index 00000000..4d423348 --- /dev/null +++ b/covid-mts/deps.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +sudo apt update + +sudo apt install -y \ + sudo \ + curl \ + wget \ + unzip \ + python3-pip \ + vim \ + libarchive-tools libncurses5-dev libncursesw5-dev zstd liblzma-dev libbz2-dev zip unzip \ + git + diff --git a/covid-mts/hashes/outputs.md5sum b/covid-mts/hashes/outputs.md5sum new file mode 100644 index 00000000..a5d5e642 --- /dev/null +++ b/covid-mts/hashes/outputs.md5sum @@ -0,0 +1,4 @@ +49b36b9a7531dc79e0d9040ceced1461 outputs/1.out +2596cd73ea1406ae863d853899957def outputs/2.out +6a6df2aec7e1ba347f0c7f5b39fbdc6b outputs/3.out +e1612db0634d0eff467c0e6dcf287755 outputs/4.out diff --git a/covid-mts/hashes/outputs_small.md5sum b/covid-mts/hashes/outputs_small.md5sum new file mode 100644 index 00000000..d227e44c --- /dev/null +++ b/covid-mts/hashes/outputs_small.md5sum @@ -0,0 +1,4 @@ +7f8dd6c33d3a13de844bcdef61ecce4d outputs_small/1.out +011d767cb5741eab175c31462ff63b54 outputs_small/2.out +645c86dfc08a2233edf50613327adaa3 outputs_small/3.out +79fec1cbbb6e608413ffce247120d859 outputs_small/4.out diff --git a/covid-mts/hashes/small/1.hash b/covid-mts/hashes/small/1.hash deleted file mode 100644 index 9b74f4e6..00000000 --- a/covid-mts/hashes/small/1.hash +++ /dev/null @@ -1 +0,0 @@ -e2bce195c4833af9f5be080efa1be87436e6c1597cb4a1a4e63775b3bfbb9e6d diff --git a/covid-mts/hashes/small/2.hash b/covid-mts/hashes/small/2.hash deleted file mode 100644 index 4b47e1c7..00000000 --- a/covid-mts/hashes/small/2.hash +++ /dev/null @@ -1 +0,0 @@ -c0b74e21ec4202a01dd19d6e7a656d0ee39eee3f1ec772db6951a6ad0a079aed diff --git a/covid-mts/hashes/small/3.hash b/covid-mts/hashes/small/3.hash deleted file mode 100644 index 693f040b..00000000 --- a/covid-mts/hashes/small/3.hash +++ /dev/null @@ -1 +0,0 @@ -e518c50210e67c19440a5300ed819061a6b791008db5f01ac35abe9f1d950dff diff --git a/covid-mts/hashes/small/4.hash b/covid-mts/hashes/small/4.hash deleted file mode 100644 index 83ee9473..00000000 --- a/covid-mts/hashes/small/4.hash +++ /dev/null @@ -1 +0,0 @@ -a278628a3d911043466beb9d2eb49b0d194273ddf1a0b194bd5e56960b877b0c diff --git a/covid-mts/input.sh b/covid-mts/input.sh new file mode 100755 index 00000000..e24dfadd --- /dev/null +++ b/covid-mts/input.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +REPO_TOP=$(git rev-parse --show-toplevel) +eval_dir="${REPO_TOP}/covid-mts" +input_dir="${eval_dir}/input" + +mkdir -p "$input_dir" + +curl --insecure 'https://atlas-group.cs.brown.edu/data/covid-mts/in.csv.gz' | gunzip > "$input_dir/in.csv" + +curl --insecure 'https://atlas-group.cs.brown.edu/data/covid-mts/in_small.csv.gz' | gunzip > "$input_dir/in_small.csv" diff --git a/covid-mts/input/.gitignore b/covid-mts/input/.gitignore new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/covid-mts/input/.gitignore @@ -0,0 +1 @@ + diff --git a/covid-mts/inputs.sh b/covid-mts/inputs.sh deleted file mode 100755 index a40333e9..00000000 --- a/covid-mts/inputs.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -cd "$(realpath $(dirname "$0"))" -mkdir -p inputs -cd inputs - -if [ ! -f ./in.csv ]; then - curl -f 'https://atlas-group.cs.brown.edu/data/covid-mts/in.csv.gz'> in.csv.gz - gzip -d in.csv.gz -fi - -if [ ! -f ./in_small.csv ]; then - curl -f 'https://atlas-group.cs.brown.edu/data/covid-mts/in_small.csv.gz' > in_small.csv.gz - gzip -d in_small.csv.gz -fi diff --git a/covid-mts/run.sh b/covid-mts/run.sh index d85ad017..d60c69ac 100755 --- a/covid-mts/run.sh +++ b/covid-mts/run.sh @@ -1,47 +1,22 @@ #!/bin/bash -export SUITE_DIR=$(realpath $(dirname "$0")) -export TIMEFORMAT=%R -cd $SUITE_DIR - -if [[ "$1" == "--small" ]]; then - echo "Using small input" - input_file="$SUITE_DIR/inputs/in_small.csv" -else - echo "Using default input" - input_file="$SUITE_DIR/inputs/in.csv" +REPO_TOP=$(git rev-parse --show-toplevel) +eval_dir="${REPO_TOP}/covid-mts" +input_dir="${eval_dir}/input" +outputs_dir="${eval_dir}/outputs" +scripts_dir="${eval_dir}/scripts" + +suffix="" +if [[ "$@" == *"--small"* ]]; then + suffix="_small" fi -mkdir -p "outputs" -all_res_file="./outputs/covid-mts.res" -> $all_res_file - -# time_file stores the time taken for each script -# mode_res_file stores the time taken and the script name for every script in a mode (e.g. bash, pash, dish, fish) -# all_res_file stores the time taken for each script for every script run, making it easy to copy and paste into the spreadsheet -covid-mts() { - mkdir -p "outputs/$1" - mode_res_file="./outputs/$1/covid-mts.res" - > $mode_res_file - - echo executing covid-mts $1 $(date) | tee -a $mode_res_file $all_res_file - - for number in `seq 4` ## initial: FIXME 5.sh is not working yet - do - script="${number}" - script_file="./scripts/$script.sh" - output_dir="./outputs/$1/$script/" - output_file="./outputs/$1/$script.out" - time_file="./outputs/$1/$script.time" - log_file="./outputs/$1/$script.log" - - if [[ "$1" == "bash" ]]; then - (time bash $script_file $input_file > $output_file ) 2> $time_file - fi +input_file="$input_dir/in$suffix.csv" +output_scoped="$outputs_dir/outputs$suffix" +mkdir -p "$output_scoped" - cat "${time_file}" >> $all_res_file - echo "$script_file $(cat "$time_file")" | tee -a $mode_res_file - done -} +"$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out" +"$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out" +"$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out" +"$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out" -covid-mts "bash" diff --git a/covid-mts/verify.sh b/covid-mts/verify.sh index 09c8e391..83521719 100755 --- a/covid-mts/verify.sh +++ b/covid-mts/verify.sh @@ -1,65 +1,22 @@ #!/bin/bash -# Exit immediately if a command exits with a non-zero status -# set -e - -cd "$(realpath $(dirname "$0"))" - -mkdir -p hashes/small +REPO_TOP=$(git rev-parse --show-toplevel) +eval_dir="${REPO_TOP}/covid-mts" +outputs_dir="${eval_dir}/outputs" +scripts_dir="${eval_dir}/scripts" +hashes_dir="${eval_dir}/hashes" +suffix="" if [[ "$@" == *"--small"* ]]; then - hash_folder="hashes/small" -else - hash_folder="hashes" + suffix="_small" fi if [[ "$@" == *"--generate"* ]]; then - # Directory to iterate over - directory="outputs/bash" - - # Loop through all .out files in the directory - for file in "$directory"/*.out - do - # Extract the filename without the directory path and extension - filename=$(basename "$file" .out) - - # Generate SHA-256 hash - hash=$(shasum -a 256 "$file" | awk '{ print $1 }') - - # Save the hash to a file - echo "$hash" > "$hash_folder/$filename.hash" - - # Print the filename and hash - echo "File: $hash_folder/$filename.hash | SHA-256 Hash: $hash" - done + # give relative paths to md5sum + (cd "$outputs_dir"; md5sum "outputs$suffix"/* > "$hashes_dir/outputs$suffix.md5sum") + exit 0 fi -# Loop through all directories in the parent directory -for folder in "outputs"/*/ -do - # Remove trailing slash - folder=${folder%/} - - echo "Verifying folder: $folder" - - # Loop through all .out files in the current directory - for file in "$folder"/*.out - do - # Extract the filename without the directory path and extension - filename=$(basename "$file" .out) - - if [ ! -f "$folder/$filename.hash" ]; then - # Generate SHA-256 hash - hash=$(shasum -a 256 "$file" | awk '{ print $1 }') - - # Save the hash to a file - echo "$hash" > "$folder/$filename.hash" - fi - - # Compare the hash with the hash in the hashes directory - diff "$hash_folder/$filename.hash" "$folder/$filename.hash" - - # Print the filename and hash - echo "File: $folder/$filename | SHA-256 Hash: $(cat "$folder/$filename.hash")" - done -done +# give relative paths to md5sum +(cd "$outputs_dir"; md5sum --check --quiet --status "$hashes_dir/outputs$suffix.md5sum") +echo covid-mts$suffix $?