Skip to content

Commit

Permalink
Just covid mts (#27)
Browse files Browse the repository at this point in the history
* work on covid-mts

* add covid-mts to CI
  • Loading branch information
EtomicBomb authored Oct 24, 2024
1 parent 97d9bd7 commit 4aa26f6
Show file tree
Hide file tree
Showing 14 changed files with 71 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
benchmark: [nlp, file-enc, unix50, log-analysis, max-temp, uniq-ips, media-conv, sklearn]
benchmark: [nlp, file-enc, unix50, log-analysis, max-temp, uniq-ips, media-conv, sklearn, covid-mts]

steps:
- name: Checkout code
Expand Down
10 changes: 7 additions & 3 deletions covid-mts/cleanup.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/bin/bash

cd "$(realpath $(dirname "$0"))"
rm -rf ./inputs
rm -rf ./outputs
REPO_TOP=$(git rev-parse --show-toplevel)
eval_dir="${REPO_TOP}/covid-mts"
outputs_dir="${eval_dir}/outputs"
input_dir="${eval_dir}/input"

rm -rf "$outputs_dir"
rm -rf "$input_dir"
14 changes: 14 additions & 0 deletions covid-mts/deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Install the system packages used by the covid-mts benchmark.
# Expects a Debian/Ubuntu-style host where the caller may run sudo.

# One package per line; each name appears exactly once.
packages=(
  sudo
  curl
  wget
  unzip
  python3-pip
  vim
  libarchive-tools
  libncurses5-dev
  libncursesw5-dev
  zstd
  liblzma-dev
  libbz2-dev
  zip
  git
)

# apt-get instead of apt: apt is the interactive front end and warns that
# its command-line interface is not stable enough for use in scripts.
sudo apt-get update

sudo apt-get install -y "${packages[@]}"

4 changes: 4 additions & 0 deletions covid-mts/hashes/outputs.md5sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
49b36b9a7531dc79e0d9040ceced1461 outputs/1.out
2596cd73ea1406ae863d853899957def outputs/2.out
6a6df2aec7e1ba347f0c7f5b39fbdc6b outputs/3.out
e1612db0634d0eff467c0e6dcf287755 outputs/4.out
4 changes: 4 additions & 0 deletions covid-mts/hashes/outputs_small.md5sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
7f8dd6c33d3a13de844bcdef61ecce4d outputs_small/1.out
011d767cb5741eab175c31462ff63b54 outputs_small/2.out
645c86dfc08a2233edf50613327adaa3 outputs_small/3.out
79fec1cbbb6e608413ffce247120d859 outputs_small/4.out
1 change: 0 additions & 1 deletion covid-mts/hashes/small/1.hash

This file was deleted.

1 change: 0 additions & 1 deletion covid-mts/hashes/small/2.hash

This file was deleted.

1 change: 0 additions & 1 deletion covid-mts/hashes/small/3.hash

This file was deleted.

1 change: 0 additions & 1 deletion covid-mts/hashes/small/4.hash

This file was deleted.

11 changes: 11 additions & 0 deletions covid-mts/input.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Download and decompress the covid-mts input data sets (full and small)
# into <repo top>/covid-mts/input.
#
# Exits non-zero as soon as the repository root cannot be determined or a
# download/decompression step fails, so callers never proceed with missing
# or truncated inputs.

# -e: stop on the first failing command; -u: reject unset variables;
# pipefail: a failing curl makes the whole curl | gunzip pipeline fail.
set -euo pipefail

# Under set -e a failing git (e.g. not inside a work tree) aborts here instead
# of leaving REPO_TOP empty and writing to /covid-mts/input.
REPO_TOP=$(git rev-parse --show-toplevel)
eval_dir="${REPO_TOP}/covid-mts"
input_dir="${eval_dir}/input"

mkdir -p "$input_dir"

# Fetch one gzip-compressed file and store it decompressed.
# Arguments: $1 - URL of the .gz file, $2 - destination path
# Returns:   0 on success; 1 if the download or decompression failed
#            (the destination is only replaced after a complete transfer).
fetch_gz() {
  local url=$1
  local dest=$2
  local partial="${dest}.partial"

  # --fail: treat HTTP errors (404, 500, ...) as failures rather than piping
  # the server's error page into gunzip.
  # NOTE(review): --insecure disables TLS certificate verification. It is kept
  # because the original relied on it; confirm whether the host's certificate
  # validates and drop the flag if it does.
  if curl --insecure --fail "$url" | gunzip > "$partial"; then
    mv -- "$partial" "$dest"
  else
    rm -f -- "$partial"
    printf 'input.sh: failed to fetch %s\n' "$url" >&2
    return 1
  fi
}

fetch_gz 'https://atlas-group.cs.brown.edu/data/covid-mts/in.csv.gz' "$input_dir/in.csv"

fetch_gz 'https://atlas-group.cs.brown.edu/data/covid-mts/in_small.csv.gz' "$input_dir/in_small.csv"
1 change: 1 addition & 0 deletions covid-mts/input/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

15 changes: 0 additions & 15 deletions covid-mts/inputs.sh

This file was deleted.

57 changes: 16 additions & 41 deletions covid-mts/run.sh
Original file line number Diff line number Diff line change
@@ -1,47 +1,22 @@
#!/bin/bash

export SUITE_DIR=$(realpath $(dirname "$0"))
export TIMEFORMAT=%R
cd $SUITE_DIR

if [[ "$1" == "--small" ]]; then
echo "Using small input"
input_file="$SUITE_DIR/inputs/in_small.csv"
else
echo "Using default input"
input_file="$SUITE_DIR/inputs/in.csv"
REPO_TOP=$(git rev-parse --show-toplevel)
eval_dir="${REPO_TOP}/covid-mts"
input_dir="${eval_dir}/input"
outputs_dir="${eval_dir}/outputs"
scripts_dir="${eval_dir}/scripts"

suffix=""
if [[ "$@" == *"--small"* ]]; then
suffix="_small"
fi

mkdir -p "outputs"
all_res_file="./outputs/covid-mts.res"
> $all_res_file

# time_file stores the time taken for each script
# mode_res_file stores the time taken and the script name for every script in a mode (e.g. bash, pash, dish, fish)
# all_res_file stores the time taken for each script for every script run, making it easy to copy and paste into the spreadsheet
covid-mts() {
mkdir -p "outputs/$1"
mode_res_file="./outputs/$1/covid-mts.res"
> $mode_res_file

echo executing covid-mts $1 $(date) | tee -a $mode_res_file $all_res_file

for number in `seq 4` ## initial: FIXME 5.sh is not working yet
do
script="${number}"
script_file="./scripts/$script.sh"
output_dir="./outputs/$1/$script/"
output_file="./outputs/$1/$script.out"
time_file="./outputs/$1/$script.time"
log_file="./outputs/$1/$script.log"

if [[ "$1" == "bash" ]]; then
(time bash $script_file $input_file > $output_file ) 2> $time_file
fi
input_file="$input_dir/in$suffix.csv"
output_scoped="$outputs_dir/outputs$suffix"
mkdir -p "$output_scoped"

cat "${time_file}" >> $all_res_file
echo "$script_file $(cat "$time_file")" | tee -a $mode_res_file
done
}
"$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out"
"$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out"
"$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out"
"$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out"

covid-mts "bash"
69 changes: 13 additions & 56 deletions covid-mts/verify.sh
Original file line number Diff line number Diff line change
@@ -1,65 +1,22 @@
#!/bin/bash

# Exit immediately if a command exits with a non-zero status
# set -e

cd "$(realpath $(dirname "$0"))"

mkdir -p hashes/small
REPO_TOP=$(git rev-parse --show-toplevel)
eval_dir="${REPO_TOP}/covid-mts"
outputs_dir="${eval_dir}/outputs"
scripts_dir="${eval_dir}/scripts"
hashes_dir="${eval_dir}/hashes"

suffix=""
if [[ "$@" == *"--small"* ]]; then
hash_folder="hashes/small"
else
hash_folder="hashes"
suffix="_small"
fi

if [[ "$@" == *"--generate"* ]]; then
# Directory to iterate over
directory="outputs/bash"

# Loop through all .out files in the directory
for file in "$directory"/*.out
do
# Extract the filename without the directory path and extension
filename=$(basename "$file" .out)

# Generate SHA-256 hash
hash=$(shasum -a 256 "$file" | awk '{ print $1 }')

# Save the hash to a file
echo "$hash" > "$hash_folder/$filename.hash"

# Print the filename and hash
echo "File: $hash_folder/$filename.hash | SHA-256 Hash: $hash"
done
# give relative paths to md5sum
(cd "$outputs_dir"; md5sum "outputs$suffix"/* > "$hashes_dir/outputs$suffix.md5sum")
exit 0
fi

# Loop through all directories in the parent directory
for folder in "outputs"/*/
do
# Remove trailing slash
folder=${folder%/}

echo "Verifying folder: $folder"

# Loop through all .out files in the current directory
for file in "$folder"/*.out
do
# Extract the filename without the directory path and extension
filename=$(basename "$file" .out)

if [ ! -f "$folder/$filename.hash" ]; then
# Generate SHA-256 hash
hash=$(shasum -a 256 "$file" | awk '{ print $1 }')

# Save the hash to a file
echo "$hash" > "$folder/$filename.hash"
fi

# Compare the hash with the hash in the hashes directory
diff "$hash_folder/$filename.hash" "$folder/$filename.hash"

# Print the filename and hash
echo "File: $folder/$filename | SHA-256 Hash: $(cat "$folder/$filename.hash")"
done
done
# give relative paths to md5sum
(cd "$outputs_dir"; md5sum --check --quiet --status "$hashes_dir/outputs$suffix.md5sum")
echo covid-mts$suffix $?

0 comments on commit 4aa26f6

Please sign in to comment.