From 41b441a8114956a62553f80c3cee2fd75652d9f5 Mon Sep 17 00:00:00 2001
From: Jey Puget Gil
Date: Wed, 24 Jul 2024 13:52:16 +0200
Subject: [PATCH] Add support for generating multiple versions of BSBM dataset

---
Reviewer notes (this section is not part of the commit message):

* entrypoint.sh: the added branch now tests only for "generate-n". The
  previous condition also matched "generate", which made the following
  `elif [ "$command" = "generate" ]` branch unreachable.
* generate-n: `-help` is handled explicitly. Before, the integer check
  rejected it, although README.md advertises `generate-n -help` and the
  usage block of entrypoint.sh invokes it. The no-argument case now exits
  right after printing its hint instead of falling through to the integer
  error.
* NOTE(review): every loop iteration calls ./generate with identical
  arguments; whether successive runs overwrite the same output file depends
  on the generator - confirm.
* NOTE(review): this copy of the patch had lost its line structure. Line
  breaks were rebuilt from the hunk headers; indentation inside the hunks
  is assumed to be 4 spaces - confirm against the target files, or apply
  with `git apply --ignore-whitespace`. The post-image blob ids on the
  `index` lines and the commit id on the first line are those of the
  original commit and do not reflect the changes above. The author e-mail
  is absent from the From: header in this copy.

 README.md     |  2 ++
 entrypoint.sh | 19 +++++++++++++++++--
 generate-n    | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100755 generate-n

diff --git a/README.md b/README.md
index 0dad878..9ba9af6 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ This is the Dockerized version of the Berlin SPARQL Benchmark.
 ## Usage
 ```bash
 docker run -v "$PWD:/data" vcity/bsbm generate [args]
+docker run -v "$PWD:/data" vcity/bsbm generate-n [args]
 docker run -v "$PWD:/data" vcity/bsbm qualification [args]
 docker run -v "$PWD:/data" vcity/bsbm testdriver [args]
 ```
@@ -18,6 +19,7 @@ If you want more information about the different arguments, please refer to the
 
 ```bash
 docker run vcity/bsbm generate -help
+docker run vcity/bsbm generate-n -help
 docker run vcity/bsbm qualification -help
 docker run vcity/bsbm testdriver -help
 ```
diff --git a/entrypoint.sh b/entrypoint.sh
index 3ee5098..300970c 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -1,8 +1,9 @@
 #!/bin/bash
 
 if [ $# == 0 ]; then
-    echo "Usage: $0 [generate|testdriver|qualification] ?[args]"
+    echo "Usage: $0 [generate|generate-n|testdriver|qualification] ?[args]"
     ./generate -help
+    ./generate-n -help
     ./testdriver -help
     ./qualification -help
     exit 1
@@ -14,7 +15,21 @@ command=$1
 mkdir -p /data
 chmod 777 /data
 
-if [ "$command" = "generate" ]; then
+if [ "$command" = "generate-n" ]; then
+    ./$@
+
+    # get the -fn parameter value, if it exists else set it to "dataset"
+    fn=$(echo $@ | grep -oP '(?<=-fn )[^ ]+' || echo "dataset")
+    # get the -s parameter value, if it exists else set it to "nt"
+    s=$(echo $@ | grep -oP '(?<=-s )[^ ]+' || echo "nt")
+    mv $fn*.$s /data
+
+    # check if the -ud parameter exists
+    if [[ $@ == *"-ud"* ]]; then
+        udf=$(echo $@ | grep -oP '(?<=-udf )[^ ]+' || echo "dataset_update")
+        mv $udf*.nt /data
+    fi
+elif [ "$command" = "generate" ]; then
     ./$@
 
     # get the -fn parameter value, if it exists else set it to "dataset"
diff --git a/generate-n b/generate-n
new file mode 100755
index 0000000..df370cd
--- /dev/null
+++ b/generate-n
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Usage: ./generate-n <number_of_versions> [generate args]
+# Runs ./generate <number_of_versions> times with the remaining arguments.
+if [ ! -e "./generate-n" ]
+then
+    echo "Please run this script from the exec directory"
+    exit 1
+fi
+if [ $# == 0 ]
+then
+    echo "For generator options type ./generate-n -help"
+    exit 1
+fi
+# -help is not a version count: print the usage and show the generator's help
+if [ "$1" = "-help" ]
+then
+    echo "Usage: ./generate-n <number_of_versions> [generate args]"
+    ./generate -help
+    exit 0
+fi
+# check that $1 is an integer
+if ! [[ $1 =~ ^[0-9]+$ ]]
+then
+    echo "Please provide an integer as the first argument"
+    exit 1
+fi
+
+number_of_versions=$1
+echo "Generating $number_of_versions versions"
+
+# loop through the number of versions and call ./generate
+for i in $(seq 1 "$number_of_versions")
+do
+    echo "Generating version $i"
+    ./generate "${@:2}"
+done