diff --git a/README.md b/README.md index 9cae6b0d..2e0091d7 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ If you'd like to run the stream, clone this repo and execute the command below. ``` /ngen-datastream/scripts/stream.sh /ngen-datastream/configs/conf_datastream_daily.json ``` -requires `jq` ### Example `conf_datastream.json` ``` diff --git a/configs/conf_datastream_daily.json b/configs/conf_datastream_daily.json deleted file mode 100644 index 78c3a5ca..00000000 --- a/configs/conf_datastream_daily.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "globals" : { - "start_date" : "DAILY", - "end_date" : "", - "data_dir" : "", - "relative_to" : "", - "resource_dir" : "" - } - -} \ No newline at end of file diff --git a/configs/conf_datastream_daily.sh b/configs/conf_datastream_daily.sh new file mode 100644 index 00000000..8f6dc17c --- /dev/null +++ b/configs/conf_datastream_daily.sh @@ -0,0 +1,11 @@ +# CONFIGURATION FILE FOR NGEN-DATASTREAM + +START_DATE="DAILY" +# END_DATE="" +# DATA_PATH="" +# RESOURCE_PATH="" +# RELATIVE_TO="" +# SUBSET_ID_TYPE="" +# SUBSET_ID="" +# HYDROFABRIC_VERSION="" + diff --git a/configs/conf_datastream_subset.json b/configs/conf_datastream_subset.json deleted file mode 100644 index 66d561c5..00000000 --- a/configs/conf_datastream_subset.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "globals" : { - "start_date" : "DAILY", - "end_date" : "", - "data_dir" : "", - "relative_to" : "", - "resource_dir" : "" - }, - - "subset":{ - "id_type" : "hl_uri", - "id" : "Gages-04185000", - "version" : "v20" - } - -} \ No newline at end of file diff --git a/configs/conf_datastream_subset.sh b/configs/conf_datastream_subset.sh new file mode 100644 index 00000000..c46b9a2f --- /dev/null +++ b/configs/conf_datastream_subset.sh @@ -0,0 +1,11 @@ +# CONFIGURATION FILE FOR NGEN-DATASTREAM + +START_DATE="DAILY" +# END_DATE="" +# DATA_PATH="" +# RESOURCE_PATH="" +# RELATIVE_TO="" +SUBSET_ID_TYPE="hl_uri" +SUBSET_ID="Gages-04185000" +HYDROFABRIC_VERSION="v20" + diff --git a/configs/conf_datastream_template.sh b/configs/conf_datastream_template.sh new file mode 100644 index 00000000..e01149e3 --- /dev/null +++ b/configs/conf_datastream_template.sh @@ -0,0 +1,11 @@ +# CONFIGURATION FILE FOR NGEN-DATASTREAM + +# START_DATE="" +# END_DATE="" +# DATA_PATH="" +# RESOURCE_PATH="" +# RELATIVE_TO="" +# SUBSET_ID_TYPE="" +# SUBSET_ID="" +# HYDROFABRIC_VERSION="" + diff --git a/python/configure-datastream.py b/python/configure-datastream.py index 41fd2b3c..ed25dc39 100644 --- a/python/configure-datastream.py +++ b/python/configure-datastream.py @@ -3,6 +3,23 @@ from pathlib import Path import pytz as tz +def generate_config(args): + config = { + "globals": { + "start_date": args.start_date, + "end_date": args.end_date, + "data_dir": args.data_dir, + "relative_to": args.relative_to, + "resource_dir": args.resource_dir + }, + "subset": { + "id_type": args.subset_id_type, + "id": args.subset_id, + "version": args.hydrofabric_version + } + } + return config + def write_json(conf, out_dir, name): conf_path = Path(out_dir,name) with open(conf_path,'w') as fp: @@ -38,7 +55,7 @@ def create_ds_confs_daily(conf, today, tomorrow): nwm_conf = { "forcing_type" : "operational_archive", "start_date" : today, - "end_date" : today, # If we specify this as tomorrow, we will get 2 days worth of data, just how nwmurl works + "end_date" : today, "runinput" : 2, "varinput" : 5, "geoinput" : 1, @@ -57,8 +74,8 @@ def create_confs(conf): if conf['globals']['start_date'] == "DAILY": now = datetime.now(tz.timezone('US/Eastern')) - today = now.replace(hour=0, minute=0, second=0, microsecond=0) - tomorrow = today + timedelta(days=1) + today = now.replace(hour=1, minute=0, second=0, microsecond=0) + tomorrow = today + timedelta(hours=23) today_ds_confs = today.strftime('%Y%m%d%H%M') tomorrow_ds_confs = tomorrow.strftime('%Y%m%d%H%M') @@ -93,10 +110,6 @@ def create_confs(conf): with open(realization_file,'r') as fp: data = json.load(fp) - template_realization = Path(resources_config_dir,"realization.json") - template_realization_rename = Path(resources_config_dir,"realization_template.json") - os.system(f'mv {template_realization} {template_realization_rename}') - data['time']['start_time'] = today_realization data['time']['end_time'] = tomorrow_realization write_json(data,ngen_config_dir,'realization.json') @@ -117,19 +130,21 @@ def create_confs(conf): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - dest="infile", type=str, help="A json containing user inputs to run ngen-datastream" - ) + parser.add_argument("--conf", type=str, help="A json containing user inputs to run ngen-datastream") + parser.add_argument("--start-date", help="Set the start date") + parser.add_argument("--end-date", help="Set the end date") + parser.add_argument("--data-dir", help="Set the data directory") + parser.add_argument("--relative-to", help="Set the relative directory") + parser.add_argument("--resource-dir", help="Set the resource directory") + parser.add_argument("--subset-id-type", help="Set the subset ID type") + parser.add_argument("--subset-id", help="Set the subset ID") + parser.add_argument("--hydrofabric-version", help="Set the Hydrofabric version") + args = parser.parse_args() - if args.infile[0] == '{': - conf = json.loads(args.infile) + if not args.conf: + conf = generate_config(args) else: - if 's3' in args.infile: - os.system(f'wget {args.infile}') - filename = args.infile.split('/')[-1] - conf = json.load(open(filename)) - else: - conf = json.load(open(args.infile)) + conf = args.conf create_confs(conf) diff --git a/scripts/stream.sh b/scripts/stream.sh index 745b34fa..41829bc2 100755 --- a/scripts/stream.sh +++ b/scripts/stream.sh @@ -9,34 +9,67 @@ build_docker_container() { local DOCKER_IMAGE="$2" if docker inspect "$DOCKER_TAG" &>/dev/null; then - echo "The Docker container '$DOCKER_TAG' exists. Not building." + echo "The Docker container "$DOCKER_TAG" exists. Not building." else echo "Building $DOCKER_TAG container..." - docker build "$DOCKER_IMAGE" -t "$DOCKER_TAG" --no-cache + docker build $DOCKER_IMAGE -t $DOCKER_TAG --no-cache fi } -if [ $# -ne 1 ]; then - echo "Usage: $0 " +usage() { + echo "Usage: $0 [options]" + echo "Options:" + echo " -s, --start-date " + echo " -e, --end-date " + echo " -d, --data-path " + echo " -r, --resource-path " + echo " -t, --relative-to " + echo " -i, --id-type " + echo " -I, --id " + echo " -v, --version " + echo " -c, --conf-file " exit 1 -fi +} -CONFIG_FILE="$1" -if [ ! -f "$CONFIG_FILE" ]; then - echo "File not found: $CONFIG_FILE" - exit 1 +START_DATE="" +END_DATE="" +DATA_PATH="" +RESOURCE_PATH="" +RELATIVE_TO="" +SUBSET_ID_TYPE="" +SUBSET_ID="" +HYDROFABRIC_VERSION="" +CONF_FILE="" + +while [ "$#" -gt 0 ]; do + case "$1" in + -s|--start-date) START_DATE="$2"; shift 2;; + -e|--end-date) END_DATE="$2"; shift 2;; + -d|--data-path) DATA_PATH="$2"; shift 2;; + -r|--resource-path) RESOURCE_PATH="$2"; shift 2;; + -t|--relative-to) RELATIVE_TO="$2"; shift 2;; + -i|--id-type) SUBSET_ID_TYPE="$2"; shift 2;; + -I|--id) SUBSET_ID="$2"; shift 2;; + -v|--version) HYDROFABRIC_VERSION="$2"; shift 2;; + -c|--conf-file) CONF_FILE="$2"; shift 2;; + *) usage;; + esac +done + +if [ -n "$CONF_FILE" ]; then + echo "Configuration option provided" $CONF_FILE + if [ -e "$CONF_FILE" ]; then + echo "Any variables defined in "$CONF_FILE" will override cli args" + echo "Using options:" + cat $CONF_FILE + source "$CONF_FILE" + else + echo $CONF_FILE" not found!!" + exit 1 + fi +else + echo "No configuration file detected, using cli args" fi -config=$(cat "$CONFIG_FILE") - -START_DATE=$(echo "$config" | jq -r '.globals.start_date') -END_DATE=$(echo "$config" | jq -r '.globals.end_date') -DATA_PATH=$(echo "$config" | jq -r '.globals.data_dir') -RESOURCE_PATH=$(echo "$config" | jq -r '.globals.resource_dir') -RELATIVE_TO=$(echo "$config" | jq -r '.globals.relative_to') - -SUBSET_ID_TYPE=$(echo "$config" | jq -r '.subset.id_type') -SUBSET_ID=$(echo "$config" | jq -r '.subset.id') -HYDROFABRIC_VERSION=$(echo "$config" | jq -r '.subset.version') if [ $START_DATE == "DAILY" ]; then DATA_PATH="${PACAKGE_DIR%/}/data/$(env TZ=US/Eastern date +'%Y%m%d')" @@ -58,6 +91,14 @@ fi mkdir -p $DATA_PATH NGEN_RUN_PATH="${DATA_PATH%/}/ngen-run" + +if [ -e "$DATA_PATH" ]; then + : +else + echo "$DATA_PATH doesn't not exist!" + exit 1 +fi + DATASTREAM_CONF_PATH="${DATA_PATH%/}/datastream-configs" DATASTREAM_RESOURCES="${DATA_PATH%/}/datastream-resources" mkdir -p $DATASTREAM_CONF_PATH @@ -70,22 +111,27 @@ mkdir -p $NGEN_OUTPUT_PATH GEOPACKGE_NGENRUN="datastream.gpkg" GEOPACKAGE_NGENRUN_PATH="${NGEN_CONFIG_PATH%/}/$GEOPACKGE_NGENRUN" -if [ -e "$RESOURCE_PATH" ]; then +if [ -n "$RESOURCE_PATH" ]; then + echo "Resource path option provided" $RESOURCE_PATH if [[ $RESOURCE_PATH == *"https://"* ]]; then - echo "curl'ing $DATASTREAM_RESOURCES $RESOURCE_PATH" - curl -# -L -o $DATASTREAM_RESOURCES $RESOURCE_PATH - if [[ $RESOURCE_PATH == *".tar."* ]]; then - tar -xzvf $(basename $RESOURCE_PATH) - fi + echo "curl'ing $DATASTREAM_RESOURCES $RESOURCE_PATH" + curl -# -L -o $DATASTREAM_RESOURCES $RESOURCE_PATH + if [[ $RESOURCE_PATH == *".tar."* ]]; then + tar -xzvf $(basename $RESOURCE_PATH) + fi else - cp -r $RESOURCE_PATH $DATASTREAM_RESOURCES - fi - GEOPACKAGE_RESOURCES_PATH=$(find "$DATASTREAM_RESOURCES" -type f -name "*.gpkg") - GEOPACKAGE=$(basename $GEOPACKAGE_RESOURCES_PATH) - + if [ -e "$RESOURCE_PATH" ]; then + echo "Copying into current data path "$DATA_PATH + cp -r $RESOURCE_PATH $DATASTREAM_RESOURCES + GEOPACKAGE_RESOURCES_PATH=$(find "$DATASTREAM_RESOURCES" -type f -name "*.gpkg") + GEOPACKAGE=$(basename $GEOPACKAGE_RESOURCES_PATH) + else + echo $RESOURCE_PATH " provided doesn't exist!" + fi + fi else # if a resource path is not supplied, generate one with defaults - echo "Generating datastream resources with defaults" + echo "No resouce path provided. Generating datastream resources with defaults" DATASTREAM_RESOURCES_CONFIGS=${DATASTREAM_RESOURCES%/}/ngen-configs mkdir -p $DATASTREAM_RESOURCES mkdir -p $DATASTREAM_RESOURCES_CONFIGS @@ -145,7 +191,7 @@ else fi fi -echo "Using geopackage" $GEOPACKAGE, "Named $GEOPACKGE_NGENRUN for ngen_run" +echo "Using geopackage $GEOPACKAGE, Named $GEOPACKGE_NGENRUN for ngen_run" DOCKER_DIR="$(dirname "${SCRIPT_DIR%/}")/docker" DOCKER_MOUNT="/mounted_dir" @@ -160,7 +206,7 @@ build_docker_container "$DOCKER_TAG" "$FP_DOCKER" WEIGHTS_FILENAME=$(find "$DATASTREAM_RESOURCES" -type f -name "*weights*") if [ -e "$WEIGHTS_FILENAME" ]; then - echo "Using weights found in resources directory" "$WEIGHTS_FILENAME" + echo "Using weights found in resources directory $WEIGHTS_FILENAME" mv "$WEIGHTS_FILENAME" ""$DATASTREAM_RESOURCES"/weights.json" else echo "Weights file not found. Creating from" $GEOPACKAGE @@ -189,7 +235,15 @@ fi python3 -m pip install --upgrade pip pip3 install -r $PACAKGE_DIR/requirements.txt --no-cache CONF_GENERATOR="$PACAKGE_DIR/python/configure-datastream.py" -python3 $CONF_GENERATOR $CONFIG_FILE +python3 $CONF_GENERATOR \ + --start-date "$START_DATE" \ + --end-date "$END_DATE" \ + --data-dir "$DATA_PATH" \ + --relative-to "$RELATIVE_TO" \ + --resource-dir "$RESOURCE_PATH" \ + --subset-id-type "$SUBSET_ID_TYPE" \ + --subset-id "$SUBSET_ID" \ + --hydrofabric-version "$HYDROFABRIC_VERSION" echo "Creating nwm filenames file" docker run -it --rm -v "$DATA_PATH:"$DOCKER_MOUNT"" \