Skip to content

Commit

Permalink
remove jq and source env variables instead
Browse files Browse the repository at this point in the history
  • Loading branch information
JordanLaserGit committed Jan 22, 2024
1 parent 1afd20e commit 6bb5690
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 80 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ If you'd like to run the stream, clone this repo and execute the command below.
```
/ngen-datastream/scripts/stream.sh /ngen-datastream/configs/conf_datastream_daily.json
```
requires `jq`

### Example `conf_datastream.json`
```
Expand Down
10 changes: 0 additions & 10 deletions configs/conf_datastream_daily.json

This file was deleted.

11 changes: 11 additions & 0 deletions configs/conf_datastream_daily.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# CONFIGURATION FILE FOR NGEN-DATASTREAM

START_DATE="DAILY"
# END_DATE=""
# DATA_PATH=""
# RESOURCE_PATH=""
# RELATIVE_TO=""
# SUBSET_ID_TYPE=""
# SUBSET_ID=""
# HYDROFABRIC_VERSION=""

16 changes: 0 additions & 16 deletions configs/conf_datastream_subset.json

This file was deleted.

11 changes: 11 additions & 0 deletions configs/conf_datastream_subset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# CONFIGURATION FILE FOR NGEN-DATASTREAM

START_DATE="DAILY"
# END_DATE=""
# DATA_PATH=""
# RESOURCE_PATH=""
# RELATIVE_TO=""
SUBSET_ID_TYPE="hl_uri"
SUBSET_ID="Gages-04185000"
HYDROFABRIC_VERSION="v20"

11 changes: 11 additions & 0 deletions configs/conf_datastream_template.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# CONFIGURATION FILE FOR NGEN-DATASTREAM

# START_DATE=""
# END_DATE=""
# DATA_PATH=""
# RESOURCE_PATH=""
# RELATIVE_TO=""
# SUBSET_ID_TYPE=""
# SUBSET_ID=""
# HYDROFABRIC_VERSION=""

51 changes: 33 additions & 18 deletions python/configure-datastream.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@
from pathlib import Path
import pytz as tz

def generate_config(args):
    """Build the datastream configuration dictionary from parsed CLI arguments.

    Args:
        args: Namespace-like object exposing ``start_date``, ``end_date``,
            ``data_dir``, ``relative_to``, ``resource_dir``,
            ``subset_id_type``, ``subset_id`` and ``hydrofabric_version``.

    Returns:
        dict: A configuration with a ``"globals"`` section (dates and
        directories) and a ``"subset"`` section (id type, id, version).
    """
    config = {
        "globals": {
            "start_date": args.start_date,
            "end_date": args.end_date,
            "data_dir": args.data_dir,
            "relative_to": args.relative_to,
            "resource_dir": args.resource_dir,
        },
        "subset": {
            "id_type": args.subset_id_type,
            "id": args.subset_id,
            "version": args.hydrofabric_version,
        },
    }
    return config

def write_json(conf, out_dir, name):
conf_path = Path(out_dir,name)
with open(conf_path,'w') as fp:
Expand Down Expand Up @@ -38,7 +55,7 @@ def create_ds_confs_daily(conf, today, tomorrow):
nwm_conf = {
"forcing_type" : "operational_archive",
"start_date" : today,
"end_date" : today, # If we specify this as tomorrow, we will get 2 days worth of data, just how nwmurl works
"end_date" : today,
"runinput" : 2,
"varinput" : 5,
"geoinput" : 1,
Expand All @@ -57,8 +74,8 @@ def create_confs(conf):

if conf['globals']['start_date'] == "DAILY":
now = datetime.now(tz.timezone('US/Eastern'))
today = now.replace(hour=0, minute=0, second=0, microsecond=0)
tomorrow = today + timedelta(days=1)
today = now.replace(hour=1, minute=0, second=0, microsecond=0)
tomorrow = today + timedelta(hours=23)

today_ds_confs = today.strftime('%Y%m%d%H%M')
tomorrow_ds_confs = tomorrow.strftime('%Y%m%d%H%M')
Expand Down Expand Up @@ -93,10 +110,6 @@ def create_confs(conf):
with open(realization_file,'r') as fp:
data = json.load(fp)

template_realization = Path(resources_config_dir,"realization.json")
template_realization_rename = Path(resources_config_dir,"realization_template.json")
os.system(f'mv {template_realization} {template_realization_rename}')

data['time']['start_time'] = today_realization
data['time']['end_time'] = tomorrow_realization
write_json(data,ngen_config_dir,'realization.json')
Expand All @@ -117,19 +130,21 @@ def create_confs(conf):

if __name__ == "__main__":
    # Command-line entry point: build the configuration either from a JSON
    # document given via --conf or from the individual option flags, then
    # hand it to create_confs.
    parser = argparse.ArgumentParser()
    parser.add_argument("--conf", type=str, help="A json containing user inputs to run ngen-datastream")
    parser.add_argument("--start-date", help="Set the start date")
    parser.add_argument("--end-date", help="Set the end date")
    parser.add_argument("--data-dir", help="Set the data directory")
    parser.add_argument("--relative-to", help="Set the relative directory")
    parser.add_argument("--resource-dir", help="Set the resource directory")
    parser.add_argument("--subset-id-type", help="Set the subset ID type")
    parser.add_argument("--subset-id", help="Set the subset ID")
    parser.add_argument("--hydrofabric-version", help="Set the Hydrofabric version")

    args = parser.parse_args()

    if not args.conf:
        conf = generate_config(args)
    elif args.conf.lstrip().startswith("{"):
        # --conf holds the JSON document itself.
        conf = json.loads(args.conf)
    else:
        # --conf is a path to a JSON file. create_confs indexes the config
        # as a dict, so the raw string must never be passed through.
        with open(args.conf, "r") as fp:
            conf = json.load(fp)

    create_confs(conf)
124 changes: 89 additions & 35 deletions scripts/stream.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,67 @@ build_docker_container() {
local DOCKER_IMAGE="$2"

if docker inspect "$DOCKER_TAG" &>/dev/null; then
echo "The Docker container '$DOCKER_TAG' exists. Not building."
echo "The Docker container "$DOCKER_TAG" exists. Not building."
else
echo "Building $DOCKER_TAG container..."
docker build "$DOCKER_IMAGE" -t "$DOCKER_TAG" --no-cache
docker build $DOCKER_IMAGE -t $DOCKER_TAG --no-cache
fi
}

if [ $# -ne 1 ]; then
echo "Usage: $0 <datastream-config.json>"
usage() {
echo "Usage: $0 [options]"
echo "Options:"
echo " -s, --start-date <start_date> "
echo " -e, --end-date <end_date> "
echo " -d, --data-path <data_path> "
echo " -r, --resource-path <resource_path> "
echo " -t, --relative-to <relative_to> "
echo " -i, --id-type <id_type> "
echo " -I, --id <id> "
echo " -v, --version <version> "
echo " -c, --conf-file <conf_file> "
exit 1
fi
}

CONFIG_FILE="$1"
if [ ! -f "$CONFIG_FILE" ]; then
echo "File not found: $CONFIG_FILE"
exit 1
# Defaults for every setting; each may be overridden by a CLI option below
# and, later, by a sourced configuration file.
START_DATE=""
END_DATE=""
DATA_PATH=""
RESOURCE_PATH=""
RELATIVE_TO=""
SUBSET_ID_TYPE=""
SUBSET_ID=""
HYDROFABRIC_VERSION=""
CONF_FILE=""

# Parse command-line options. Every recognised option takes exactly one value.
while [ "$#" -gt 0 ]; do
    # A trailing option without its value would make `shift 2` fail without
    # shifting anything, leaving "$1" in place and looping forever. Since all
    # options need a value, a lone remaining argument is always a usage error.
    if [ "$#" -lt 2 ]; then
        usage
    fi
    case "$1" in
        -s|--start-date) START_DATE="$2"; shift 2;;
        -e|--end-date) END_DATE="$2"; shift 2;;
        -d|--data-path) DATA_PATH="$2"; shift 2;;
        -r|--resource-path) RESOURCE_PATH="$2"; shift 2;;
        -t|--relative-to) RELATIVE_TO="$2"; shift 2;;
        -i|--id-type) SUBSET_ID_TYPE="$2"; shift 2;;
        -I|--id) SUBSET_ID="$2"; shift 2;;
        -v|--version) HYDROFABRIC_VERSION="$2"; shift 2;;
        -c|--conf-file) CONF_FILE="$2"; shift 2;;
        *) usage;;
    esac
done

# Optionally load settings from a configuration file. The file is sourced
# after CLI parsing, so any variable it defines overrides the CLI value.
if [ -n "$CONF_FILE" ]; then
    echo "Configuration option provided $CONF_FILE"
    if [ -e "$CONF_FILE" ]; then
        echo "Any variables defined in $CONF_FILE will override cli args"
        echo "Using options:"
        # Quoted so that paths containing spaces or glob characters work.
        cat "$CONF_FILE"
        source "$CONF_FILE"
    else
        echo "$CONF_FILE not found!!" >&2
        exit 1
    fi
else
    echo "No configuration file detected, using cli args"
fi
config=$(cat "$CONFIG_FILE")

START_DATE=$(echo "$config" | jq -r '.globals.start_date')
END_DATE=$(echo "$config" | jq -r '.globals.end_date')
DATA_PATH=$(echo "$config" | jq -r '.globals.data_dir')
RESOURCE_PATH=$(echo "$config" | jq -r '.globals.resource_dir')
RELATIVE_TO=$(echo "$config" | jq -r '.globals.relative_to')

SUBSET_ID_TYPE=$(echo "$config" | jq -r '.subset.id_type')
SUBSET_ID=$(echo "$config" | jq -r '.subset.id')
HYDROFABRIC_VERSION=$(echo "$config" | jq -r '.subset.version')

if [ $START_DATE == "DAILY" ]; then
DATA_PATH="${PACAKGE_DIR%/}/data/$(env TZ=US/Eastern date +'%Y%m%d')"
Expand All @@ -58,6 +91,14 @@ fi

mkdir -p $DATA_PATH
NGEN_RUN_PATH="${DATA_PATH%/}/ngen-run"

# The preceding mkdir -p can fail (e.g. permissions, empty path); stop early
# rather than continuing without a data directory.
if [ ! -e "$DATA_PATH" ]; then
    echo "$DATA_PATH does not exist!" >&2
    exit 1
fi

DATASTREAM_CONF_PATH="${DATA_PATH%/}/datastream-configs"
DATASTREAM_RESOURCES="${DATA_PATH%/}/datastream-resources"
mkdir -p $DATASTREAM_CONF_PATH
Expand All @@ -70,22 +111,27 @@ mkdir -p $NGEN_OUTPUT_PATH
GEOPACKGE_NGENRUN="datastream.gpkg"
GEOPACKAGE_NGENRUN_PATH="${NGEN_CONFIG_PATH%/}/$GEOPACKGE_NGENRUN"

if [ -e "$RESOURCE_PATH" ]; then
if [ -n "$RESOURCE_PATH" ]; then
echo "Resource path option provided" $RESOURCE_PATH
if [[ $RESOURCE_PATH == *"https://"* ]]; then
echo "curl'ing $DATASTREAM_RESOURCES $RESOURCE_PATH"
curl -# -L -o $DATASTREAM_RESOURCES $RESOURCE_PATH
if [[ $RESOURCE_PATH == *".tar."* ]]; then
tar -xzvf $(basename $RESOURCE_PATH)
fi
echo "curl'ing $DATASTREAM_RESOURCES $RESOURCE_PATH"
curl -# -L -o $DATASTREAM_RESOURCES $RESOURCE_PATH
if [[ $RESOURCE_PATH == *".tar."* ]]; then
tar -xzvf $(basename $RESOURCE_PATH)
fi
else
cp -r $RESOURCE_PATH $DATASTREAM_RESOURCES
fi
GEOPACKAGE_RESOURCES_PATH=$(find "$DATASTREAM_RESOURCES" -type f -name "*.gpkg")
GEOPACKAGE=$(basename $GEOPACKAGE_RESOURCES_PATH)

if [ -e "$RESOURCE_PATH" ]; then
echo "Copying into current data path "$DATA_PATH
cp -r $RESOURCE_PATH $DATASTREAM_RESOURCES
GEOPACKAGE_RESOURCES_PATH=$(find "$DATASTREAM_RESOURCES" -type f -name "*.gpkg")
GEOPACKAGE=$(basename $GEOPACKAGE_RESOURCES_PATH)
else
echo $RESOURCE_PATH " provided doesn't exist!"
fi
fi
else
# if a resource path is not supplied, generate one with defaults
echo "Generating datastream resources with defaults"
echo "No resouce path provided. Generating datastream resources with defaults"
DATASTREAM_RESOURCES_CONFIGS=${DATASTREAM_RESOURCES%/}/ngen-configs
mkdir -p $DATASTREAM_RESOURCES
mkdir -p $DATASTREAM_RESOURCES_CONFIGS
Expand Down Expand Up @@ -145,7 +191,7 @@ else
fi
fi

echo "Using geopackage" $GEOPACKAGE, "Named $GEOPACKGE_NGENRUN for ngen_run"
echo "Using geopackage $GEOPACKAGE, Named $GEOPACKGE_NGENRUN for ngen_run"

DOCKER_DIR="$(dirname "${SCRIPT_DIR%/}")/docker"
DOCKER_MOUNT="/mounted_dir"
Expand All @@ -160,7 +206,7 @@ build_docker_container "$DOCKER_TAG" "$FP_DOCKER"

WEIGHTS_FILENAME=$(find "$DATASTREAM_RESOURCES" -type f -name "*weights*")
if [ -e "$WEIGHTS_FILENAME" ]; then
echo "Using weights found in resources directory" "$WEIGHTS_FILENAME"
echo "Using weights found in resources directory $WEIGHTS_FILENAME"
mv "$WEIGHTS_FILENAME" ""$DATASTREAM_RESOURCES"/weights.json"
else
echo "Weights file not found. Creating from" $GEOPACKAGE
Expand Down Expand Up @@ -189,7 +235,15 @@ fi
python3 -m pip install --upgrade pip
pip3 install -r $PACAKGE_DIR/requirements.txt --no-cache
CONF_GENERATOR="$PACAKGE_DIR/python/configure-datastream.py"
python3 $CONF_GENERATOR $CONFIG_FILE
python3 $CONF_GENERATOR \
--start-date "$START_DATE" \
--end-date "$END_DATE" \
--data-dir "$DATA_PATH" \
--relative-to "$RELATIVE_TO" \
--resource-dir "$RESOURCE_PATH" \
--subset-id-type "$SUBSET_ID_TYPE" \
--subset-id "$SUBSET_ID" \
--hydrofabric-version "$HYDROFABRIC_VERSION"

echo "Creating nwm filenames file"
docker run -it --rm -v "$DATA_PATH:"$DOCKER_MOUNT"" \
Expand Down

0 comments on commit 6bb5690

Please sign in to comment.