# Builds the Docker image and pushes it to Docker Hub whenever a version tag
# (v*) is pushed. The image tag is the git tag that triggered the run.
name: Push To Docker hub

on:
  push:
    tags:
      - v*

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3

      # The `::set-output` workflow command is deprecated; step outputs are
      # written to the file referenced by $GITHUB_OUTPUT instead.
      - name: Get tag version
        id: tag_name
        run: echo "SOURCE_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v2
        with:
          context: .
          push: true
          tags: ghrcdaac/dmrpp-generator:${{ steps.tag_name.outputs.SOURCE_TAG }}
FROM opendap/besd:3.20.13-664
# Refresh the base image and install the helper tools in one layer, then drop
# the yum cache so it is not baked into the image.
RUN yum -y update && \
    yum -y upgrade && \
    yum install -y nano wget && \
    yum clean all
HEALTHCHECK NONE
# Adding a user
RUN adduser worker
USER worker
WORKDIR /home/worker
# Install Miniconda (Python 3.8) for the unprivileged user and remove the installer.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.2-Linux-x86_64.sh && \
    bash Miniconda3-py38_4.8.2-Linux-x86_64.sh -b && \
    rm Miniconda3-py38_4.8.2-Linux-x86_64.sh
ENV HOME="/home/worker" PATH="/home/worker/miniconda3/bin:${PATH}"
RUN pip install --no-cache-dir ipython pytest
RUN mkdir $HOME/build
ENV BUILD=$HOME/build
# Requirement files are copied before the sources so the dependency layer is
# reused when only the code changes.
COPY --chown=worker setup.py requirements*txt $BUILD/
RUN pip install --no-cache-dir -r $BUILD/requirements.txt
COPY --chown=worker dmrpp_generator $BUILD/dmrpp_generator
COPY --chown=worker generate_dmrpp.py $BUILD/generate_dmrpp.py
COPY --chown=worker tests $BUILD/tests
# WORKDIR replaces `cd $BUILD; ...`: with `;` a failed cd would not stop the build.
WORKDIR $BUILD
RUN python setup.py install
# The unit tests run as part of the build; a failing test fails the image build.
RUN pytest --junitxml=./test_results/test_dmrpp_generator.xml tests && \
    rm -rf tests
CMD ["python", "generate_dmrpp.py"]
ENTRYPOINT []
-- Release note [v4.0.5](https://ghrcdaac.github.io/dmrpp-generator/#v405). -- Release note [v4.0.4](https://ghrcdaac.github.io/dmrpp-generator/#v404). -- Release note [v4.0.3](https://ghrcdaac.github.io/dmrpp-generator/#v403). -- Release note [v4.0.2](https://ghrcdaac.github.io/dmrpp-generator/#v402). -- Release note [v4.0.1](https://ghrcdaac.github.io/dmrpp-generator/#v401). -- Release note [v4.0.0](https://ghrcdaac.github.io/dmrpp-generator/#v400). -- Release note [v3.5.0](https://ghrcdaac.github.io/dmrpp-generator/#v350). -- Release note [v3.4.0](https://ghrcdaac.github.io/dmrpp-generator/#v340). -- Release note [v3.3.1](https://ghrcdaac.github.io/dmrpp-generator/#v331). -- Release note [v3.3.0.beta](https://ghrcdaac.github.io/dmrpp-generator/#v330beta). -- Release note [v3.2.1](https://ghrcdaac.github.io/dmrpp-generator/#v321). -- Release note [v3.2.0](https://ghrcdaac.github.io/dmrpp-generator/#v320). -- Release note [v3.1.2](https://ghrcdaac.github.io/dmrpp-generator/#v312). -- Release note [v3.1.1](https://ghrcdaac.github.io/dmrpp-generator/#v311). -- Release note [v3.1.0](https://ghrcdaac.github.io/dmrpp-generator/#v310). -- Release note [v3.0.1.beta](https://ghrcdaac.github.io/dmrpp-generator/#v301beta). +This repo consists of two components. The DMR++ activity terraform module and a python CLI to the DMR++ Docker +container. + +Current OPeNDAP BESD image: +https://github.com/ghrcdaac/dmrpp-generator/blob/ce1b53772cf9d501d4576a8d94f4f6868e526f7d/Dockerfile#L1 ## Versioning We are following `v..` versioning convention, where: @@ -40,15 +20,24 @@ We are following `v..` versioning convention, where: * `+1` means we upgraded/patched the dependencies this software relays on. Can lead to breaking changes. * `+1` means we fixed a bug and/or added a feature. Breaking changes are not expected. -# 🔨 Pre-requisite -This module is meant to run within Cumulus stack. +# Pre-requisite +The prerequisites depend on which use case is needed. 
+ +## Terraform Module +This module is meant to be used within the Cumulus stack. If you don't have Cumulus stack deployed yet please consult [this repo](https://github.com/nasa/cumulus) and follow the [documentation](https://nasa.github.io/cumulus/docs/cumulus-docs-readme) to provision it. -# Deploying with Cumulus Stack +## DMR++ Python CLI +For each release after v4.1.0, there will be a python wheel published in the release assets. This can be installed and +used locally via pip like the following: +`pip install https://github.com/ghrcdaac/dmrpp-generator/releases/download/v1.0.0-test/dmrpp_file_generator-4.1.2-py3-none-any.whl` +The python module uses Docker Compose to generate dmrpp files locally so no other dependencies should be needed. + +# Deploying the Terraform module with the Cumulus Stack In [main.tf](https://github.com/nasa/cumulus-template-deploy/blob/master/cumulus-tf/main.tf) file (where you defined cumulus module) add - ```code + ```terraform module "dmrpp-generator" { // Required parameters source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download//dmrpp-generator.zip" @@ -67,7 +56,6 @@ module "dmrpp-generator" { log_destination_arn = var.aws_log_mechanism // default to null } - ``` In [variables.tf](https://github.com/nasa/cumulus-template-deploy/blob/master/cumulus-tf/variables.tf) file you need to define @@ -78,7 +66,6 @@ variable "dmrpp-generator-docker-image" { ``` Assuming you already defined the region and the prefix - # Add the activity to your workflow In your [workflow.tf](https://github.com/nasa/cumulus-template-deploy/blob/master/cumulus-tf/hello_world_workflow.tf) add ```code @@ -122,7 +109,6 @@ In your [workflow.tf](https://github.com/nasa/cumulus-template-deploy/blob/maste Where `` is the next step in your workflow. 
## Cumulus Collection Configuration - Add the options desired to the collection definition as follows: ```code @@ -150,12 +136,9 @@ Add the options desired to the collection definition as follows: } } ``` - For a list of all configuration options see: https://docs.opendap.org/index.php?title=DMR%2B%2B#:~:text=4.2%20Command%20line%20options - ## Cumulus Workflow Configuration - If your workflow is used by multiple collections which use a common dmrpp config, the config can be set at the workflow's `${StepName}.Parameters.cma.task_config.dmrpp` instead of in the collection @@ -201,7 +184,7 @@ dmrpp_config = { ``` ## Timeout Configuration -The subprocess call to the besd library has a configurable timeout value. It will default to 60 seconds +The subprocess call to the BESD library has a configurable timeout value. It will default to 60 seconds if not configured. There are two ways to provide a custom value. 1. Setting the `get_dmrpp_timeout` terraform variable 2. Adding `get_dmrpp_timeout` to the collection definition: `collection.meta.dmrpp` @@ -217,3 +200,72 @@ timeout is not respected. This can be configured in two ways. If the value is provided in the collection definition this will take precedence over the environment variable. 
+ +# DMR++ Python CLI +# How to install +Find the version you want to use and get the asset URL for the .whl file and install like the following example command: +```shell +pip install https://github.com/ghrcdaac/dmrpp-generator/releases/download/v/dmrpp_file_generator--py3-none-any.whl +``` + +# Supported get_dmrpp configuration +## Via env vars +Create a PAYLOAD environment variable holding dmrpp options +``` +PAYLOAD='{"dmrpp_regex": "^.*.nc4", "options":[{"flag": "-M"}, {"flag": "-s", "opt": "s3://ghrcsbxw-public/dmrpp_config/file.config","download": "true"}]}' +``` +`dmrpp_regex` is optional to override the DMRPP-Generator regex + +# Generate DMRpp files locally without Hyrax server +`generate-validate-dmrpp` now uses docker compose v2. Please update to +docker compose v2 or you will get the error +`/bin/sh: 1: docker compose: not found` +```shell +$generate-validate-dmrpp --help +usage: generate-validate-dmrpp [-h] -p NC_HDF_PATH [-prt PORT] [-pyld PAYLOAD] [-vldt VALIDATE] + +Generate and validate DMRPP files. + +optional arguments: + -h, --help show this help message and exit + -p NC_HDF_PATH, --path NC_HDF_PATH + Path to netCDF4 and HDF5 folder + -prt PORT, --port PORT + Port number to Hyrax local server + -pyld PAYLOAD, --payload PAYLOAD + Payload to execute get_dmrpp binary + -vldt VALIDATE, --validate VALIDATE + Validate netCDF4 and HDF5 files against OPeNDAP local server + +``` + +The folder `` should contain netCDF and/or HDF files +```code +generate-validate-dmrpp -p -vldt false +``` + +# Generate DMRpp files locally with Hyrax server (for validation) + +```shell +generate-validate-dmrpp -p +``` +A prompt will ask you to visit localhost:8080. 
If you want to change the default port run the command with +```shell +generate-validate-dmrpp -p -prt 8889 +Now you can validate the result in localhost:8889 +``` + + +# Generate missing metadata for non-netcdf compliant data (the -b switch) +```code +generate-validate-dmrpp -p -pyld $PAYLOAD +``` +or +```shell +docker run --rm -it --env-file ./env.list -v :/workstation ghrcdaac/dmrpp-generator +``` +where PAYLOAD contains your flags and switches +```shell +PAYLOAD={"options":[{"flag": "-M"}, {"flag": "-u", "opt": "/usr/share/hyrax"}]} +``` + diff --git a/create_release.sh b/create_release.sh index 01413c1..03af173 100644 --- a/create_release.sh +++ b/create_release.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ex export VERSION=$1 -export RELEASE_NAME=`basename $GITHUB_REPO` +export RELEASE_NAME=$(basename "$GITHUB_REPO") ## Create Release export RELEASE_URL=$(curl -H\ @@ -9,39 +9,51 @@ export RELEASE_NAME=`basename $GITHUB_REPO` -d "{\"tag_name\": \"$VERSION\", \"target_commitsh\": \"$VERSION\", \"name\": \"$VERSION\", \"body\": \"Release $VERSION\" }"\ -H "Content-Type: application/json"\ -X POST\ - https://api.github.com/repos/$GITHUB_REPO/releases |grep \"url\" |grep releases |sed -e 's/.*\(https.*\)\"\,/\1/'| sed -e 's/api/uploads/') + https://api.github.com/repos/"$GITHUB_REPO"/releases \ + | grep \"url\" | grep releases | sed -e 's/.*\(https.*\)\"\,/\1/' | sed -e 's/api/uploads/') ## Build TF modules that require source building function create_zip_file() { - BUILD_DIR=/tmp/${RELEASE_NAME} DESTINATION_DIR=${PWD}/dist - rm -rf ${DESTINATION_DIR} - mkdir -p ${BUILD_DIR} ${DESTINATION_DIR} - cp -r modules ${BUILD_DIR} - cp *tf ${BUILD_DIR} - cd ${BUILD_DIR} + rm -rf "${DESTINATION_DIR}" + mkdir -p "${BUILD_DIR}" "${DESTINATION_DIR}" + cp -r modules "${BUILD_DIR}" + cp -- *tf "${BUILD_DIR}" + cd "${BUILD_DIR}" sed -i "s/VERSION_SUB/${VERSION}/g" variables.tf - zip -r9 ${RELEASE_NAME}.zip . - mv ${RELEASE_NAME}.zip ${DESTINATION_DIR}/. 
- cd $DESTINATION_DIR - rm -rf ${BUILD_DIR} + zip -r9 "${RELEASE_NAME}".zip . + mv "${RELEASE_NAME}".zip "${DESTINATION_DIR}"/. + cd "$DESTINATION_DIR" + rm -rf "${BUILD_DIR}" } - +function create_wheel() { + pip install -r requirements_dev.txt + python -m build --wheel --outdir . . +} #### Release package create_zip_file ### Post the release -curl -X POST -H "Authorization: token $GITHUB_TOKEN" --data-binary "@${RELEASE_NAME}.zip" -H "Content-type: application/octet-stream" $RELEASE_URL/assets?name=${RELEASE_NAME}.zip - -## Create Release for dmrpp docker image -curl -H\ - "Authorization: token $GITHUB_TOKEN"\ - -d "{\"tag_name\": \"$VERSION\", \"target_commitsh\": \"$VERSION\", \"name\": \"$VERSION\", \"body\": \"Release $VERSION https://ghrcdaac.github.io/dmrpp-generator\" }"\ - -H "Content-Type: application/json"\ - -X POST\ - https://api.github.com/repos/$GUTHUB_DOCKER_IMAGE/releases +curl -X POST \ + -H "Authorization: token $GITHUB_TOKEN" --data-binary "@${RELEASE_NAME}.zip" \ + -H "Content-type: application/octet-stream" \ + "$RELEASE_URL"/assets?name="${RELEASE_NAME}".zip + +### Create and post the wheel +cd ../ +create_wheel +content=$(cat ./dmrpp_generator/version.py) +[[ $content =~ ([0-9]+.[0-9]+.[0-9]+) ]] +curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + -H "Content-Type: application/octet-stream" \ + "$RELEASE_URL/assets?name=dmrpp_file_generator-${BASH_REMATCH[1]}-py3-none-any.whl" \ + --data-binary "@dmrpp_file_generator-${BASH_REMATCH[1]}-py3-none-any.whl" diff --git a/dmrpp_generator/__init__.py b/dmrpp_generator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dmrpp_generator/dmrpp_options.py b/dmrpp_generator/dmrpp_options.py new file mode 100644 index 0000000..5192b59 --- /dev/null +++ b/dmrpp_generator/dmrpp_options.py @@ -0,0 +1,120 @@ +import logging +import os +import re +from tempfile import mkdtemp +import boto3 
class DMRppOptions:
    """
    Build the option string handed to ``get_dmrpp`` from a DMR++ config dict.

    Options flagged with ``"download": "true"`` are fetched (http, https or s3)
    into ``host_path`` and replaced by the local file path in the result.
    """

    def __init__(self, host_path=None) -> None:
        """
        :param host_path: Directory downloaded option files are written to.
            A new temporary directory is created when omitted.
        """
        # Create the temp dir per instance: a ``mkdtemp()`` default argument is
        # evaluated once at import time and would be shared by every instance.
        if host_path is None:
            host_path = mkdtemp()
        self.s3_client = boto3.client('s3')
        self.session = requests.Session()
        self.host_path = host_path.rstrip("/")

    def __download_files(self, link):
        """
        Calls the corresponding download function for the url's protocol.
        :param link: Location to find the file to download
        :return: Local path of the file (existing files are not downloaded again)
        :raises Exception: If the link's protocol is missing or not supported
        """
        filename = os.path.basename(link)
        local_path = f'{self.host_path}/{filename}'
        # Text before the first ':' is the protocol. A link without one yields
        # an empty protocol, which is reported by the default handler instead
        # of crashing on a failed regex match.
        protocol = link.partition(':')[0] if ':' in link else ''
        switcher = {'http': self.__get_http_file, 'https': self.__get_http_file,
                    's3': self.__get_s3_file}
        if not os.path.isfile(local_path):
            handler = switcher.get(protocol, self.__switcher_default)
            handler(link=link, local_path=local_path, protocol=protocol)
        return local_path

    @staticmethod
    def __switcher_default(protocol, **kwargs):
        """
        Fallback handler for protocols without a download implementation.
        :param protocol: The unsupported protocol
        :raises Exception: Always
        """
        message = f"The protocol {protocol} is not implemented yet: called using {kwargs}"
        logging.error(message)
        raise Exception(message)

    def __get_http_file(self, link, local_path, **kwargs):
        """
        Downloads the file at the url and stores it at the local path.
        :param link: Url of the file to download.
        :param local_path: Location to write the downloaded file to.
        """
        try:
            response = self.session.get(link)
            # Fail on 4xx/5xx instead of saving the error page as the file.
            response.raise_for_status()
            with open(local_path, 'wb') as file:
                file.write(response.content)
        except Exception as err:
            err_msg = f"called using {kwargs}, error : {err}"
            logging.error(msg=str(err_msg))
            raise

    def __get_s3_file(self, link, local_path, **kwargs):
        """
        Downloads the file at the s3 link and stores it at the local path.
        :param link: s3 link of the file to download (``s3://bucket/key``).
        :param local_path: Location to write the downloaded file to.
        :raises ValueError: If the link does not contain a bucket and a key
        """
        reg_res = re.match(r'^.*://([^/]*)/(.*)', link)
        if reg_res is None:
            message = f"Invalid s3 link: {link}"
            logging.error(message)
            raise ValueError(message)
        bucket_name = reg_res.group(1)
        key = reg_res.group(2)
        try:
            self.s3_client.download_file(bucket_name, key, local_path)
        except Exception as err:
            err_msg = f"called using {kwargs}, error : {err}"
            logging.error(msg=str(err_msg))
            raise

    def get_dmrpp_option(self, dmrpp_meta):
        """
        :param dmrpp_meta: DMR meta dict; its ``options`` list holds entries
            with a ``flag`` and optionally ``opt`` and ``download``
        :return A sequential string with the flags and URLs in order; ``-b`` is
            always appended
        :raises Exception: If an option has no flag
        """
        parts = []
        for option in dmrpp_meta.get('options', []):
            flag = option.get('flag')
            if not flag:
                raise Exception("A DMRPP flag should be present")
            parts.append(str(flag))
            file_link = option.get('opt', '')
            download = option.get('download') == 'true'
            location = self.__download_files(file_link) if download else file_link
            parts.append(str(location))
        parts.append('-b')
        # Split and re-join to collapse the gaps left by empty ``opt`` values.
        return " ".join(" ".join(parts).split())
def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, separate_bar='-', length=100, fill='█',
                       print_end="\r"):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int); 0 is rendered as a completed bar
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        separate_bar - Optional : what will separate the bar as it fills
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        print_end   - Optional  : end character (e.g. "\r", "\r\n") (Str)
    """
    if total:
        fraction = iteration / float(total)
        filled_length = int(length * iteration // total)
    else:
        # Nothing to do (total == 0): show a full bar instead of dividing by zero.
        fraction = 1.0
        filled_length = length
    percent = f"{100 * fraction:.{decimals}f}"
    f_bar = fill * filled_length + separate_bar * (length - filled_length)
    print(f'\r{prefix} |{f_bar}| {percent}% {suffix}', end=print_end)
    # Print New Line on Complete
    if iteration == total:
        print()
def check_docker_version(log_file_path):
    """
    Pick the docker compose invocation available on this machine.

    Runs ``docker compose version`` and appends its output to the log file.

    :param log_file_path: Path of the log file the command output is appended to
    :return: ``'docker compose'`` when the v2 plugin answers successfully,
        otherwise the legacy ``'docker-compose'``
    """
    dkr_comp_version = 'docker compose'
    with open(log_file_path, "a", encoding='utf-8') as output:
        try:
            # Decide on the exit status rather than on the exact wording of the
            # shell's "not found" message, which differs between shells and
            # between "docker missing" and "compose plugin missing".
            result = subprocess.run(dkr_comp_version.split() + ['version'], check=False,
                                    stdout=output, stderr=output)
            compose_v2_available = result.returncode == 0
        except OSError:
            # The docker executable itself is missing or cannot be run.
            compose_v2_available = False
    return dkr_comp_version if compose_v2_available else 'docker-compose'
cmd = f" {dkr_comp_version} -f {dockercompose_file_location} down {dmrrpp_service}" + subprocess.run(cmd, shell=True, check=False, + stdout=output, + stderr=output) + os.remove(dockercompose_file_location) + + +def main(): + parser = argparse.ArgumentParser(description='Generate and validate DMRPP files.') + parser.add_argument('-p', '--path', dest='nc_hdf_path', nargs=1, required=True, + help='Path to netCDF4 and HDF5 folder') + parser.add_argument('-prt', '--port', dest='port', nargs=1, default=["8080"], + help='Port number to Hyrax local server') + parser.add_argument('-pyld', '--payload', dest='payload', nargs=1, default=['{}'], + help='Payload to execute get_dmrpp binary') + parser.add_argument('-vldt', '--validate', dest='validate', nargs=1, default=['true'], + help='Validate netCDF4 and HDF5 files against OPeNDAP local server') + + args = parser.parse_args() + nc_hdf_path, port, payload, validate = [getattr(args, ele)[0] for ele in args.__dict__.keys()] + no_need_validation = validate not in ['true', '1', 'yes', 'y'] + # If the user doesn't want validation run dmrpp service alone without Hyrax UI + dmrrpp_service = "dmrpp" if no_need_validation else "" + # Depending on needing the validation the user should get either a path or Hyrax UI link + visit_link_path_message = f"{nc_hdf_path}" if no_need_validation else f"http://localhost:{port}/opendap (^C to kill the server)" + + # Counting number of files to estimate the work + _, _, files = next(os.walk(nc_hdf_path)) + # Remove dmrpp suffix from the list of files + [files.remove(file_) for file_ in files[:] if file_.endswith('.dmrpp')] + _, log_file_location = tempfile.mkstemp(prefix="dmrpp-generator-") + message_visit_server = "" if no_need_validation else f"Results served ( 🌎 ):\t{visit_link_path_message}" + try: + p_1 = Process(target=progress_bar, args=(len(files),)) + p_2 = Process(target=run_docker_compose, args=(payload, nc_hdf_path, port, dmrrpp_service, log_file_location)) + p_1.start() + p_2.start() + 
class DMRPPGenerator(Process):
    """
    Class to generate dmrpp files from hdf and netCDf files
    The input will be *.nc *nc4 *.hdf
    The output *.nc.dmrpp *nc4.dmrpp *.hdf.dmrpp
    """

    def __init__(self, **kwargs):
        """
        Read the dmrpp settings from the ``config`` keyword argument and the environment.

        Settings under ``config.collection.meta.dmrpp`` override ``config.dmrpp``.
        """
        config = kwargs.get('config', {})
        # any keys on collection config override keys from workflow config
        self.dmrpp_meta = {
            **config.get('dmrpp', {}),  # from workflow
            **config.get('collection', {}).get('meta', {}).get('dmrpp', {}),  # from collection
        }
        self.processing_regex = self.dmrpp_meta.get(
            'dmrpp_regex', '.*\\.(((?i:(h|hdf)))(e)?5|nc(4)?)(\\.bz2|\\.gz|\\.Z)?'
        )

        super().__init__(**kwargs)
        self.path = self.path.rstrip('/') + "/"
        # Enable logging the default is True. Environment values are strings, so
        # they are interpreted textually instead of being compared to True / 1.
        enable_logging = self._is_enabled(os.getenv('ENABLE_CW_LOGGING', 'True'))
        self.dmrpp_version = f"DMRPP {__version__}"
        self.logger_to_cw = LOGGER_TO_CW if enable_logging else logging
        self.logger_to_cw.info(f'config: {self.config}')
        self.timeout = int(self.dmrpp_meta.get(
            'get_dmrpp_timeout', os.getenv('GET_DMRPP_TIMEOUT', '60'))
        )
        # "false"/"0" coming from the environment must disable the feature; a
        # plain truthiness test would treat any non-empty string as enabled.
        self.enable_subprocess_logging = self._is_enabled(self.dmrpp_meta.get(
            'enable_subprocess_logging', os.getenv('ENABLE_SUBPROCESS_LOGGING', False)
        ))

        self.logger_to_cw.info(f'get_dmrpp_timeout: {self.timeout}')

    @staticmethod
    def _is_enabled(value):
        """
        Interpret a config or environment value as a boolean switch.
        :param value: bool, string or None
        :return: False for falsy values and for the strings
            '', 'false', 'f', '0', 'no', 'n', 'off', 'none' (case-insensitive); True otherwise
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', 'false', 'f', '0', 'no', 'n', 'off', 'none')
        return bool(value)

    @property
    def input_keys(self):
        """Regex map selecting the input files: processable files and their optional cmr metadata."""
        return {
            'input_files': f"{self.processing_regex}(\\.cmr\\.xml|\\.json)?$"
        }

    @staticmethod
    def get_file_type(filename, files):
        """
        Get custom file type, default to metadata
        :param filename: Granule file name
        :param files: list of collection files
        :return: file type if defined
        """
        for collection_file in files:
            # '.*' (match anything) is the fallback; the former '*.' is not a valid regex.
            if search(collection_file.get('regex', '.*'), filename):
                return collection_file.get('type', 'metadata')
        return 'metadata'

    @staticmethod
    def get_bucket(filename, files, buckets):
        """
        Extract the bucket from the files
        :param filename: Granule file name
        :param files: list of collection files
        :param buckets: Object holding buckets info
        :return: Bucket object
        """
        bucket_type = "public"
        for file in files:
            # '.*' (match anything) is the fallback; the former '*.' is not a valid regex.
            if search(file.get('regex', '.*'), filename):
                bucket_type = file['bucket']
                break
        return buckets[bucket_type]

    def upload_file_to_s3(self, filename, uri):
        """ Upload a local file to s3 if collection payload provided """
        return s3.upload(filename, uri, extra={})

    def process(self):
        """
        Override the processing wrapper

        Generates dmrpp files for every granule file matching the processing
        regex, uploads them to s3 and replaces any dmrpp entries already listed
        on the granule with the new ones.
        :return: The input payload with updated granule file lists
        """
        collection = self.config.get('collection')
        collection_files = collection.get('files', [])
        buckets = self.config.get('buckets')
        granules = self.input['granules']
        for granule in granules:
            dmrpp_files = []
            for file_ in granule['files']:
                if not search(f"{self.processing_regex}$", file_['fileName']):
                    self.logger_to_cw.debug(f"{self.dmrpp_version}: regex {self.processing_regex}"
                                            f" does not match filename {file_['fileName']}")
                    continue
                self.logger_to_cw.debug(f"{self.dmrpp_version}: regex {self.processing_regex}"
                                        f" matches filename to process {file_['fileName']}")
                # Build the s3 uri only when needed so a file that carries
                # 'filename' does not also have to carry 'bucket' and 'key'.
                if 'filename' in file_:
                    input_file_path = file_['filename']
                else:
                    input_file_path = f's3://{file_["bucket"]}/{file_["key"]}'
                output_file_paths = self.dmrpp_generate(input_file=input_file_path, dmrpp_meta=self.dmrpp_meta)

                for output_file_path in output_file_paths:
                    if output_file_path:
                        output_file_basename = os.path.basename(output_file_path)
                        dmrpp_file = {
                            "bucket": self.get_bucket(output_file_basename, collection_files, buckets)['name'],
                            "fileName": output_file_basename,
                            "key": os.path.join(os.path.dirname(file_.get('key')), output_file_basename),
                            "size": os.path.getsize(output_file_path),
                            "type": self.get_file_type(output_file_basename, collection_files),
                        }
                        dmrpp_files.append(dmrpp_file)
                        self.upload_file_to_s3(output_file_path, f's3://{dmrpp_file["bucket"]}/{dmrpp_file["key"]}')

            # Remove old dmrpp files if they exist before adding new ones.
            # Slice assignment keeps the same list object on the granule.
            granule['files'][:] = [
                existing for existing in granule['files']
                if not str(existing.get('fileName')).endswith('dmrpp')
            ]
            granule['files'] += dmrpp_files

        return self.input

    def get_dmrpp_command(self, dmrpp_meta, input_path, output_filename, local=False):
        """
        Getting the command line to create DMRPP files
        :param dmrpp_meta: dmrpp config dict; anything that is not a dict is treated as empty
        :param input_path: Directory passed to get_dmrpp as the data root
        :param output_filename: Path of the data file; the output is written to '<path>.dmrpp'
        :param local: Unused, kept for backward compatibility
        :return: The get_dmrpp command as a single-space separated string
        """
        dmrpp_meta = dmrpp_meta if isinstance(dmrpp_meta, dict) else {}
        dmrpp_options = DMRppOptions(self.path)
        options = dmrpp_options.get_dmrpp_option(dmrpp_meta=dmrpp_meta)
        # Look for '-u' as a whole token; a substring test would also fire on
        # option values that merely contain "-u" (e.g. inside a file name).
        local_option = f"-u file://{output_filename}" if '-u' in options.split() else ''
        dmrpp_cmd = f"get_dmrpp {options} {input_path} -o {output_filename}.dmrpp" \
                    f" {local_option} {os.path.basename(output_filename)}"
        return " ".join(dmrpp_cmd.split())

    def add_missing_files(self, dmrpp_meta, file_name):
        """
        Adds missing file
        :param dmrpp_meta: dmrpp config dict
        :param file_name: Path of the '.dmrpp.missing' file
        :return: [file_name] if the file exists and the '-M' flag was requested, else []
        """
        # If the missing file was not generated
        if not os.path.isfile(file_name):
            return []
        # If it was generated and the flag was set
        options = dmrpp_meta.get('options', [])
        if {'flag': '-M'} in options:
            return [file_name]
        return []

    def run_command(self, cmd):
        """
        Run cmd as a system command
        :param cmd: Command line; it is split on whitespace, not parsed by a shell
        :return: The CompletedProcess of the finished command
        :raises Exception: If the command fails, times out or cannot be started
        """
        stdout = None
        stderr = None

        if self.enable_subprocess_logging:
            stdout = subprocess.PIPE
            stderr = subprocess.STDOUT

        try:
            out = subprocess.run(cmd.split(), stdout=stdout, stderr=stderr, timeout=self.timeout, check=True)
        except Exception as e:
            self.logger_to_cw.info(f'cmd: {cmd}')
            # Chain the original exception so its traceback is preserved.
            raise Exception(f'get_dmrpp failed. \ncmd: {cmd} \nException: {e}') from e

        return out

    def dmrpp_generate(self, input_file, local=False, dmrpp_meta=None):
        """
        Generate DMRPP from S3 file
        :param input_file: s3 uri of the data file, or a local path when ``local`` is True
        :param local: Use ``input_file`` as is instead of downloading it
        :param dmrpp_meta: dmrpp config dict
        :return: List with the generated '.dmrpp' path and, when requested and
            present, the '.dmrpp.missing' path
        """
        # Force dmrpp_meta to be an object
        dmrpp_meta = dmrpp_meta if isinstance(dmrpp_meta, dict) else {}
        file_name = input_file if local else s3.download(input_file, path=self.path)
        cmd = self.get_dmrpp_command(dmrpp_meta, self.path, file_name, local)
        self.run_command(cmd)
        out_files = [f"{file_name}.dmrpp"] + self.add_missing_files(dmrpp_meta, f'{file_name}.dmrpp.missing')
        return out_files
+services: + dmrpp: + # Path to dockerfile. + # '.' represents the current directory in which + # docker-compose.yml is present. + image: ghrcdaac/dmrpp-generator:v3.2.1.1 + environment: + - PAYLOAD=${PAYLOAD} + # Mount volume + volumes: + - ${NC_FILES_PATH:-/tmp}:/usr/share/hyrax + + hyrax: + + # image to fetch from docker hub + image: opendap/hyrax:snapshot + ports: + - "${PORT:-8080}:8080" + volumes: + - ${NC_FILES_PATH:-/tmp}:/usr/share/hyrax/ + working_dir: /usr/share/hyrax + container_name: hyrax diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 9c3ee76..0000000 --- a/docs/index.md +++ /dev/null @@ -1,669 +0,0 @@ - -# 📖 Release notes -## v4.1.1 -This release: -* The CLI assumed providing a local file assumed that it should be applied to the dmr++ -u option. -## 🏃 Migration Steps to v4.1.1 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.1.1 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.1.1/dmrpp-generator.zip" -... -} - -## v4.1.0 -This release: -* The dmrpp resource was updated to use Cumulus ECS service version 15.0.4 -## 🏃 Migration Steps to v4.1.0 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.1.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.1.0/dmrpp-generator.zip" -... -} -``` - -## v4.0.9 -This release: -* The subprocess will have stdout and stderr default to None to reduce log spam. It can be enabled by passing -"enable_subprocess_logging": true into the event or setting the environment variable ENABLE_SUBPROCESS_LOGGING. -## 🏃 Migration Steps to v4.0.9 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.9 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.9/dmrpp-generator.zip" -... 
-} -``` - -## v4.0.8 -This release: -* default_log_retention_days is now configurable -## 🏃 Migration Steps to v4.0.8 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.8 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.8/dmrpp-generator.zip" -... -} -``` - -## v4.0.7 -This release: -* Updated to besd:3.20.13-664 https://wiki.earthdata.nasa.gov/pages/viewpage.action?pageId=304939198 -## 🏃 Migration Steps to v4.0.7 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.7 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.7/dmrpp-generator.zip" -... -} -``` - -## v4.0.6 -This release: -* Updated cumulus service version to v14.1.0 -* Added a variable to configure dmrpp to timeout. Can be set with a terraform variable or passed in by the workflow. -Defaults to 60 seconds -* Updated to besd:3.20.13-563 https://wiki.earthdata.nasa.gov/pages/viewpage.action?pageId=296466475 -## 🏃 Migration Steps to v4.0.6 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.6 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.6/dmrpp-generator.zip" -... -} -``` - -## v4.0.5 -This release: -* The subprocess call to Hyrax will now raise an exception based of result code: https://bugs.earthdata.nasa.gov/browse/GHRCCLOUD-4502 -* S3 exceptions for upload will no longer be caught: https://bugs.earthdata.nasa.gov/browse/GHRCCLOUD-4515 -## 🏃 Migration Steps to v4.0.5 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.5 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.5/dmrpp-generator.zip" -... -} -``` - -## v4.0.4 -This release: -* Updated the desd docker image to 3.20.13-310 which should resolve issue with the Bathy_SBES dataset. 
-## 🏃 Migration Steps to v4.0.4 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.4 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.4/dmrpp-generator.zip" -... -} -``` - -## v4.0.3 -This release: -* Updated the code to remove existing dmrpp files from the granule file list before adding newly created ones. -## 🏃 Migration Steps to v4.0.3 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.3 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.3/dmrpp-generator.zip" -... -} -``` - -## v4.0.2 -This release: -* Updated the hyrax besd version to 3.20.13-184. -## 🏃 Migration Steps to v4.0.2 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.2 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.2/dmrpp-generator.zip" -... -} -``` - -## v4.0.1 -This release: -* Updated the hyrax besd version to 3.20.13-130. -## 🏃 Migration Steps to v4.0.1 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.1 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.1/dmrpp-generator.zip" -... -} -``` - -## v4.0.0 -This release: -* Updated to cumulus v11.1.3 -## 🏃 Migration Steps to v4.0.0 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v4.0.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v4.0.0/dmrpp-generator.zip" -... 
-} -``` - -## v3.5.0 -This release: -* Uses opendap/besd:3.20.10-462 as base image -* Supports: -* * Fix Hyrax issue where it was unable to produce missing data files for level 3&4 granules that do not contain explicit domain coordinate data values - - -## 🏃 Migration Steps to v3.5.0 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.5.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.5.0/dmrpp-generator.zip" -... -} -``` - - -## v3.4.0 -This release: -* Uses opendap/besd:3.20.10-386 as base image -* Supports: -* * Fix Hyrax/OLFS generated links/URLs so that ForceLinksToHTTPS -* * Add support for HDF5 FIllValue chunks to the dmrpp_module -* * Add support for HDF5 FillValue feature to dmr++ generation - -## 🏃 Migration Steps to v3.4.0 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.4.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.4.0/dmrpp-generator.zip" -... -} -``` - -## v3.3.1 -This release: -* Compatible with Cumulus v10 and v11 -* docker_image variable default to the correct release (can be override from the module definition) - -## 🏃 Migration Steps to v3.3.1 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.3.1 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.3.1/dmrpp-generator.zip" -... -// Override dmrpp docker image version -docker_image = "ghrcdaac/dmrpp-generator" -... -} -``` - -## v3.3.0.beta (🚨 Not an official release) -This release: -* Compatible with Cumulus v10 -* fixes issue 24 [i_24](https://github.com/ghrcdaac/dmrpp-file-generator-docker/issues/24) - -## 🏃 Migration Steps to v3.3.0.beta -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.3.0.beta -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.3.0.beta/dmrpp-generator.zip" -... 
-// Change the value of your docker image to point to v3.3.0.beta tag -docker_image = "ghrcdaac/dmrpp-generator:v3.3.0.beta -... -} -``` - - -## v3.2.1 -This release: -* Support turning on and off logging to cloud watch `enable_cw_logging` variable [example](https://github.com/ghrcdaac/dmrpp-generator#deploying-with-cumulus-stack) -* Allow `dmrpp_config` to be defined within the worflow configuration. The collection definition defining `dmrpp_config` will override the one defined in the workflow configuration [example](https://github.com/ghrcdaac/dmrpp-generator#cumulus-workflow-configuration) -* Fixes DMRPP logging to cloudwatch duplicate entries - -## 🏃 Migration Steps to v3.2.1 -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.2.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.2.1/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v3.2.1 tag -docker_image = "ghrcdaac/dmrpp-generator:v3.2.1 -... -// To turn off logging to cloudwatch group -enable_cw_logging = false -} -``` - -## 🏃 Migration Steps to v3.2.0 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.2.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.2.0/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v3.2.0 tag -docker_image = "ghrcdaac/dmrpp-generator:v3.2.0 -... -} -``` -## v3.2.0 -This release: -* Uses opendap/besd:3.20.9-91 as base image - -## 🏃 Migration Steps to v3.2.0 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.2.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.2.0/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v3.2.0 tag -docker_image = "ghrcdaac/dmrpp-generator:v3.2.0 -... 
-} -``` - - -## v3.1.2 -This release: -* Add logging capability to cloudwatch log groups -* Uses a simpler logic to check the file regular expression [PR 22](https://github.com/ghrcdaac/dmrpp-file-generator-docker/pull/22) -* Uses opendap/besd:3.20.9-76 -* Fixes [issue 14](https://github.com/ghrcdaac/dmrpp-generator/issues/14) - -## 🏃 Migration Steps to v3.1.2 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.1.2 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.1.2/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v3.1.2 tag -docker_image = "ghrcdaac/dmrpp-generator:v3.1.2 -... -} -``` - - -## v3.1.1 -This release: -* Upgrades the base cumulus module to v10.1.1 -* Uses opendap/besd:3.20.9-15 - -## 🚨 Breaking Changes v3.1.1 -* Works with Cumulus v10.1.1 -* Relays on opendap/besd:3.20.9-15 - -## 🏃 Migration Steps to v3.1.1 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.1.1 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.1.1/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v3.1.1 tag -docker_image = "ghrcdaac/dmrpp-generator:v3.1.1 -... -} -``` - -## v3.1.0 -This release: -* Uses cumulus v10.1.1 -* Support custom DMRPP file reg_ex -* Support creating and validating locally - - -## 🚨 v3.1.0 Changes -Added the ability to custom the regular expression for DMRPP generator. For example this configuration -```code - -{ - "config": { - "meta": { - "dmrpp": { - "dmrpp_regex" : "^.*.H6", - "options": [ - ... - ] - } - } -} -``` -Will process only files with `H6` extension - -## 🏃 Migration Steps to v3.1.0 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.1.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.1.0/dmrpp-generator.zip" -... 
-// Change the value of your docker image to point to v3.1.0 tag -docker_image = "ghrcdaac/dmrpp-generator:v3.1.0 -... -} -``` - - - -## v3.0.1.beta -This release: -* Uses cumulus v10.1.1 -* Support `HDF5 | hdf5` extensions -* Support get_dmrpp options and flags - - - - -## 🚨 Breaking Changes v3.0.1.beta -To pass the flags you need to define the meta config as follow -``` -{ - ... - "meta": { - "dmrpp": { - "options": [ - { - "flag": "-M" - }, - { - "flag": "-s", - "opt": "s3://ghrcsbxw-public/dmrpp_config/file.config", - "download": "true" - }, - { - "flag": "-c", - "opt": "s3://ghrcsbxw-public/aces1cont__1/aces1cont_2002.212_v2.50.tar.cmr.json", - "download": "false" - } - ] - } - ... - } - ... -} -``` -Check [DMR++](https://docs.opendap.org/index.php?title=DMR%2B%2B) for DMRPP options -## 🏃 Migration Steps to v3.0.1.beta - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v3.0.1.beta -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v3.0.1.beta/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v3.0.1.beta tag -docker_image = "ghrcdaac/dmrpp-generator:v3.0.1.beta" -... -} -``` - - -## v2.1.1 -This release: -* Support `H5` extension - - -## 🏃 Migration Steps to v2.1.1 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v2.1.1 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v2.1.1/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v2.1.1 tag -docker_image = "ghrcdaac/dmrpp-generator:v2.1.1" -... 
-} -``` - -## 🚨 Breaking Changes v2.1.1 -None - -## v2.1.0 -This release: -* Fixes the issue [issue 12](https://github.com/ghrcdaac/dmrpp-generator/issues/12) - -## 🏃 Migration Steps to v2.1.0 - release -```code -module "dmrpp-generator" { -// Change the source url in your terraform file to point to v2.1.0 -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v2.1.0/dmrpp-generator.zip" -... -// Change the value of your docker image to point to v2.1.0 tag -docker_image = "ghrcdaac/dmrpp-generator:v2.1.0" -... -// Add destination log to remote kinesis (optional) -log_destination_arn = var.log_destination_arn -} -``` -## 🚨 Breaking Changes v2.1.0 -Only compatible with Cumulus v7+ - -## v2.0.1 -This release: -* Fixes the issue of `filepath` passed from `move-granules` step function - - - -## v2.0.0 -This release: -* Fixes the issue [issue 11](https://github.com/ghrcdaac/dmrpp-generator/issues/11). - - -## v1.1.0 -This release: -* Fixes the file type issue, now you can define a custom dmrpp file type. -* Uses a new hyrax release [base_image](https://hub.docker.com/r/opendap/besd) - -## v1.0.6 -This release: -* Fixes the issue of AWS provider [issue#10](https://github.com/ghrcdaac/dmrpp-generator/issues/10) -* Reads from `url_path` key passed in the payload. Also, the provider was taking off from the main.tf - -## v1.0.5 -This release fixes the problem adding a type of meta data for the dmrpp file and also changing some spacings. - -## v1.0.4 -This release fixes the problem of assuming the granuleId is the same as the file name [issue#9](https://github.com/ghrcdaac/dmrpp-generator/issues/9) - - - - -## 🏃 Migration Steps to v2.0.1 -Change the source url in your terraform file to point to v2.0.0 release -```code -module "dmrpp-generator" { -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v2.0.1/dmrpp-generator.zip" -... -} -``` -Change the value of your docker image to point to v2.0.1 tag -```code -module "dmrpp-generator" { -... 
-docker_image = "ghrcdaac/dmrpp-generator:v2.0.1" -} - - - -## 🏃 Migration Steps to v2.0.0 -Change the source url in your terraform file to point to v2.0.0 release -```code -module "dmrpp-generator" { -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v2.0.0/dmrpp-generator.zip" -... -} -``` -Change the value of your docker image to point to v2.0.0 tag -```code -module "dmrpp-generator" { -... -docker_image = "ghrcdaac/dmrpp-generator:v2.0.0" -} - -``` - -## Features added to v2.0.0 -Now you can pass DMRPP options via collection definition. -For more info about DMRPP options please refer to [DMRPP Documentation](https://docs.google.com/presentation/d/1ZTeWjk6bUBgKP5iD2NVb_Ur8ZcvOpVbfd7pAl8c5bCs/edit#slide=id.p) - -```code -{ - "name" : "foo" - ... - "meta": { - "dmrpp": { - "create_missing_cf" : "-M" - } - ... - } - ... -} -``` -`create_missing_cf` is an arbitrary key name, you can achieve the same result by passing -```code -{ - "name" : "foo" - ... - "meta": { - "dmrpp": { - "option1" : "-M" - } - ... - } - ... -} -``` -For more supported DMRPP options (example `-v`) -```code -```code -{ - "name" : "foo" - ... - "meta": { - "dmrpp": { - "option1" : "-M", - "option2" : "-v" - } - ... - } - ... -} -``` - - -## 🚨 Breaking Changes v2.0.0 -DMRPP activity is using `url_path` instead of `filepath`. - -## 🏃 Migration Steps to v1.1.0 -Change the source url in your terraform file to point to v1.1.0 release -```code -module "dmrpp-generator" { -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v1.1.0/dmrpp-generator.zip" -... -} -``` -Change the value of your docker image to point to v1.1.0 tag -```code -module "dmrpp-generator" { -... 
-docker_image = "ghrcdaac/dmrpp-generator:v1.1.0" -} - -``` - -## 🏃 Migration Steps to v1.0.6 -Change the source url in your terraform file to point to v1.0.6 release -```code -module "dmrpp-generator" { -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v1.0.6/dmrpp-generator.zip" -... -} -``` -Change the value of your docker image to point to v1.0.6 tag -```code -module "dmrpp-generator" { -... -docker_image = "ghrcdaac/dmrpp-generator:v1.0.6" -} - -``` - -## 🏃 Migration Steps to v1.0.5 -Change the source url in your terraform file to point to v1.0.5 release -```code -module "dmrpp-generator" { -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v1.0.5/dmrpp-generator.zip" -... -} -``` -Change the value of your docker image to point to v1.0.5 tag -```code -module "dmrpp-generator" { -... -docker_image = "ghrcdaac/dmrpp-generator:v1.0.5" -} - -``` - -## 🏃 Migration Steps to v1.0.4 -Change the source url in your terraform file to point to v1.0.4 release -```code -module "dmrpp-generator" { -source = "https://github.com/ghrcdaac/dmrpp-generator/releases/download/v1.0.4/dmrpp-generator.zip" -... -} -``` -Change the value of your docker image to point to v1.0.4 tag -```code -module "dmrpp-generator" { -... 
-docker_image = "ghrcdaac/dmrpp-generator:v1.0.4" -} - -``` -## 🚨 Breaking Changes -The workflow is accepting the whole payload -```code - "HyraxProcessing": { - "Parameters": { - "cma": { - "event.$": "$", - "task_config": { - "buckets": "{$.meta.buckets}", - "distribution_endpoint": "{$.meta.distribution_endpoint}", - "files_config": "{$.meta.collection.files}", - "fileStagingDir": "{$.meta.collection.url_path}", - "granuleIdExtraction": "{$.meta.collection.granuleIdExtraction}", - "collection": "{$.meta.collection}" - } - } - }, - "Type": "Task", - "Resource": "${module.dmrpp-generator.dmrpp_task_id}", - "Catch": [ - { - "ErrorEquals": [ - "States.ALL" - ], - "ResultPath": "$.exception", - "Next": "WorkflowFailed" - } - ], - "Retry": [ - { - "ErrorEquals": [ - "States.ALL" - ], - "IntervalSeconds": 2, - "MaxAttempts": 3 - } - ], - "Next": "" - } -``` diff --git a/generate_dmrpp.py b/generate_dmrpp.py new file mode 100644 index 0000000..6ba034c --- /dev/null +++ b/generate_dmrpp.py @@ -0,0 +1,20 @@ +from os import listdir, getenv +from os.path import isfile, join, basename +import json +from re import match +import logging +from dmrpp_generator.main import DMRPPGenerator +logging.getLogger() + +if __name__ == "__main__": + payload = getenv('PAYLOAD', '{}') + meta = json.loads(payload) + workstation_path = getenv('MOUNT_VOL', '/usr/share/hyrax/') + join_path = lambda x: join(workstation_path, x) + input_files = [join_path(f) for f in listdir(workstation_path) if isfile(join_path(f))] + dmrpp = DMRPPGenerator(input=input_files) + dmrpp.path = workstation_path + dmrpp.processing_regex = meta.get('dmrpp_regex', dmrpp.processing_regex) + for input_file in input_files: + if match(f"{dmrpp.processing_regex}$", basename(input_file)): + dmrpp.dmrpp_generate(input_file, local=True, dmrpp_meta=meta) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a5e8bc1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +boto3==1.26.90 
+certifi==2023.7.22 +cumulus-message-adapter==2.0.3 +cumulus-process~=1.3.0 +setuptools~=52.0.0 +requests~=2.26.0 diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..0541ec1 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1 @@ +build==1.0.3 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e30b6ef --- /dev/null +++ b/setup.py @@ -0,0 +1,43 @@ +from os import path +from codecs import open as codopen +from importlib import import_module +from setuptools import setup, find_packages + + +here = path.abspath(path.dirname(__file__)) + +__version__ = import_module('dmrpp_generator.version').__version__ + + +# get dependencies (blank lines and '#' comment lines in requirements.txt are skipped) + +with codopen(path.join(here, 'requirements.txt'), encoding='utf-8') as f: + all_reqs = f.read().splitlines() +install_requires = [x.strip() for x in all_reqs if x.strip() and not x.strip().startswith('#')] + + +setup( + name='dmrpp_file_generator', + version=__version__, + author='Abdelhak Marouane (am0089@uah.edu)', + description='Library to generate DMRpp files from netCDF and HDF files, can be used with ECS activity', + url='https://github.com/ghrcdaac/dmrpp-generator', + license='Apache 2.0', + classifiers=[ + 'Framework :: Pytest', + 'Topic :: Scientific/Engineering', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.7', + ], + entry_points={ + 'console_scripts': [ + 'dmrpp-generator=dmrpp_generator.main:DMRPPGenerator.cli', + 'generate-validate-dmrpp=dmrpp_generator.generate_and_validate_dmrpp:main' + ] + }, + packages=find_packages(exclude=['docs', 'tests*']), + include_package_data=True, + install_requires=install_requires, +) diff --git a/tests/fixtures/OCEAN_BOLUS_VELOCITY_day_mean_2017-12-31_ECCO_V4r4_latlon_0p50deg.nc b/tests/fixtures/OCEAN_BOLUS_VELOCITY_day_mean_2017-12-31_ECCO_V4r4_latlon_0p50deg.nc new file mode 100644 index 0000000..0531cea Binary files /dev/null and
b/tests/fixtures/OCEAN_BOLUS_VELOCITY_day_mean_2017-12-31_ECCO_V4r4_latlon_0p50deg.nc differ diff --git a/tests/fixtures/payload.json b/tests/fixtures/payload.json new file mode 100644 index 0000000..5ad76a5 --- /dev/null +++ b/tests/fixtures/payload.json @@ -0,0 +1,71 @@ +{ + "config": { + "files_config": [ + { + "bucket": "private", + "regex": "^tpw_v07r\\d{2}_\\d{6}\\.nc$", + "sampleFileName": "tpw_v07r01_201910.nc" + }, + { + "bucket": "protected", + "regex": "^tpw_v07r\\d{2}_\\d{6}\\.dmrpp$", + "sampleFileName": "tpw_v07r01_201910.dmrpp" + } + ], + "buckets": { + "protected": { + "type": "protected", + "name": "ghrc-app-protected" + }, + "protected-1": { + "type": "protected", + "name": "ghrc-app-restore-protected" + }, + "internal": { + "type": "internal", + "name": "ghrc-app-internal" + }, + "private": { + "type": "private", + "name": "ghrc-app-private" + }, + "public": { + "type": "public", + "name": "ghrc-app-public" + } + }, + "collection": { + "name": "rss1tpwnv7r01new", + "version": "7R01", + "dataType": "netCDF", + "process": "rssm1tpwn", + "provider_path": "/vapor/monthly_1deg", + "url_path": "rss1tpwn__1", + "duplicateHandling": "replace", + "granuleId": "^(tpw_v07r).*\\.nc$", + "granuleIdExtraction": "(tpw_v07r.*)", + "sampleFileName": "tpw_v07r01_201812.nc", + "meta": { + "dmrpp": { + "options" : [ + { + "flag": "-M" + } + ] + } + }, + "files": [ + { + "bucket": "private", + "regex": "^.*.nc$", + "sampleFileName": "tpw_v07r01_201910.nc" + }, + { + "bucket": "protected", + "regex": "^tpw_v07r\\d{2}_\\d{6}\\.(dmrpp|missing)$", + "sampleFileName": "tpw_v07r01_201910.dmrpp" + } + ] + } +} +} diff --git a/tests/fixtures/tpw_v07r01_201910.nc b/tests/fixtures/tpw_v07r01_201910.nc new file mode 100644 index 0000000..1f41862 Binary files /dev/null and b/tests/fixtures/tpw_v07r01_201910.nc differ diff --git a/tests/test_dmrpp_command_line.py b/tests/test_dmrpp_command_line.py new file mode 100644 index 0000000..1427572 --- /dev/null +++ 
b/tests/test_dmrpp_command_line.py @@ -0,0 +1,72 @@ +import os +from unittest import TestCase + +from dmrpp_generator.main import DMRPPGenerator + +class TestDMRPPCommandLine(TestCase): + """ + Tests for the get_dmrpp command line built by DMRPPGenerator.get_dmrpp_command + """ + + dmrpp = DMRPPGenerator(input=[], config={}) + def test_1_local(self): + """ + Empty meta: the command carries no extra flags + :return: + """ + self.assertEqual('get_dmrpp -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command({}, 'foo', 'bar')) + + def test_2_local_m(self): + """ + -M option with keyword arguments: -M appears in the command + :return: + """ + + meta = {'options': [{'flag': '-M'}]} + self.assertEqual('get_dmrpp -M -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command(dmrpp_meta=meta, + input_path='foo', + output_filename='bar')) + + def test_3_local_no_m(self): + """ + Empty options list: the command carries no extra flags + :return: + """ + meta = {'options': []} + self.assertEqual('get_dmrpp -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command(meta, 'foo', 'bar')) + + def test_4_local_m(self): + """ + -M option with positional arguments: -M appears in the command + :return: + """ + meta = {'options': [{'flag': '-M'}]} + self.assertEqual('get_dmrpp -M -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command(meta, 'foo', 'bar')) + + def test_5_cumulus_no_meta_config(self): + """ + Empty meta: the command carries no extra flags (same inputs as test_1) + :return: + """ + + self.assertEqual('get_dmrpp -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command({}, 'foo', 'bar')) + + + def test_6_cumulus_m(self): + """ + -M option: -M appears in the command (same inputs as test_4) + :return: + """ + dmrpp_meta = {'options': [{'flag': '-M'}]} + self.assertEqual('get_dmrpp -M -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command(dmrpp_meta, 'foo', 'bar')) + + + def test_7_cumulus_adding_wrongval(self): + """ + Meta keys other than 'options' add no flags to the command + :return: + """ + dmrpp_meta = { + "create_missing_cf": "foobar" + } + self.assertEqual('get_dmrpp -b foo -o bar.dmrpp bar', self.dmrpp.get_dmrpp_command(dmrpp_meta, 'foo', 'bar')) diff --git a/tests/test_generate_dmrpp.py b/tests/test_generate_dmrpp.py new file mode 100644
index 0000000..c14ca0d --- /dev/null +++ b/tests/test_generate_dmrpp.py @@ -0,0 +1,118 @@ +import os +from unittest import TestCase +from unittest.mock import patch +import json +from dmrpp_generator.main import DMRPPGenerator + +class StorageValues: + """ + Holds the output of process() so that later tests can inspect it + """ + processing_output = None + + +class TestDMRPPFileGeneration(TestCase): + """ + Test generating dmrpp files. + """ + granule_id = "tpw_v07r01_201910" + granule_name = "tpw_v07r01_201910.nc" + fixture_path = os.path.join(os.path.dirname(__file__), "fixtures") + input_file = { + "granules": [ + { + "granuleId": granule_id, + "dataType": "MODIS_A-JPL-L2P-v2019.0", + "sync_granule_duration": 3759, + "files": [ + { + "bucket": "fake-cumulus-protected", + "checksum": "aa5204f125ae83847b3b80fa2e571b00", + "checksumType": "md5", + "fileName": granule_name, + "key": f"fakepath/2020/001/{granule_name}", + "size": 18232098, + "type": "data", + }, + { + "bucket": "fake-cumulus-public", + "fileName": f"{granule_name}.md5", + "key": f"fakepath/2020/001/{granule_name}.md5", + "size": 98, + "type": "metadata", + }, + { + "bucket": "fake-cumulus-public", + "fileName": f"{granule_name}.cmr.json", + "key": f"{granule_name}.cmr.json", + "size": 1381, + "type": "metadata", + } + ], + "version": "2019.0" + } + ] + } + + payload_file = f"{fixture_path}/payload.json" + with open(payload_file, encoding='UTF-8') as fle: + payload = json.load(fle) + + payload_data = payload + + process_instance = DMRPPGenerator(input=input_file, config=payload_data['config'], path=fixture_path) + process_instance.path = fixture_path + + @patch('dmrpp_generator.main.DMRPPGenerator.upload_file_to_s3', + return_value={granule_id:f's3://{granule_name}.dmrpp'}) + @patch('cumulus_process.Process.fetch_all', + return_value={'input_key': [os.path.join(os.path.dirname(__file__), f"fixtures/{granule_name}")]}) + @patch('os.remove', return_value=granule_name) + @patch('cumulus_process.s3.download',
return_value=f"{process_instance.path}/{granule_name}") + def test_1_check_generate_dmrpp(self, mock_download, mock_remove, mock_fetch, mock_upload): + """ + Run process() with S3 access mocked and check that the .dmrpp file exists on disk + :return: + """ + _ = mock_download, mock_remove, mock_fetch, mock_upload + + StorageValues.processing_output = self.process_instance.process() + expected_file_path = f"{self.process_instance.path}/{self.granule_name}.dmrpp" + self.assertTrue(os.path.exists(expected_file_path)) + + def test_2_check_output(self): + """ + Test the output schema of the processing + :return: + """ + self.assertListEqual(['granules'], list(StorageValues.processing_output.keys())) + + def test_3_checkout_dmrpp_output(self): + """Check that the generated .dmrpp file is listed in the output granule files""" + dmrpp_file = f"{self.granule_name}.dmrpp" + dmrpp_exists = False + for granules in StorageValues.processing_output.get('granules'): + for file in granules.get('files'): + if file["fileName"] == dmrpp_file: + dmrpp_exists = True + self.assertTrue(dmrpp_exists) + + # @patch('dmrpp_generator.main.DMRPPGenerator.upload_file', + # return_value={granule_id:f's3://{granule_name}.dmrpp'}) + # @patch('cumulus_process.Process.fetch_all', + # return_value={'input_key': [os.path.join(os.path.dirname(__file__), f"fixtures/{granule_name}")]}) + # @patch('os.remove', return_value=granule_name) + # @patch('cumulus_process.s3.download', return_value=f"{process_instance.path}/{granule_name}") + # @patch('cumulus_process.s3.upload', return_value=f"s3://fake_s3/{granule_name}") + # def test_4_checkout_missing_nc(self, mock_upload, mock_fetch, mock_remove, mock_download, mock_upload_s3): + # self.payload_data['config']['collection']['meta']['dmrpp']['options'] = [{"flag": "-M"}] + # process_instance = DMRPPGenerator(input=self.input_file, config=self.payload_data['config'], path=self.fixture_path) + # process_instance.path = self.fixture_path + # outputs = process_instance.process() + # missing_nc_file = f"{self.granule_name}.missing" + # missing_nc_file_exists = False + # for granules
in outputs['granules']: + # for file in granules.get('files'): + # if file["fileName"] == missing_nc_file: + # missing_nc_file_exists = True + # self.assertEqual(True, missing_nc_file_exists)