diff --git a/Dockerfile b/Dockerfile index 07b7cda..0fecc49 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,11 +25,14 @@ LABEL maintainer="tomkralidis@gmail.com" ENV TZ="Etc/UTC" \ DEBIAN_FRONTEND="noninteractive" \ - DEBIAN_PACKAGES="bash curl git python3-pip python3-setuptools vim" + DEBIAN_PACKAGES="bash cron curl git python3-pip python3-setuptools vim" # copy the app COPY . /app +# add to crontab +COPY ./docker/wis2-gdc-management.cron /etc/cron.d/wis2-gdc-management.cron + RUN apt-get update -y && \ # install dependencies apt-get install -y ${DEBIAN_PACKAGES} && \ @@ -42,6 +45,8 @@ RUN apt-get update -y && \ # cleanup apt autoremove -y && \ apt-get -q clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ + chmod 0644 /etc/cron.d/wis2-gdc-management.cron && \ + crontab /etc/cron.d/wis2-gdc-management.cron ENTRYPOINT [ "/app/docker/entrypoint.sh" ] diff --git a/README.md b/README.md index da4e87d..de46515 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,9 @@ wis2-gdc register /path/to/dir/or/wcmp2-files # load from wis2box known deployments (https://demo.wis2box.wis.wmo.int) wis2-gdc sync wis2box + +# create an archive of metadata records to a zipfile +wis2-gdc archive foo.zip ``` ### Docker diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f43a66b..f9c8699 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -66,9 +66,13 @@ services: context: .. 
env_file: - wis2-gdc.env + environment: + - WIS2_GDC_API_URL_DOCKER=http://wis2-gdc-api depends_on: elasticsearch: condition: service_healthy + volumes: + - wis2-gdc-management-data:/data restart: always command: ["pywis-pubsub", "subscribe", "--config", "/app/docker/pywis-pubsub.yml", "--verbosity", "DEBUG"] networks: @@ -81,6 +85,8 @@ services: image: geopython/pygeoapi:latest depends_on: - wis2-gdc-management + volumes: + - wis2-gdc-management-data:/data healthcheck: test: ["CMD", "curl", "-f", "http://localhost/conformance"] interval: 5s @@ -93,6 +99,7 @@ services: volumes: es-data: + wis2-gdc-management-data: networks: wis2-gdc-net: diff --git a/docker/wis2-gdc-api/Dockerfile b/docker/wis2-gdc-api/Dockerfile index 3e2fb6c..ba8c205 100644 --- a/docker/wis2-gdc-api/Dockerfile +++ b/docker/wis2-gdc-api/Dockerfile @@ -23,7 +23,16 @@ FROM geopython/pygeoapi:latest LABEL maintainer="Tom Kralidis " +ENV PYGEOAPI_CONFIG=/pygeoapi/local.config.yml +ENV PYGEOAPI_OPENAPI=/pygeoapi/local.openapi.yml + RUN pip3 install pywcmp && \ pywcmp bundle sync COPY ./wis2-gdc.yml /pygeoapi/local.config.yml +COPY ./app.py /pygeoapi/pygeoapi/app.py +COPY ./entrypoint.sh /app/docker/wis2-gdc-api/entrypoint.sh + +RUN chmod +x /app/docker/wis2-gdc-api/entrypoint.sh + +ENTRYPOINT [ "/app/docker/wis2-gdc-api/entrypoint.sh" ] diff --git a/docker/wis2-gdc-api/app.py b/docker/wis2-gdc-api/app.py new file mode 100644 index 0000000..a4c9c97 --- /dev/null +++ b/docker/wis2-gdc-api/app.py @@ -0,0 +1,50 @@ +############################################################################### +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+###############################################################################
+
+from flask import Flask, make_response, redirect
+from pygeoapi.flask_app import BLUEPRINT as pygeoapi_blueprint
+
+app = Flask(__name__, static_url_path='/static')
+app.url_map.strict_slashes = False
+
+app.register_blueprint(pygeoapi_blueprint, url_prefix='/')
+
+try:
+    from flask_cors import CORS
+    CORS(app)
+except ImportError:  # CORS needs to be handled by upstream server
+    pass
+
+
+@app.route('/archive.zip')
+def archive():
+    """
+    Serve the metadata archive zipfile
+
+    :returns: zipfile response, or redirect when the archive is missing
+    """
+    try:  # binary mode: zip content must not be decoded as text
+        with open('/data/archive.zip', 'rb') as fh:
+            response = make_response(fh.read(), 200)
+    except FileNotFoundError:  # NOTE(review): was dead code, confirm intent
+        return redirect('https://docs.wis2box.wis.wmo.int', code=302)
+    response.headers['Content-Type'] = 'application/zip'
+    return response
diff --git a/docker/wis2-gdc-api/entrypoint.sh b/docker/wis2-gdc-api/entrypoint.sh
new file mode 100644
index 0000000..bdd58e3
--- /dev/null
+++ b/docker/wis2-gdc-api/entrypoint.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+###############################################################################
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+###############################################################################
+
+# pygeoapi entry script
+
+echo "START /entrypoint.sh"
+
+set +e
+
+# gunicorn env settings with defaults
+SCRIPT_NAME="/"
+CONTAINER_NAME="wis2-gdc-api"
+CONTAINER_HOST=${CONTAINER_HOST:=0.0.0.0}
+CONTAINER_PORT=${CONTAINER_PORT:=80}
+WSGI_WORKERS=${WSGI_WORKERS:=4}
+WSGI_WORKER_TIMEOUT=${WSGI_WORKER_TIMEOUT:=6000}
+WSGI_WORKER_CLASS=${WSGI_WORKER_CLASS:=gevent}
+
+# What to invoke: default is to run gunicorn server
+entry_cmd=${1:-run}
+
+# Shorthand: print the message with an "ERROR: " prefix and abort the script
+function error() {
+    echo "ERROR: $*"
+    exit -1
+}
+
+# Workdir
+cd /pygeoapi
+
+# Lock all Python files (for gunicorn hot reload); -exec is safe for any name
+find . -type f -name "*.py" -exec chmod 0444 {} +
+
+echo "Trying to generate OpenAPI document"
+pygeoapi openapi generate "${PYGEOAPI_CONFIG}" --output-file "${PYGEOAPI_OPENAPI}"
+
+[[ $? -ne 0 ]] && error "OpenAPI document could not be generated"
+
+echo "openapi.yml generated continue to pygeoapi"
+
+case ${entry_cmd} in
+    # Run pygeoapi server
+    run)
+        # SCRIPT_NAME should not have value '/'
+        [[ "${SCRIPT_NAME}" = '/' ]] && export SCRIPT_NAME="" && echo "make SCRIPT_NAME empty from /"
+
+        echo "Start gunicorn name=${CONTAINER_NAME} on ${CONTAINER_HOST}:${CONTAINER_PORT} with ${WSGI_WORKERS} workers and SCRIPT_NAME=${SCRIPT_NAME}"
+        exec gunicorn --workers ${WSGI_WORKERS} \
+            --worker-class=${WSGI_WORKER_CLASS} \
+            --timeout ${WSGI_WORKER_TIMEOUT} \
+            --name=${CONTAINER_NAME} \
+            --bind ${CONTAINER_HOST}:${CONTAINER_PORT} \
+            --reload \
+            --reload-extra-file "${PYGEOAPI_CONFIG}" \
+            pygeoapi.app:app
+        ;;
+    *)
+        error "unknown command arg: must be run (default)"
+        ;;
+esac
+
+echo "END /entrypoint.sh"
+
diff --git a/docker/wis2-gdc-management.cron b/docker/wis2-gdc-management.cron
new file mode 100644
index 0000000..5348064
--- /dev/null
+++ b/docker/wis2-gdc-management.cron
@@ -0,0 +1 @@
+0 0 * * * su -c "wis2-gdc archive /data/archive.zip" > /proc/1/fd/1 2>/proc/1/fd/2
diff --git a/wis2_gdc/__init__.py b/wis2_gdc/__init__.py
index 5b52bc7..e2b1483 100644
--- a/wis2_gdc/__init__.py
+++ b/wis2_gdc/__init__.py
@@ -22,7 +22,7 @@
 import click
 
 from wis2_gdc.registrar import register, setup, teardown
-from wis2_gdc.sync import sync
+from wis2_gdc.sync import archive, sync
 
 __version__ = '0.1.dev0'
 
@@ -35,6 +35,7 @@ def cli():
     pass
 
 
+cli.add_command(archive)
 cli.add_command(sync)
 cli.add_command(register)
 cli.add_command(setup)
diff --git a/wis2_gdc/env.py b/wis2_gdc/env.py
index 89f2728..a492623 100644
--- a/wis2_gdc/env.py
+++ b/wis2_gdc/env.py
@@ -22,6 +22,7 @@
 import os
 
 API_URL = os.environ.get('WIS2_GDC_API_URL')
+API_URL_DOCKER = os.environ.get('WIS2_GDC_API_URL_DOCKER')
 BACKEND_TYPE = os.environ.get('WIS2_GDC_BACKEND_TYPE')
 BACKEND_CONNECTION = os.environ.get('WIS2_GDC_BACKEND_CONNECTION')
 BROKER_URL = os.environ.get('WIS2_GDC_BROKER_URL')
diff --git a/wis2_gdc/registrar.py b/wis2_gdc/registrar.py
index d1f220c..2dbd193 100644
--- a/wis2_gdc/registrar.py
+++ b/wis2_gdc/registrar.py
@@ -37,10 +37,24 @@
 
 class Registrar:
     def __init__(self):
+        """
+        Initializer
+
+        :returns: `wis2_gdc.registrar.Registrar`
+        """
+
         self.metadata = None
         self.broker = MQTTPubSubClient(BROKER_URL)
 
-    def register(self, metadata: dict):
+    def register(self, metadata: dict) -> None:
+        """
+        Register a metadata document
+
+        :param metadata: `dict` of metadata document
+
+        :returns: `None`
+        """
+
         self.metadata = metadata
         LOGGER.debug(f'Metadata: {self.metadata}')
 
@@ -52,6 +66,12 @@ def register(self, metadata: dict):
         self._publish()
 
     def _run_ets(self) -> dict:
+        """
+        Helper function to run ETS
+
+        :returns: `dict` of ETS results
+        """
+
         LOGGER.info('Running ETS')
         ts = WMOCoreMetadataProfileTestSuite2(self.metadata)
         try:
@@ -64,10 +84,23 @@ def _run_ets(self) -> dict:
             LOGGER.error(err)
 
     def _run_kpi(self):
+        """
+        Helper function to run KPI
+
+        :returns: `None` (KPI evaluation is not implemented yet)
+        """
+
         LOGGER.info('Running KPI')
         pass
 
     def _publish(self):
+        """
+        Publish metadata from `wis2_gdc.registrar:Registrar.metadata`
+        to backend
+
+        :returns: `None`
+        """
+
         backend = BACKENDS[BACKEND_TYPE]({'connection': BACKEND_CONNECTION})
         LOGGER.info('Saving metadata to backend')
         backend.save(self.metadata)
diff --git a/wis2_gdc/sync.py b/wis2_gdc/sync.py
index 5718a11..3210097 100644
--- a/wis2_gdc/sync.py
+++ b/wis2_gdc/sync.py
@@ -19,12 +19,93 @@
 #
 ###############################################################################
 
+import json
+import logging
+import zipfile
+
 import click
+import requests
+from typing import Union
 
 from pywis_pubsub import cli_options
+from pywis_pubsub.mqtt import MQTTPubSubClient
 
+from wis2_gdc.env import API_URL, API_URL_DOCKER, BROKER_URL
 from wis2_gdc.harvester import HARVESTERS
 
+LOGGER = logging.getLogger(__name__)
+
+
+def archive_metadata(url: str, archive_zipfile: str) -> None:
+    """
+    Archive all discovery metadata from a GDC to an archive zipfile
+
+    Follows `rel=next` links from the GDC items endpoint page by page,
+    stores every feature as `{id}.json` in the zipfile and finally
+    publishes a notification message to the broker.
+
+    :param url: `str` of GDC API URL
+    :param archive_zipfile: `str` of filename of zipfile
+
+    :returns: `None`
+    """
+
+    def _get_next_link(links) -> Union[str, None]:
+        """
+        Inner helper function to derive rel=next link from GDC response
+
+        :param links: `list` of links array
+
+        :returns: `str` of next link or `None`
+        """
+
+        for link in links:
+            if link.get('rel') == 'next':
+                return link.get('href')
+
+        return None
+
+    next_url = f'{url}/collections/wis2-discovery-metadata/items'
+
+    with zipfile.ZipFile(archive_zipfile, 'w') as zf:
+        while next_url is not None:
+            LOGGER.info(f'Querying GDC with {next_url}')
+            response = requests.get(next_url)
+            # surface HTTP errors here rather than as a KeyError below
+            response.raise_for_status()
+            items = response.json()
+
+            for feature in items['features']:
+                LOGGER.debug(f"Saving {feature['id']} to archive")
+                filename = f"{feature['id']}.json"
+                zf.writestr(filename, json.dumps(feature))
+
+            # a page without a rel=next link is the last one
+            next_url = _get_next_link(items.get('links', []))
+
+    m = MQTTPubSubClient(BROKER_URL)
+    try:
+        m.pub('gdc-reports/archive',
+              f'Archive published at {API_URL}/archive.zip')
+    finally:
+        # close the broker connection even when publishing fails
+        m.close()
+
+
+@click.command()
+@click.pass_context
+@click.argument('archive-zipfile')
+@cli_options.OPTION_VERBOSITY
+def archive(ctx, archive_zipfile, verbosity='NOTSET'):
+    """Archive discovery metadata records"""
+
+    # WIS2_GDC_API_URL_DOCKER may be unset outside of Docker Compose;
+    # fall back to WIS2_GDC_API_URL so that the CLI still has a URL
+    url = API_URL_DOCKER or API_URL
+
+    click.echo(f'Archiving metadata from GDC {API_URL}')
+    archive_metadata(url, archive_zipfile)
+
 
 @click.command
 @click.argument('harvest_type', nargs=1,