Skip to content

Commit

Permalink
add metadata archiving capability
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkralidis committed Oct 23, 2023
1 parent ab3a2b2 commit f69056e
Show file tree
Hide file tree
Showing 11 changed files with 269 additions and 4 deletions.
9 changes: 7 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@ LABEL maintainer="[email protected]"

ENV TZ="Etc/UTC" \
DEBIAN_FRONTEND="noninteractive" \
DEBIAN_PACKAGES="bash curl git python3-pip python3-setuptools vim"
DEBIAN_PACKAGES="bash cron curl git python3-pip python3-setuptools vim"

# copy the app
COPY . /app

# add to crontab
COPY ./docker/wis2-gdc-management.cron /etc/cron.d/wis2-gdc-management.cron

RUN apt-get update -y && \
# install dependencies
apt-get install -y ${DEBIAN_PACKAGES} && \
Expand All @@ -42,6 +45,8 @@ RUN apt-get update -y && \
# cleanup
apt autoremove -y && \
apt-get -q clean && \
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/* && \
chmod 0644 /etc/cron.d/wis2-gdc-management.cron && \
crontab /etc/cron.d/wis2-gdc-management.cron

ENTRYPOINT [ "/app/docker/entrypoint.sh" ]
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ wis2-gdc register /path/to/dir/or/wcmp2-files

# load from wis2box known deployments (https://demo.wis2box.wis.wmo.int)
wis2-gdc sync wis2box

# create an archive of metadata records to a zipfile
wis2-gdc archive foo.zip
```

### Docker
Expand Down
7 changes: 7 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,13 @@ services:
context: ..
env_file:
- wis2-gdc.env
environment:
- WIS2_GDC_API_URL_DOCKER=http://wis2-gdc-api
depends_on:
elasticsearch:
condition: service_healthy
volumes:
- wis2-gdc-management-data:/data
restart: always
command: ["pywis-pubsub", "subscribe", "--config", "/app/docker/pywis-pubsub.yml", "--verbosity", "DEBUG"]
networks:
Expand All @@ -81,6 +85,8 @@ services:
image: geopython/pygeoapi:latest
depends_on:
- wis2-gdc-management
volumes:
- wis2-gdc-management-data:/data
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost/conformance"]
interval: 5s
Expand All @@ -93,6 +99,7 @@ services:

volumes:
es-data:
wis2-gdc-management-data:

networks:
wis2-gdc-net:
9 changes: 9 additions & 0 deletions docker/wis2-gdc-api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,16 @@ FROM geopython/pygeoapi:latest

LABEL maintainer="Tom Kralidis <[email protected]>"

# locations of the pygeoapi configuration and of the OpenAPI document;
# entrypoint.sh reads both variables when generating the OpenAPI document
ENV PYGEOAPI_CONFIG=/pygeoapi/local.config.yml
ENV PYGEOAPI_OPENAPI=/pygeoapi/local.openapi.yml

# install pywcmp and run its bundle sync at build time
RUN pip3 install pywcmp && \
    pywcmp bundle sync

# GDC pygeoapi configuration, copied to the path PYGEOAPI_CONFIG points to
COPY ./wis2-gdc.yml /pygeoapi/local.config.yml
# Flask application, installed as the pygeoapi.app module
# (entrypoint.sh starts gunicorn with pygeoapi.app:app)
COPY ./app.py /pygeoapi/pygeoapi/app.py
COPY ./entrypoint.sh /app/docker/wis2-gdc-api/entrypoint.sh

# the entrypoint script must be executable to be used as ENTRYPOINT
RUN chmod +x /app/docker/wis2-gdc-api/entrypoint.sh

ENTRYPOINT [ "/app/docker/wis2-gdc-api/entrypoint.sh" ]
50 changes: 50 additions & 0 deletions docker/wis2-gdc-api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
###############################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
###############################################################################

from flask import Flask, make_response, redirect
from pygeoapi.flask_app import BLUEPRINT as pygeoapi_blueprint

# Flask application wrapping pygeoapi; additional routes are registered
# on this object further down in the module
app = Flask(__name__, static_url_path='/static')
# treat URLs with and without a trailing slash as the same route
app.url_map.strict_slashes = False

# mount all pygeoapi routes at the root of the application
app.register_blueprint(pygeoapi_blueprint, url_prefix='/')

# enable CORS only when the optional flask_cors package is installed
try:
    from flask_cors import CORS
    CORS(app)
except ImportError:  # CORS needs to be handled by upstream server
    pass


@app.route('/archive.zip')
def archive():
    """
    Serve the metadata archive zipfile stored at /data/archive.zip

    :returns: HTTP 200 response with the zipfile contents, or an
              HTTP 404 response if the archive file does not exist
    """

    try:
        # a zipfile is binary data: read it as bytes so that no text
        # decoding is attempted on the payload
        with open('/data/archive.zip', 'rb') as fh:
            response = make_response(fh.read(), 200)
    except FileNotFoundError:
        # the archive file is not present (e.g. not generated yet)
        return make_response('Archive not found', 404)

    # set the header on the response's own headers object instead of
    # replacing that object with a plain dict
    response.headers['Content-Type'] = 'application/zip'

    return response
82 changes: 82 additions & 0 deletions docker/wis2-gdc-api/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash
###############################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
###############################################################################

# pygeoapi entry script

echo "START /entrypoint.sh"

# keep running after a failing command (errexit off); failures are
# checked explicitly where they matter
set +e

# fixed gunicorn settings
SCRIPT_NAME="/"
CONTAINER_NAME="wis2-gdc-api"

# gunicorn settings taken from the environment, with defaults applied
# when the variable is unset or empty
: "${CONTAINER_HOST:=0.0.0.0}"
: "${CONTAINER_PORT:=80}"
: "${WSGI_WORKERS:=4}"
: "${WSGI_WORKER_TIMEOUT:=6000}"
: "${WSGI_WORKER_CLASS:=gevent}"

# What to invoke: the first script argument, "run" (gunicorn server) if absent
entry_cmd="${1:-run}"

# Shorthand
# Shorthand: print an error message and terminate the script
#   $@ - words of the message; they are joined with spaces
function error() {
    # "$*" expands to a single string, which is what a message embedded in
    # a larger string needs; errors go to stderr, not stdout
    echo "ERROR: $*" >&2
    # exit statuses are limited to 0-255; 255 is the status bash reports
    # for the previous out-of-range "exit -1"
    exit 255
}

# Workdir; nothing below can work from another directory, so stop on failure
cd /pygeoapi || error "could not change directory to /pygeoapi"

# Lock all Python files (for gunicorn hot reload)
# -exec ... {} + passes the file names straight to chmod, so names with
# whitespace are handled and chmod is not run at all when nothing matches
find . -type f -name "*.py" -exec chmod 0444 {} +

echo "Trying to generate OpenAPI document"
# error() already prefixes its message with "ERROR: "
if ! pygeoapi openapi generate "${PYGEOAPI_CONFIG}" --output-file "${PYGEOAPI_OPENAPI}"; then
    error "OpenAPI document could not be generated"
fi

echo "openapi.yml generated continue to pygeoapi"

# Dispatch on the requested command; only "run" is supported
case "${entry_cmd}" in
    # Run pygeoapi server
    run)
        # SCRIPT_NAME should not have value '/'
        if [[ "${SCRIPT_NAME}" = '/' ]]; then
            export SCRIPT_NAME=""
            echo "make SCRIPT_NAME empty from /"
        fi

        echo "Start gunicorn name=${CONTAINER_NAME} on ${CONTAINER_HOST}:${CONTAINER_PORT} with ${WSGI_WORKERS} workers and SCRIPT_NAME=${SCRIPT_NAME}"
        # exec replaces this shell with gunicorn; every expansion is quoted
        # so that a value from the environment is passed as exactly one
        # argument (no word splitting or globbing)
        exec gunicorn --workers "${WSGI_WORKERS}" \
            --worker-class="${WSGI_WORKER_CLASS}" \
            --timeout "${WSGI_WORKER_TIMEOUT}" \
            --name="${CONTAINER_NAME}" \
            --bind "${CONTAINER_HOST}:${CONTAINER_PORT}" \
            --reload \
            --reload-extra-file "${PYGEOAPI_CONFIG}" \
            pygeoapi.app:app
        ;;
    *)
        error "unknown command arg: must be run (default)"
        ;;
esac

echo "END /entrypoint.sh"

1 change: 1 addition & 0 deletions docker/wis2-gdc-management.cron
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Every day at 00:00, archive discovery metadata records to /data/archive.zip.
# Output is redirected to /proc/1/fd/1 and /proc/1/fd/2, i.e. the stdout and
# stderr of PID 1 (presumably so that it shows up in the container logs - confirm).
0 0 * * * su -c "wis2-gdc archive /data/archive.zip" > /proc/1/fd/1 2>/proc/1/fd/2
3 changes: 2 additions & 1 deletion wis2_gdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import click

from wis2_gdc.registrar import register, setup, teardown
from wis2_gdc.sync import sync
from wis2_gdc.sync import archive, sync

__version__ = '0.1.dev0'

Expand All @@ -35,6 +35,7 @@ def cli():
pass


cli.add_command(archive)
cli.add_command(sync)
cli.add_command(register)
cli.add_command(setup)
Expand Down
1 change: 1 addition & 0 deletions wis2_gdc/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import os

API_URL = os.environ.get('WIS2_GDC_API_URL')
API_URL_DOCKER = os.environ.get('WIS2_GDC_API_URL_DOCKER')
BACKEND_TYPE = os.environ.get('WIS2_GDC_BACKEND_TYPE')
BACKEND_CONNECTION = os.environ.get('WIS2_GDC_BACKEND_CONNECTION')
BROKER_URL = os.environ.get('WIS2_GDC_BROKER_URL')
35 changes: 34 additions & 1 deletion wis2_gdc/registrar.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,24 @@

class Registrar:
def __init__(self):
"""
Initializer
:returns: `wis2_gdc.registrar.Registrar`
"""

self.metadata = None
self.broker = MQTTPubSubClient(BROKER_URL)

def register(self, metadata: dict):
def register(self, metadata: dict) -> None:
"""
Register a metadata document
:param metadata: `dict` of metadata document
:returns: `None`
"""

self.metadata = metadata
LOGGER.debug(f'Metadata: {self.metadata}')

Expand All @@ -52,6 +66,12 @@ def register(self, metadata: dict):
self._publish()

def _run_ets(self) -> dict:
"""
Helper function to run ETS
:returns: `dict` of ETS results
"""

LOGGER.info('Running ETS')
ts = WMOCoreMetadataProfileTestSuite2(self.metadata)
try:
Expand All @@ -64,10 +84,23 @@ def _run_ets(self) -> dict:
LOGGER.error(err)

def _run_kpi(self):
"""
Helper function to run KPI
:returns: `dict` of KPI results
"""

LOGGER.info('Running KPI')
pass

def _publish(self):
"""
Publish metadata from `wis2_gdc.registrar:Registrar.metadata`
to backend
:returns: `None`
"""

backend = BACKENDS[BACKEND_TYPE]({'connection': BACKEND_CONNECTION})
LOGGER.info('Saving metadata to backend')
backend.save(self.metadata)
Expand Down
73 changes: 73 additions & 0 deletions wis2_gdc/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,85 @@
#
###############################################################################

import json
import logging
import zipfile

import click
import requests
from typing import Union

from pywis_pubsub import cli_options
from pywis_pubsub.mqtt import MQTTPubSubClient

from wis2_gdc.env import API_URL, API_URL_DOCKER, BROKER_URL
from wis2_gdc.harvester import HARVESTERS

LOGGER = logging.getLogger(__name__)


def archive_metadata(url: str, archive_zipfile: str,
                     timeout: float = 30) -> None:
    """
    Archive all discovery metadata from a GDC to an archive zipfile

    Pages through the GDC items endpoint by following rel=next links,
    writes every feature to the zipfile as `<id>.json` and, once done,
    publishes a notification to the `gdc-reports/archive` topic.

    :param url: `str` of GDC API URL
    :param archive_zipfile: `str` of filename of zipfile
    :param timeout: `float` of seconds to wait for each GDC API response

    :returns: `None`
    """

    def _get_next_link(links) -> Union[str, None]:
        """
        Inner helper function to derive rel=next link from GDC response

        :param links: `list` of links array

        :returns: `str` of next link or `None`
        """

        for link in links:
            if link.get('rel') == 'next':
                return link.get('href')

        return None

    if not url:
        # fail before the zipfile is created, rather than querying a
        # URL such as 'None/collections/...'
        raise ValueError('GDC API URL is not set')

    next_url = f'{url}/collections/wis2-discovery-metadata/items'

    with zipfile.ZipFile(archive_zipfile, 'w') as zf:
        # a page without a rel=next link is the last one
        while next_url is not None:
            LOGGER.info(f'Querying GDC with {next_url}')
            # without a timeout an unresponsive API would block forever
            response = requests.get(next_url, timeout=timeout)
            # do not try to archive the body of an HTTP error response
            response.raise_for_status()
            page = response.json()

            for feature in page.get('features', []):
                LOGGER.debug(f"Saving {feature['id']} to archive")
                filename = f"{feature['id']}.json"
                zf.writestr(filename, json.dumps(feature))

            next_url = _get_next_link(page.get('links', []))

    m = MQTTPubSubClient(BROKER_URL)
    try:
        m.pub('gdc-reports/archive',
              f'Archive published at {API_URL}/archive.zip')
    finally:
        # release the broker connection even if publishing fails
        m.close()


@click.command()
@click.pass_context
@click.argument('archive-zipfile')
@cli_options.OPTION_VERBOSITY
def archive(ctx, archive_zipfile, verbosity='NOTSET'):
    """Archive discovery metadata records"""

    # WIS2_GDC_API_URL_DOCKER may be unset (e.g. when run outside of the
    # Docker Compose deployment); fall back to the regular API URL instead
    # of passing None as the URL to query
    gdc_url = API_URL_DOCKER or API_URL

    click.echo(f'Archiving metadata from GDC {API_URL}')
    archive_metadata(gdc_url, archive_zipfile)


@click.command
@click.argument('harvest_type', nargs=1,
Expand Down

0 comments on commit f69056e

Please sign in to comment.