diff --git a/README.md b/README.md index cea90b067..d112ddbd2 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ First, ensure that you have a set of Amazon Web Services (AWS) credentials with $ aws configure --profile mmw-stg ``` +You will also need to set the MMW Datahub AWS credential as your default. These are stored in lastpass under the name `MMW Azavea DataHub AWS`. Ensure that the AWS credentials file has universal read permissions. + Ensure you have the [vagrant-disksize](https://github.com/sprotheroe/vagrant-disksize) plugin installed: ```bash diff --git a/doc/README.md b/doc/README.md index 85b4c95db..fc45faafe 100644 --- a/doc/README.md +++ b/doc/README.md @@ -2,7 +2,11 @@ ## Python Jupyter notebooks demonstrating the use of the Model My Watershed geoprocessing API -2018-8-19. Created by [Emilio Mayorga](https://github.com/emiliom/), University of Washington. +The following Jupyter Notebooks provide example workflows for the Model My Watershed (ModelMW) public web services Application Programming Interface (API) for automating some of the workflows that are provided by the web application. + +Detailed ModelMW web service API documentation is provided at: https://modelmywatershed.org/api/docs/ + +Example notebooks were first created by [Emilio Mayorga](https://github.com/emiliom/) (University of Washington) on 2018-8-19, and have been maintained to work with subsequent changes to the API. 1. [MMW_API_watershed_demo.ipynb](https://github.com/WikiWatershed/model-my-watershed/blob/develop/doc/MMW_API_landproperties_demo.ipynb). Go [here](http://nbviewer.jupyter.org/github/WikiWatershed/model-my-watershed/blob/develop/doc/MMW_API_watershed_demo.ipynb) to view the functioning, interactive Folium map at the end of the notebook. diff --git a/src/mmw/apps/core/models.py b/src/mmw/apps/core/models.py index 6da97b5e5..b9d817335 100644 --- a/src/mmw/apps/core/models.py +++ b/src/mmw/apps/core/models.py @@ -5,6 +5,12 @@ AUTH_USER_MODEL = getattr(settings, 'AUTH_USER_MODEL', 'auth.User') +class JobStatus: + STARTED = 'started' + COMPLETE = 'complete' + FAILED = 'failed' + + class Job(models.Model): user = models.ForeignKey(AUTH_USER_MODEL, on_delete=models.SET_NULL, diff --git a/src/mmw/apps/core/tasks.py b/src/mmw/apps/core/tasks.py index 7532a1492..a6c17e872 100644 --- a/src/mmw/apps/core/tasks.py +++ b/src/mmw/apps/core/tasks.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from django.utils.timezone import now from celery import shared_task -from apps.core.models import Job +from apps.core.models import Job, JobStatus import json import logging @@ -29,7 +29,7 @@ def save_job_error(request, exc, traceback, job_id): job.error = exc job.traceback = traceback or 'No traceback' job.delivered_at = now() - job.status = 'failed' + job.status = JobStatus.FAILED job.save() except Exception as e: logger.error('Failed to save job error status. Job will appear hung.' @@ -47,5 +47,5 @@ def save_job_result(self, result, id, model_input): job.delivered_at = now() job.uuid = self.request.id job.model_input = model_input - job.status = 'complete' + job.status = JobStatus.COMPLETE job.save() diff --git a/src/mmw/apps/export/hydroshare.py b/src/mmw/apps/export/hydroshare.py index b6fa9708f..7fbac1241 100644 --- a/src/mmw/apps/export/hydroshare.py +++ b/src/mmw/apps/export/hydroshare.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import json -from io import StringIO +from io import BytesIO from zipfile import ZipFile from rauth import OAuth2Service from urllib.parse import urljoin, urlparse @@ -87,7 +87,7 @@ def add_files(self, resource_id, files): {'name': 'String', 'contents': 'String'} """ zippath = resource_id + '.zip' - stream = StringIO() + stream = BytesIO() # Zip all given files into the stream with ZipFile(stream, 'w') as zf: @@ -124,7 +124,7 @@ def get_project_snapshot(self, resource_id): snapshot_path = 'mmw_project_snapshot.json' try: stream = self.getResourceFile(resource_id, snapshot_path) - fio = StringIO() + fio = BytesIO() for chunk in stream: fio.write(chunk) diff --git a/src/mmw/apps/export/tasks.py b/src/mmw/apps/export/tasks.py index edf13224e..017adc004 100644 --- a/src/mmw/apps/export/tasks.py +++ b/src/mmw/apps/export/tasks.py @@ -89,7 +89,7 @@ def create_resource(user_id, project_id, params): for ext in SHAPEFILE_EXTENSIONS: filename = f'/tmp/{resource}.{ext}' - with open(filename) as shapefile: + with open(filename, 'rb') as shapefile: hs.addResourceFile(resource, shapefile, f'area-of-interest.{ext}') os.remove(filename) diff --git a/src/mmw/apps/geoprocessing_api/exceptions.py b/src/mmw/apps/geoprocessing_api/exceptions.py new file mode 100644 index 000000000..f68b05faa --- /dev/null +++ b/src/mmw/apps/geoprocessing_api/exceptions.py @@ -0,0 +1,14 @@ +from rest_framework import status +from rest_framework.exceptions import APIException + + +class JobNotReadyError(APIException): + status_code = status.HTTP_428_PRECONDITION_REQUIRED + default_code = 'precondition_required' + default_detail = 'The prepare job has not finished yet.' + + +class JobFailedError(APIException): + status_code = status.HTTP_412_PRECONDITION_FAILED + default_code = 'precondition_failed' + default_detail = 'The prepare job has failed.' diff --git a/src/mmw/apps/geoprocessing_api/schemas.py b/src/mmw/apps/geoprocessing_api/schemas.py index 993e9eef8..9396520e2 100644 --- a/src/mmw/apps/geoprocessing_api/schemas.py +++ b/src/mmw/apps/geoprocessing_api/schemas.py @@ -8,6 +8,8 @@ from django.conf import settings +from apps.core.models import JobStatus + STREAM_DATASOURCE = Parameter( 'datasource', IN_PATH, @@ -125,7 +127,7 @@ properties={ 'job': Schema(type=TYPE_STRING, format=FORMAT_UUID, example='6e514e69-f46b-47e7-9476-c1f5be0bac01'), - 'status': Schema(type=TYPE_STRING, example='started'), + 'status': Schema(type=TYPE_STRING, example=JobStatus.STARTED), } ) @@ -135,7 +137,7 @@ properties={ 'job_uuid': Schema(type=TYPE_STRING, format=FORMAT_UUID, example='6e514e69-f46b-47e7-9476-c1f5be0bac01'), - 'status': Schema(type=TYPE_STRING, example='started'), + 'status': Schema(type=TYPE_STRING, example=JobStatus.STARTED), 'result': Schema(type=TYPE_OBJECT), 'error': Schema(type=TYPE_STRING), 'started': Schema(type=TYPE_STRING, format=FORMAT_DATETIME, @@ -178,3 +180,79 @@ }, required=['location'], ) + +nlcd_override_allowed_values = '", "'.join([ + 'nlcd-2019-30m-epsg5070-512-byte', + 'nlcd-2016-30m-epsg5070-512-byte', + 'nlcd-2011-30m-epsg5070-512-byte', + 'nlcd-2006-30m-epsg5070-512-byte', + 'nlcd-2001-30m-epsg5070-512-byte', + 'nlcd-2011-30m-epsg5070-512-int8', +]) +LAYER_OVERRIDES = Schema( + title='Layer Overrides', + type=TYPE_OBJECT, + description='MMW combines different datasets in model runs. These have ' + 'default values, but can be overridden by specifying them ' + 'here. Only specify a value for the layers you want to ' + 'override.', + properties={ + '__LAND__': Schema( + type=TYPE_STRING, + example='nlcd-2019-30m-epsg5070-512-byte', + description='The NLCD layer to use. Valid options are: ' + f'"{nlcd_override_allowed_values}". All "-byte" ' + 'layers are from the NLCD19 product. The "-int8" ' + 'layer is from the NLCD11 product. The default value ' + 'is NLCD19 2019 "nlcd-2019-30m-epsg5070-512-byte".', + ), + '__STREAMS__': Schema( + type=TYPE_STRING, + example='nhdhr', + description='The streams layer to use. Valid options are: ' + '"nhdhr" for NHD High Resolution Streams, "nhd" for ' + 'NHD Medium Resolution Streams, and "drb" for ' + 'Delaware High Resolution. The area of interest must ' + 'be completely within the Delaware River Basin for ' + '"drb". "nhdhr" and "nhd" can be used within the ' + 'Continental United States. In some cases, "nhdhr" ' + 'may timeout. In such cases, "nhd" can be used as a ' + 'fallback. "nhdhr" is the default.' + ) + }, +) + +MODELING_REQUEST = Schema( + title='Modeling Request', + type=TYPE_OBJECT, + properties={ + 'area_of_interest': MULTIPOLYGON, + 'wkaoi': Schema( + title='Well-Known Area of Interest', + type=TYPE_STRING, + example='huc12__55174', + description='The table and ID for a well-known area of interest, ' + 'such as a HUC. ' + 'Format "table__id", eg. "huc12__55174" will analyze ' + 'the HUC-12 City of Philadelphia-Schuylkill River.', + ), + 'layer_overrides': LAYER_OVERRIDES, + }, +) + +GWLFE_REQUEST = Schema( + title='GWLF-E Request', + type=TYPE_OBJECT, + properties={ + 'input': Schema( + type=TYPE_OBJECT, + description='The result of modeling/gwlf-e/prepare/', + ), + 'job_uuid': Schema( + type=TYPE_STRING, + format=FORMAT_UUID, + example='6e514e69-f46b-47e7-9476-c1f5be0bac01', + description='The job uuid of modeling/gwlf-e/prepare/', + ), + }, +) diff --git a/src/mmw/apps/geoprocessing_api/urls.py b/src/mmw/apps/geoprocessing_api/urls.py index 1fc7598f2..0a41c30be 100644 --- a/src/mmw/apps/geoprocessing_api/urls.py +++ b/src/mmw/apps/geoprocessing_api/urls.py @@ -36,5 +36,9 @@ re_path(r'jobs/' + uuid_regex, get_job, name='get_job'), re_path(r'modeling/worksheet/$', views.start_modeling_worksheet, name='start_modeling_worksheet'), + re_path(r'modeling/gwlf-e/prepare/$', views.start_modeling_gwlfe_prepare, + name='start_modeling_gwlfe_prepare'), + re_path(r'modeling/gwlf-e/run/$', views.start_modeling_gwlfe_run, + name='start_modeling_gwlfe_run'), re_path(r'watershed/$', views.start_rwd, name='start_rwd'), ] diff --git a/src/mmw/apps/geoprocessing_api/views.py b/src/mmw/apps/geoprocessing_api/views.py index c1c5d6ff0..aaeadf532 100644 --- a/src/mmw/apps/geoprocessing_api/views.py +++ b/src/mmw/apps/geoprocessing_api/views.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +import json + from celery import chain from rest_framework.response import Response @@ -14,17 +16,24 @@ from django.utils.timezone import now from django.urls import reverse from django.contrib.gis.geos import GEOSGeometry +from django.shortcuts import get_object_or_404 -from apps.core.models import Job +from apps.core.models import Job, JobStatus from apps.core.tasks import (save_job_error, save_job_result) from apps.core.decorators import log_request from apps.modeling import geoprocessing +from apps.modeling.calcs import apply_gwlfe_modifications +from apps.modeling.tasks import run_gwlfe from apps.modeling.mapshed.calcs import streams -from apps.modeling.mapshed.tasks import nlcd_streams +from apps.modeling.mapshed.tasks import (collect_data, + convert_data, + multi_mapshed, + nlcd_streams) from apps.modeling.serializers import AoiSerializer +from apps.modeling.views import _parse_input as _parse_modeling_input -from apps.geoprocessing_api import schemas, tasks +from apps.geoprocessing_api import exceptions, schemas, tasks from apps.geoprocessing_api.permissions import AuthTokenSerializerAuthentication # noqa from apps.geoprocessing_api.throttling import (BurstRateThrottle, SustainedRateThrottle) @@ -217,7 +226,7 @@ def start_rwd(request, format=None): validate_rwd(location, data_source, snapping, simplify) job = Job.objects.create(created_at=created, result='', error='', - traceback='', user=user, status='started') + traceback='', user=user, status=JobStatus.STARTED) task_list = _initiate_rwd_job_chain(location, snapping, simplify, data_source, job.id) @@ -228,7 +237,7 @@ def start_rwd(request, format=None): return Response( { 'job': task_list.id, - 'status': 'started', + 'status': JobStatus.STARTED, }, headers={'Location': reverse('geoprocessing_api:get_job', args=[task_list.id])} @@ -1365,6 +1374,103 @@ def start_modeling_worksheet(request, format=None): ], area_of_interest, user) +@swagger_auto_schema(method='post', + request_body=schemas.MODELING_REQUEST, + responses={200: schemas.JOB_STARTED_RESPONSE}) +@decorators.api_view(['POST']) +@decorators.authentication_classes((SessionAuthentication, + TokenAuthentication, )) +@decorators.permission_classes((IsAuthenticated, )) +@decorators.throttle_classes([BurstRateThrottle, SustainedRateThrottle]) +@log_request +def start_modeling_gwlfe_prepare(request, format=None): + """ + Starts a job to prepare an input payload for GWLF-E for a given area. + + Given an area of interest or a WKAoI id, gathers data from land, soil type, + groundwater nitrogen, available water capacity, K-factor, slope, soil + nitrogen, soil phosphorus, base-flow index, and stream datasets. + + By default, NLCD 2019 and NHD High Resolution Streams are used to prepare + the input payload. This can be changed using the `layer_overrides` option. + + Only one of `area_of_interest` or `wkaoi` should be provided. If both are + given, the `area_of_interest` will be used. + + The `result` should be used with the gwlf-e/run endpoint, by sending at as + the `input`. Alternatively, the `job` UUID can be used as well by sending + it as the `job_uuid`. + """ + user = request.user if request.user.is_authenticated else None + area_of_interest, wkaoi = _parse_modeling_input(request.data) + + layer_overrides = request.data.get('layer_overrides', {}) + + return start_celery_job([ + multi_mapshed(area_of_interest, wkaoi, layer_overrides), + convert_data.s(wkaoi), + collect_data.s(area_of_interest, layer_overrides=layer_overrides), + ], area_of_interest, user) + + +@swagger_auto_schema(method='post', + request_body=schemas.GWLFE_REQUEST, + responses={200: schemas.JOB_STARTED_RESPONSE}) +@decorators.api_view(['POST']) +@decorators.authentication_classes((SessionAuthentication, + TokenAuthentication, )) +@decorators.permission_classes((IsAuthenticated, )) +@decorators.throttle_classes([BurstRateThrottle, SustainedRateThrottle]) +@log_request +def start_modeling_gwlfe_run(request, format=None): + """ + Starts a job to GWLF-E for a given prepared input. + + Given an `input` JSON of the gwlf-e/prepare endpoint's `result`, or a + `job_uuid` of a gwlf-e/prepare job, runs GWLF-E and returns a JSON + dictionary containing mean flow: total and per-second; sediment, nitrogen, + and phosphorous loads for various sources; summarized loads; and monthly + values for average precipitation, evapotranspiration, groundwater, runoff, + stream flow, point source flow, and tile drain. + + If the specified `job_uuid` is not ready or has failed, returns an error. + + For more details on the GWLF-E package, please see: [https://github.com/WikiWatershed/gwlf-e](https://github.com/WikiWatershed/gwlf-e) # NOQA + """ + user = request.user if request.user.is_authenticated else None + model_input = request.data.get('input') + + if not model_input: + job_uuid = request.data.get('job_uuid') + + if not job_uuid: + raise ValidationError( + 'At least one of `input` or `job_uuid` must be specified') + + input_job = get_object_or_404(Job, uuid=job_uuid) + if input_job.status == JobStatus.STARTED: + raise exceptions.JobNotReadyError( + f'The prepare job {job_uuid} has not finished yet.') + + if input_job.status == JobStatus.FAILED: + raise exceptions.JobFailedError( + f'The prepare job {job_uuid} has failed.') + + model_input = json.loads(input_job.result) + + # TODO #3484 Validate model_input + + # TODO #3485 Implement modifications, hash + mods = [] + hash = '' + + modified_model_input = apply_gwlfe_modifications(model_input, mods) + + return start_celery_job([ + run_gwlfe.s(modified_model_input, hash) + ], model_input, user) + + def _initiate_rwd_job_chain(location, snapping, simplify, data_source, job_id, testing=False): errback = save_job_error.s(job_id) @@ -1384,11 +1490,11 @@ def start_celery_job(task_list, job_input, user=None, link_error=True): :param job_input: Input to the first task, used in recording started jobs :param user: The user requesting the job. Optional. :param link_error: Whether or not to apply error handler to entire chain - :return: A Response contianing the job id, marked as 'started' + :return: A Response contianing the job id, marked as JobStatus.STARTED """ created = now() job = Job.objects.create(created_at=created, result='', error='', - traceback='', user=user, status='started', + traceback='', user=user, status=JobStatus.STARTED, model_input=job_input) success = save_job_result.s(job.id, job_input) @@ -1406,7 +1512,7 @@ def start_celery_job(task_list, job_input, user=None, link_error=True): return Response( { 'job': task_chain.id, - 'status': 'started', + 'status': JobStatus.STARTED, }, headers={'Location': reverse('geoprocessing_api:get_job', args=[task_chain.id])} diff --git a/src/mmw/apps/home/views.py b/src/mmw/apps/home/views.py index ac4106ae3..53cc0e6c8 100644 --- a/src/mmw/apps/home/views.py +++ b/src/mmw/apps/home/views.py @@ -185,7 +185,7 @@ def project_via_hydroshare_edit(request, resource): """ # Only logged in users are allowed to edit - if request.user.is_anonymous(): + if request.user.is_anonymous: return redirect('/error/hydroshare-not-logged-in') def callback(project_id): diff --git a/src/mmw/apps/modeling/calcs.py b/src/mmw/apps/modeling/calcs.py index 4960bce1b..c049b8762 100644 --- a/src/mmw/apps/modeling/calcs.py +++ b/src/mmw/apps/modeling/calcs.py @@ -62,13 +62,13 @@ def err(msg, line=None): try: begyear = datetime.strptime(rows[1][0], DATE_FORMAT).year except ValueError as ve: - err(ve.message, 2) + err(ve, 2) return None, errs try: endyear = datetime.strptime(rows[-1][0], DATE_FORMAT).year except ValueError as ve: - err(ve.message, len(rows)) + err(ve, len(rows)) return None, errs year_range = endyear - begyear + 1 @@ -126,7 +126,7 @@ def err(msg, line=None): except Exception as e: # Record error with line. idx + 2 because idx starts at 0 while # line numbers start at 1, and we need to account for the header. - err(e.message, idx + 2) + err(e, idx + 2) previous_d = d diff --git a/src/mmw/apps/modeling/tests.py b/src/mmw/apps/modeling/tests.py index 3c4e99c3b..d755d225b 100644 --- a/src/mmw/apps/modeling/tests.py +++ b/src/mmw/apps/modeling/tests.py @@ -11,7 +11,7 @@ from django.test.utils import override_settings from django.utils.timezone import now -from apps.core.models import Job +from apps.core.models import Job, JobStatus from apps.modeling import tasks, views from apps.modeling.models import Scenario, WeatherType @@ -232,7 +232,7 @@ def test_tr55_job_runs_in_chain(self): 'Job not found') self.assertEqual(str(found_job.status), - 'complete', + JobStatus.COMPLETE, 'Job found but incomplete.') @override_settings(**CELERY_TEST_OVERRIDES) diff --git a/src/mmw/apps/user/templates/user/hydroshare-auth.html b/src/mmw/apps/user/templates/user/hydroshare-auth.html index 85f3d9aa3..eadefbbf7 100644 --- a/src/mmw/apps/user/templates/user/hydroshare-auth.html +++ b/src/mmw/apps/user/templates/user/hydroshare-auth.html @@ -40,11 +40,10 @@