From 430519f393b696fc43e8f1da0cb24d1d36a2b647 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Fri, 9 Feb 2024 18:33:21 -0600 Subject: [PATCH 01/37] Add Celery and a test route --- panoptes_aggregation/routes.py | 20 ++++++++++++ panoptes_aggregation/tasks.py | 12 +++++++ .../tests/tasks/test_tasks.py | 32 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 panoptes_aggregation/tasks.py create mode 100644 panoptes_aggregation/tests/tasks/test_tasks.py diff --git a/panoptes_aggregation/routes.py b/panoptes_aggregation/routes.py index 857e99ce..3e1d9557 100644 --- a/panoptes_aggregation/routes.py +++ b/panoptes_aggregation/routes.py @@ -15,8 +15,10 @@ from panoptes_aggregation import reducers from panoptes_aggregation import extractors from panoptes_aggregation import running_reducers +from panoptes_aggregation.tasks import create_task from panoptes_aggregation import __version__ import numpy as np +from celery.result import AsyncResult # see https://stackoverflow.com/a/75666126 @@ -115,6 +117,24 @@ def index(): for route, route_function in panoptes.panoptes.items(): application.route('/panoptes/{0}'.format(route), methods=['POST', 'PUT'])(lambda: route_function(request.args.to_dict(), request.get_json())) + + @application.route('/tasks', methods=['POST']) + def run_task(): + content = request.json + task_type = content["type"] + task = create_task.delay(int(task_type)) + return jsonify({"task_id": task.id}), 202 + + @application.route("/tasks/", methods=["GET"]) + def get_status(task_id): + task_result = AsyncResult(task_id) + result = { + "task_id": task_id, + "task_status": task_result.status, + "task_result": task_result.result + } + return jsonify(result), 200 + @application.route('/docs') def web_docs(): return application.send_static_file('index.html') diff --git a/panoptes_aggregation/tasks.py b/panoptes_aggregation/tasks.py new file mode 100644 index 00000000..753d96a5 --- /dev/null +++ b/panoptes_aggregation/tasks.py @@ -0,0 +1,12 
@@ +from celery import Celery +import os +import time + +celery = Celery(__name__) +celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") +celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") + +@celery.task(name="create_task") +def create_task(task_type): + time.sleep(int(task_type)) + return True diff --git a/panoptes_aggregation/tests/tasks/test_tasks.py b/panoptes_aggregation/tests/tasks/test_tasks.py new file mode 100644 index 00000000..ed3b03f5 --- /dev/null +++ b/panoptes_aggregation/tests/tasks/test_tasks.py @@ -0,0 +1,32 @@ +try: + import panoptes_aggregation.routes as routes + OFFLINE = False +except ImportError: + OFFLINE = True +import unittest +import json + +@unittest.skipIf(OFFLINE, 'Installed in offline mode') +class TasksTest(unittest.TestCase): + def test_task_status(application): + application = routes.make_application() + client = application.test_client() + resp = client.post( + "/tasks", + data=json.dumps({"type": 1}), + content_type='application/json' + ) + content = json.loads(resp.data.decode()) + task_id = content["task_id"] + assert resp.status_code == 202 + assert task_id + + resp = client.get(f"tasks/{task_id}") + content = json.loads(resp.data.decode()) + assert content == {"task_id": task_id, "task_status": "PENDING", "task_result": None} + assert resp.status_code == 200 + + while content["task_status"] == "PENDING": + resp = client.get(f"tasks/{task_id}") + content = json.loads(resp.data.decode()) + assert content == {"task_id": task_id, "task_status": "SUCCESS", "task_result": True} From 25b771136a6fc605a499d42fe454bd039b66d6d8 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Fri, 9 Feb 2024 18:33:34 -0600 Subject: [PATCH 02/37] Add new dependencies --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f37e5f19..a5be1a7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 
+36,9 @@ dependencies = [ [project.optional-dependencies] online = [ + "celery>=5.3,<5.4", + "redis>=5,<6", + "flower>2,<3", "flask>=2.3,<3.1", "flask-cors>=3.0,<4.1", "panoptes-client>=1.6,<1.7", @@ -83,7 +86,7 @@ include = [ exclude = [ "docs/", "kubernetes", - "make_docs.sh" + "scripts/make_docs.sh" ] [tool.coverage.run] From 3e0e23c75162898e368d2177a49e5d7a04dc4b70 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Fri, 9 Feb 2024 18:33:58 -0600 Subject: [PATCH 03/37] Test task tests --- .../tests/router_tests/test_routes.py | 11 ++++++++++- panoptes_aggregation/tests/tasks/__init__.py | 0 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 panoptes_aggregation/tests/tasks/__init__.py diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index 0e3ff633..882cfebb 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -4,7 +4,7 @@ except ImportError: OFFLINE = True import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, call import numpy as np import os import panoptes_aggregation @@ -68,6 +68,15 @@ def test_one_running_reducer_route(self): running_reducer_name ) + @patch("panoptes_aggregation.tasks.create_task.run") + def test_mock_task(self, mock_run): + '''Test that the bg task gets called''' + assert panoptes_aggregation.tasks.create_task.run(1) + with self.application.test_client() as client: + response = client.post('/tasks', json={'type': 10}) + self.assertEqual(response.status_code, 202) + panoptes_aggregation.tasks.create_task.run.assert_called_once_with(1) + @unittest.skipIf("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", "Skipping this test on Travis CI.") def test_docs_route(self): '''Test docs route works''' diff --git a/panoptes_aggregation/tests/tasks/__init__.py b/panoptes_aggregation/tests/tasks/__init__.py new file mode 
100644 index 00000000..e69de29b From 1c3e2fc395c3e221b53880eb15ac55e5b6411284 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Fri, 9 Feb 2024 18:34:09 -0600 Subject: [PATCH 04/37] Docker updates --- Dockerfile | 6 ++++-- docker-compose.yml | 49 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5740f767..587ac9e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,10 +21,12 @@ COPY . . RUN pip install -U .[online,test,doc] # make documentation -RUN /bin/bash -lc ./make_docs.sh +RUN /bin/bash -lc ./scripts/make_docs.sh + +ADD ./ /usr/src/aggregation ARG REVISION='' ENV REVISION=$REVISION # load configs and start flask app -CMD ["bash", "./start-flask.sh"] +CMD ["bash", "./scripts/start-flask.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index aaf8ae2b..6300e8b8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: args: REVISION: fake-git-sha-id volumes: - - ./panoptes_aggregation:/usr/src/aggregation/panoptes_aggregation + - ./:/usr/src/aggregation - ~/.aws:/root/.aws environment: - AWS_REGION=${AWS_REGION} @@ -14,6 +14,49 @@ services: - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN} - AWS_SECURITY_TOKEN=${AWS_SECURITY_TOKEN} - - LISTEN_PORT=5000 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - FLASK_DEBUG=1 + - FLASK_ENV=development + - LISTEN_PORT=4000 ports: - - "5000:5000" + - "4000:4000" + depends_on: + - redis + + worker: + build: + context: ./ + args: + REVISION: fake-git-sha-id + command: celery --app panoptes_aggregation.tasks.celery worker --loglevel=info + volumes: + - ./:/usr/src/aggregation + environment: + - FLASK_DEBUG=1 + - APP_SETTINGS=project.server.config.DevelopmentConfig + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - FLASK_ENV=development + depends_on: + - aggregation + - redis + + 
dashboard: + build: . + command: celery --app panoptes_aggregation.tasks.celery flower --port=5555 --broker=redis://redis:6379/0 + ports: + - 5556:5555 + environment: + - FLASK_DEBUG=1 + - APP_SETTINGS=project.server.config.DevelopmentConfig + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + depends_on: + - aggregation + - redis + - worker + + redis: + image: redis + command: redis-server --appendonly yes \ No newline at end of file From 4af81040d667d3e7362d107edfa5cc95536f2d9e Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Fri, 9 Feb 2024 18:34:24 -0600 Subject: [PATCH 05/37] Scripts folder --- make_docs.sh => scripts/make_docs.sh | 0 scripts/start-celery.sh | 3 +++ start-flask.sh => scripts/start-flask.sh | 0 scripts/start-flower.sh | 4 ++++ 4 files changed, 7 insertions(+) rename make_docs.sh => scripts/make_docs.sh (100%) create mode 100755 scripts/start-celery.sh rename start-flask.sh => scripts/start-flask.sh (100%) create mode 100755 scripts/start-flower.sh diff --git a/make_docs.sh b/scripts/make_docs.sh similarity index 100% rename from make_docs.sh rename to scripts/make_docs.sh diff --git a/scripts/start-celery.sh b/scripts/start-celery.sh new file mode 100755 index 00000000..082f6d72 --- /dev/null +++ b/scripts/start-celery.sh @@ -0,0 +1,3 @@ +#!/bin/bash -e + +exec celery --app panoptes_aggregation.tasks.celery worker --loglevel=info diff --git a/start-flask.sh b/scripts/start-flask.sh similarity index 100% rename from start-flask.sh rename to scripts/start-flask.sh diff --git a/scripts/start-flower.sh b/scripts/start-flower.sh new file mode 100755 index 00000000..68789359 --- /dev/null +++ b/scripts/start-flower.sh @@ -0,0 +1,4 @@ +#!/bin/bash -e + +BROKER=${CELERY_BROKER_URL:='redis://redis:6379/0'} +exec celery --app panoptes_aggregation.tasks.celery flower --port=5555 --broker=$BROKER From 07ae2fada0e3ad2dbacb88b711330265bf5fcc75 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Fri, 9 Feb 2024 
18:34:49 -0600 Subject: [PATCH 06/37] Setup deploy to test env --- .github/workflows/deploy_batch.yml | 44 ++++ kubernetes/deployment-batchagg.tmpl | 336 ++++++++++++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 .github/workflows/deploy_batch.yml create mode 100644 kubernetes/deployment-batchagg.tmpl diff --git a/.github/workflows/deploy_batch.yml b/.github/workflows/deploy_batch.yml new file mode 100644 index 00000000..ae2e7baa --- /dev/null +++ b/.github/workflows/deploy_batch.yml @@ -0,0 +1,44 @@ +name: Deploy to Batchagg + +on: + push: + tags: + - batch-aggregation + workflow_dispatch: + +jobs: + build_and_push_image: + name: Build and Push Image + uses: zooniverse/ci-cd/.github/workflows/build_and_push_image.yaml@main + with: + repo_name: aggregation-for-caesar + commit_id: ${{ github.sha }} + latest: false + build_args: "REVISION=${{ github.sha }}" + + deploy_batchagg: + name: Deploy to Batchagg + uses: zooniverse/ci-cd/.github/workflows/deploy_app.yaml@main + needs: build_and_push_image + with: + app_name: aggregation-caesar + repo_name: aggregation-for-caesar + commit_id: ${{ github.sha }} + environment: batchagg + deploy_check: false + secrets: + creds: ${{ secrets.AZURE_AKS }} + + slack_notification: + name: Slack notification + uses: zooniverse/ci-cd/.github/workflows/slack_notification.yaml@main + needs: deploy_batchagg + if: always() + with: + commit_id: ${{ github.sha }} + job_name: Deploy to Batchagg / deploy_app + status: ${{ needs.deploy_batchagg.result }} + title: "Aggregation Batchagg deploy complete" + title_link: "https://batchagg-aggregation-caesar.zooniverse.org" + secrets: + slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/kubernetes/deployment-batchagg.tmpl b/kubernetes/deployment-batchagg.tmpl new file mode 100644 index 00000000..84a46ee1 --- /dev/null +++ b/kubernetes/deployment-batchagg.tmpl @@ -0,0 +1,336 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aggregation-caesar-batchagg + 
labels: + app: aggregation-caesar-batchagg +spec: + selector: + matchLabels: + app: aggregation-caesar-batchagg + template: + metadata: + labels: + app: aggregation-caesar-batchagg + spec: + containers: + - name: aggregation-caesar-batchagg-app + image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ + ports: + - containerPort: 80 + resources: + requests: + memory: "500Mi" + cpu: "500m" + limits: + memory: "1000Mi" + cpu: "1000m" + startupProbe: + httpGet: + path: / + port: 80 + # wait 6 * 10 seconds(default periodSeconds) for the container to start + # after this succeeds once the liveness probe takes over + failureThreshold: 6 + livenessProbe: + httpGet: + path: / + port: 80 + # allow a longer response time than 1s + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: / + port: 80 + # start checking for readiness after 20s (to serve traffic) + initialDelaySeconds: 20 + # allow a longer response time than 1s + timeoutSeconds: 10 + env: + - name: FLASK_ENV + value: production + - name: PANOPTES_URL + value: https://panoptes.zooniverse.org/ + - name: PANOPTES_CLIENT_ID + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_ID + - name: PANOPTES_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_SECRET + - name: MAST_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_AUTH_TOKEN + - name: MAST_PROD_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_PROD_TOKEN + - name: SENTRY_DSN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: SENTRY_DSN + - name: NEW_RELIC_LICENSE_KEY + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: NEW_RELIC_LICENSE_KEY + - name: NEW_RELIC_APP_NAME + value: 'Aggregation Caesar' +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aggregation-caesar-batchagg-worker + labels: + app: 
aggregation-caesar-batchagg-worker +spec: + selector: + matchLabels: + app: aggregation-caesar-batchagg-worker + template: + metadata: + labels: + app: aggregation-caesar-batchagg-worker + spec: + containers: + - name: aggregation-caesar-batchagg-worker + image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ + ports: + - containerPort: 80 + resources: + requests: + memory: "500Mi" + cpu: "500m" + limits: + memory: "1000Mi" + cpu: "1000m" + livenessProbe: + initialDelaySeconds: 45 + periodSeconds: 60 + timeoutSeconds: 20 + exec: + command: + - "/bin/bash" + - "-c" + - "celery -A panoptes_aggregation.tasks status | grep -o ': OK'" + args: ["/usr/src/aggregation/scripts/start-celery.sh"] + env: + - name: FLASK_ENV + value: production + - name: PANOPTES_URL + value: https://panoptes.zooniverse.org/ + - name: PANOPTES_CLIENT_ID + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_ID + - name: PANOPTES_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_SECRET + - name: MAST_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_AUTH_TOKEN + - name: MAST_PROD_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_PROD_TOKEN + - name: SENTRY_DSN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: SENTRY_DSN + - name: NEW_RELIC_LICENSE_KEY + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: NEW_RELIC_LICENSE_KEY + - name: NEW_RELIC_APP_NAME + value: 'Aggregation Caesar' +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: aggregation-caesar-batchagg-redis +spec: + accessModes: + - ReadWriteOnce + storageClassName: azurefile + resources: + requests: + storage: 5Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aggregation-caesar-batchagg-redis + labels: + app: aggregation-caesar-batchagg-redis +spec: + replicas: 
1 + strategy: + type: Recreate + selector: + matchLabels: + app: aggregation-caesar-batchagg-redis + template: + metadata: + labels: + app: aggregation-caesar-batchagg-redis + spec: + containers: + - name: aggregation-caesar-batchagg-redis + image: redis:6 + resources: + requests: + memory: "1500Mi" + cpu: "200m" + limits: + memory: "1500Mi" + cpu: "1500m" + volumeMounts: + - name: aggregation-caesar-batchagg-redis-data + mountPath: "/data" + volumes: + - name: aggregation-caesar-batchagg-redis-data + persistentVolumeClaim: + claimName: aggregation-caesar-batchagg-redis +--- +apiVersion: v1 +kind: Service +metadata: + name: aggregation-caesar-batchagg-redis +spec: + selector: + app: aggregation-caesar-batchagg-redis + ports: + - protocol: TCP + port: 6379 + targetPort: 6379 + type: NodePort +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: aggregation-caesar-batchagg-ingress + annotations: + kubernetes.io/ingress.class: nginx + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-buffer-size: "128k" + nginx.ingress.kubernetes.io/proxy-body-size: 20m + nginx.ingress.kubernetes.io/set-real-ip-from: "10.0.0.0/8" +spec: + tls: + - hosts: + - batchagg-aggregation.zooniverse.org + secretName: batchagg-aggregation-tls + rules: + - host: batchagg-aggregation.zooniverse.org + http: + paths: + - pathType: Prefix + path: / + backend: + service: + name: aggregation-caesar-batchagg-app + port: + number: 80 +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: batchagg-aggregation-tls +spec: + issuerRef: + name: letsencrypt-prod + kind: ClusterIssuer + secretName: batchagg-aggregation-tls + dnsNames: + - batchagg-aggregation.zooniverse.org +--- +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aggregation-caesar-batchagg-flower + labels: + app: aggregation-caesar-batchagg-flower +spec: + selector: + matchLabels: + app: aggregation-caesar-batchagg-flower + template: + metadata: 
+ labels: + app: aggregation-caesar-batchagg-flower + spec: + containers: + - name: aggregation-caesar-batchagg-flower + image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ + ports: + - containerPort: 80 + resources: + requests: + memory: "500Mi" + cpu: "500m" + limits: + memory: "1000Mi" + cpu: "1000m" + livenessProbe: + initialDelaySeconds: 45 + periodSeconds: 60 + timeoutSeconds: 20 + exec: + command: + - "/bin/bash" + - "-c" + - "celery -A panoptes_aggregation.tasks status | grep -o ': OK'" + args: ["/usr/src/aggregation/scripts/start-flower.sh"] + env: + - name: FLASK_ENV + value: production + - name: PANOPTES_URL + value: https://panoptes.zooniverse.org/ + - name: PANOPTES_CLIENT_ID + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_ID + - name: PANOPTES_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_SECRET + - name: MAST_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_AUTH_TOKEN + - name: MAST_PROD_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_PROD_TOKEN + - name: SENTRY_DSN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: SENTRY_DSN + - name: NEW_RELIC_LICENSE_KEY + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: NEW_RELIC_LICENSE_KEY + - name: NEW_RELIC_APP_NAME + value: 'Aggregation Caesar' From 2e1f84158f55f9f26e5046590b3bdda3be2ba9ab Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 13 Feb 2024 16:29:06 -0600 Subject: [PATCH 07/37] Link redis container via docker --- docker-compose.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6300e8b8..b1c2acd5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,8 +21,8 @@ services: - LISTEN_PORT=4000 ports: - "4000:4000" - depends_on: - - redis + links: + 
- redis:redis worker: build: @@ -38,8 +38,9 @@ services: - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/0 - FLASK_ENV=development + links: + - redis:redis depends_on: - - aggregation - redis dashboard: @@ -52,8 +53,9 @@ services: - APP_SETTINGS=project.server.config.DevelopmentConfig - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/0 + links: + - redis:redis depends_on: - - aggregation - redis - worker From edc1957aa18dba546c98c81d1017397fc5f685bc Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 20 Feb 2024 15:23:30 -0600 Subject: [PATCH 08/37] Modify test task --- panoptes_aggregation/routes.py | 8 +-- panoptes_aggregation/tasks.py | 9 +-- .../tests/router_tests/test_routes.py | 10 +-- .../tests/tasks/test_tasks.py | 63 ++++++++++--------- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/panoptes_aggregation/routes.py b/panoptes_aggregation/routes.py index 3e1d9557..4a54b7fd 100644 --- a/panoptes_aggregation/routes.py +++ b/panoptes_aggregation/routes.py @@ -15,7 +15,7 @@ from panoptes_aggregation import reducers from panoptes_aggregation import extractors from panoptes_aggregation import running_reducers -from panoptes_aggregation.tasks import create_task +from panoptes_aggregation import tasks from panoptes_aggregation import __version__ import numpy as np from celery.result import AsyncResult @@ -117,12 +117,12 @@ def index(): for route, route_function in panoptes.panoptes.items(): application.route('/panoptes/{0}'.format(route), methods=['POST', 'PUT'])(lambda: route_function(request.args.to_dict(), request.get_json())) - @application.route('/tasks', methods=['POST']) def run_task(): content = request.json - task_type = content["type"] - task = create_task.delay(int(task_type)) + xx = content['x'] + yy = content['y'] + task = tasks.add.delay(xx, yy) return jsonify({"task_id": task.id}), 202 @application.route("/tasks/", methods=["GET"]) diff --git 
a/panoptes_aggregation/tasks.py b/panoptes_aggregation/tasks.py index 753d96a5..912e3a3f 100644 --- a/panoptes_aggregation/tasks.py +++ b/panoptes_aggregation/tasks.py @@ -6,7 +6,8 @@ celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") -@celery.task(name="create_task") -def create_task(task_type): - time.sleep(int(task_type)) - return True + +@celery.task(name="add") +def add(x, y): + time.sleep(5) + return x+y diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index 882cfebb..654b1f23 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -4,7 +4,7 @@ except ImportError: OFFLINE = True import unittest -from unittest.mock import patch, MagicMock, call +from unittest.mock import patch, MagicMock import numpy as np import os import panoptes_aggregation @@ -68,14 +68,14 @@ def test_one_running_reducer_route(self): running_reducer_name ) - @patch("panoptes_aggregation.tasks.create_task.run") + @patch("panoptes_aggregation.tasks.add.run") def test_mock_task(self, mock_run): '''Test that the bg task gets called''' - assert panoptes_aggregation.tasks.create_task.run(1) + assert panoptes_aggregation.tasks.add.run(x=1, y=2) with self.application.test_client() as client: - response = client.post('/tasks', json={'type': 10}) + response = client.post('/tasks', json={'x': 1, 'y': 2}) self.assertEqual(response.status_code, 202) - panoptes_aggregation.tasks.create_task.run.assert_called_once_with(1) + panoptes_aggregation.tasks.add.run.assert_called_once_with(1) @unittest.skipIf("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", "Skipping this test on Travis CI.") def test_docs_route(self): diff --git a/panoptes_aggregation/tests/tasks/test_tasks.py 
b/panoptes_aggregation/tests/tasks/test_tasks.py index ed3b03f5..cbbfd9a0 100644 --- a/panoptes_aggregation/tests/tasks/test_tasks.py +++ b/panoptes_aggregation/tests/tasks/test_tasks.py @@ -1,32 +1,39 @@ -try: - import panoptes_aggregation.routes as routes - OFFLINE = False -except ImportError: - OFFLINE = True +# try: +# import panoptes_aggregation.routes as routes +# OFFLINE = False +# except ImportError: +# OFFLINE = True import unittest -import json +# import json -@unittest.skipIf(OFFLINE, 'Installed in offline mode') -class TasksTest(unittest.TestCase): - def test_task_status(application): - application = routes.make_application() - client = application.test_client() - resp = client.post( - "/tasks", - data=json.dumps({"type": 1}), - content_type='application/json' - ) - content = json.loads(resp.data.decode()) - task_id = content["task_id"] - assert resp.status_code == 202 - assert task_id - resp = client.get(f"tasks/{task_id}") - content = json.loads(resp.data.decode()) - assert content == {"task_id": task_id, "task_status": "PENDING", "task_result": None} - assert resp.status_code == 200 +from panoptes_aggregation.tasks import add +import unittest +class TestAddTask(unittest.TestCase): + def test_add_task(self): + assert add.run(x=3, y=5) == 8 + +# @unittest.skipIf(OFFLINE, 'Installed in offline mode') +# class TasksTest(unittest.TestCase): +# def test_task_status(celery_worker): +# application = routes.make_application() +# client = application.test_client() +# resp = client.post( +# "/tasks", +# data=json.dumps({"type": 1}), +# content_type='application/json' +# ) +# content = json.loads(resp.data.decode()) +# task_id = content["task_id"] +# assert resp.status_code == 202 +# assert task_id + +# resp = client.get(f"tasks/{task_id}") +# content = json.loads(resp.data.decode()) +# assert content == {"task_id": task_id, "task_status": "PENDING", "task_result": None} +# assert resp.status_code == 200 - while content["task_status"] == "PENDING": - resp = 
client.get(f"tasks/{task_id}") - content = json.loads(resp.data.decode()) - assert content == {"task_id": task_id, "task_status": "SUCCESS", "task_result": True} + # while content["task_status"] == "PENDING": + # resp = client.get(f"tasks/{task_id}") + # content = json.loads(resp.data.decode()) + # assert content == {"task_id": task_id, "task_status": "SUCCESS", "task_result": True} From 803d4edd1d7cbcdbfabedb7d3bd86ff1ec06e085 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 20 Feb 2024 15:23:55 -0600 Subject: [PATCH 09/37] Add redis service to test workflow --- .github/workflows/python-versions.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-versions.yml b/.github/workflows/python-versions.yml index 74a3890e..c44f2f6c 100644 --- a/.github/workflows/python-versions.yml +++ b/.github/workflows/python-versions.yml @@ -9,6 +9,9 @@ on: jobs: build: runs-on: ubuntu-latest + services: + redis: + image: redis strategy: matrix: python-version: [3.8, 3.9, "3.10", "3.11"] @@ -35,6 +38,6 @@ jobs: coverage report - name: Coveralls if: ${{ matrix.python-version == 3.10 }} - env: + env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: coveralls --service=github From 7290b8281376054cd1041be1eb9b83caa590dd02 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 20 Feb 2024 15:34:14 -0600 Subject: [PATCH 10/37] Hook up services --- .github/workflows/python-versions.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/python-versions.yml b/.github/workflows/python-versions.yml index c44f2f6c..f0a78b17 100644 --- a/.github/workflows/python-versions.yml +++ b/.github/workflows/python-versions.yml @@ -12,6 +12,13 @@ jobs: services: redis: image: redis + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 6379:6379 strategy: matrix: python-version: [3.8, 3.9, "3.10", "3.11"] @@ -33,6 +40,9 @@ jobs: - name: Run tests 
env: TRAVIS: true # one test is skipped on CI and looks for this env value + REDIS_HOST: redis + CELERY_BROKER_URL: redis://localhost:6379/0 + CELERY_RESULT_BACKEND: redis://localhost:6379/0 run: | coverage run coverage report From 8c593c344641a1a8d8d4d97c7529b4aaa2f82532 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 20 Feb 2024 15:53:51 -0600 Subject: [PATCH 11/37] Fix test arguments --- panoptes_aggregation/tests/router_tests/test_routes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index 654b1f23..13fad701 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -71,11 +71,11 @@ def test_one_running_reducer_route(self): @patch("panoptes_aggregation.tasks.add.run") def test_mock_task(self, mock_run): '''Test that the bg task gets called''' - assert panoptes_aggregation.tasks.add.run(x=1, y=2) + assert panoptes_aggregation.tasks.add.run(1, 2) with self.application.test_client() as client: response = client.post('/tasks', json={'x': 1, 'y': 2}) self.assertEqual(response.status_code, 202) - panoptes_aggregation.tasks.add.run.assert_called_once_with(1) + panoptes_aggregation.tasks.add.run.assert_called_once_with(1, 2) @unittest.skipIf("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", "Skipping this test on Travis CI.") def test_docs_route(self): From 80e4edd6a21f595940a2f8d6443febc3c309c105 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 20 Feb 2024 16:30:33 -0600 Subject: [PATCH 12/37] flake8 --- panoptes_aggregation/tasks.py | 2 +- .../tests/tasks/test_tasks.py | 35 +------------------ 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/panoptes_aggregation/tasks.py b/panoptes_aggregation/tasks.py index 912e3a3f..0dc4d8fc 100644 --- a/panoptes_aggregation/tasks.py +++ b/panoptes_aggregation/tasks.py @@ -10,4 
+10,4 @@ @celery.task(name="add") def add(x, y): time.sleep(5) - return x+y + return x + y diff --git a/panoptes_aggregation/tests/tasks/test_tasks.py b/panoptes_aggregation/tests/tasks/test_tasks.py index cbbfd9a0..7dedd72e 100644 --- a/panoptes_aggregation/tests/tasks/test_tasks.py +++ b/panoptes_aggregation/tests/tasks/test_tasks.py @@ -1,39 +1,6 @@ -# try: -# import panoptes_aggregation.routes as routes -# OFFLINE = False -# except ImportError: -# OFFLINE = True import unittest -# import json - - from panoptes_aggregation.tasks import add -import unittest + class TestAddTask(unittest.TestCase): def test_add_task(self): assert add.run(x=3, y=5) == 8 - -# @unittest.skipIf(OFFLINE, 'Installed in offline mode') -# class TasksTest(unittest.TestCase): -# def test_task_status(celery_worker): -# application = routes.make_application() -# client = application.test_client() -# resp = client.post( -# "/tasks", -# data=json.dumps({"type": 1}), -# content_type='application/json' -# ) -# content = json.loads(resp.data.decode()) -# task_id = content["task_id"] -# assert resp.status_code == 202 -# assert task_id - -# resp = client.get(f"tasks/{task_id}") -# content = json.loads(resp.data.decode()) -# assert content == {"task_id": task_id, "task_status": "PENDING", "task_result": None} -# assert resp.status_code == 200 - - # while content["task_status"] == "PENDING": - # resp = client.get(f"tasks/{task_id}") - # content = json.loads(resp.data.decode()) - # assert content == {"task_id": task_id, "task_status": "SUCCESS", "task_result": True} From a94bbd5342bbda998acd44d0688e5ce62e1928f5 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 20 Feb 2024 16:38:38 -0600 Subject: [PATCH 13/37] newline --- panoptes_aggregation/tests/tasks/test_tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/panoptes_aggregation/tests/tasks/test_tasks.py b/panoptes_aggregation/tests/tasks/test_tasks.py index 7dedd72e..20096dd1 100644 --- a/panoptes_aggregation/tests/tasks/test_tasks.py 
+++ b/panoptes_aggregation/tests/tasks/test_tasks.py @@ -1,6 +1,7 @@ import unittest from panoptes_aggregation.tasks import add + class TestAddTask(unittest.TestCase): def test_add_task(self): assert add.run(x=3, y=5) == 8 From 0504d47eac4041238b53fc32ac2f7530d8afedfa Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Mon, 26 Feb 2024 16:40:41 -0600 Subject: [PATCH 14/37] rename and refactor --- panoptes_aggregation/batch_aggregation.py | 82 +++++++++++++++++++++++ panoptes_aggregation/routes.py | 21 +++--- panoptes_aggregation/tasks.py | 13 ---- 3 files changed, 93 insertions(+), 23 deletions(-) create mode 100644 panoptes_aggregation/batch_aggregation.py delete mode 100644 panoptes_aggregation/tasks.py diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py new file mode 100644 index 00000000..93134ffc --- /dev/null +++ b/panoptes_aggregation/batch_aggregation.py @@ -0,0 +1,82 @@ +from celery import Celery +import csv +import pandas as pd +import os +import urllib3 +import numpy as np +import pandas as pd + +from panoptes_client import Panoptes, Project, Workflow +from panoptes_client.panoptes import PanoptesAPIException + +import logging +panoptes_client_logger = logging.getLogger('panoptes_client').setLevel(logging.ERROR) + +from panoptes_client import Panoptes, Project, Workflow + +celery = Celery(__name__) +celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") +celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") + +@celery.task(name="run_aggregation") +def run_aggregation(project_id, workflow_id, user_id): + ba = BatchAggregator(project_id, workflow_id, user_id) + exports = ba.save_exports() + wf_df = ba.process_wf_export(ba.wf_csv) + cls_df = ba.process_cls_export(ba.cls_csv) + + +class BatchAggregator: + """ + Bunch of stuff to manage a batch aggregation run + """ + + def __init__(self, project_id, workflow_id, user_id): + 
self.project_id = project_id + self.workflow_id = workflow_id + self.user_id = user_id + self._connect_api_client() + + def save_exports(self): + cls_export = Workflow(self.workflow_id).describe_export('classifications') + full_cls_url = cls_export['media'][0]['src'] + wf_export = Project(self.project_id).describe_export('workflows') + full_wf_url = wf_export['media'][0]['src'] + cls_file = f'tmp/{self.workflow_id}_cls_export.csv' + self._download_export(full_cls_url, cls_file) + wf_file = f'tmp/{self.project_id}_workflow_export.csv' + self._download_export(full_wf_url, wf_file) + self.cls_csv = cls_file + self.wf_csv = wf_file + return {'cls_csv': cls_file, 'wf_csv': wf_file} + + def process_wf_export(self, wf_csv): + self.wf_df = pd.read_csv(wf_csv) + self.wf_maj_version = self.wf_df.query(f'workflow_id == {self.workflow_id}')['version'].max() + self.wf_min_version = self.wf_df.query(f'workflow_id == {self.workflow_id} & version == {self.wf_maj_version}')['minor_version'].max() + return self.wf_df + + def process_cls_export(self, cls_csv): + cls_df = pd.read_csv(cls_csv) + self.cls_df = cls_df.query(f'workflow_version == {self.wf_maj_version}.{self.wf_min_version}') + return self.cls_df + + def _download_export(self, url, filepath): + http = urllib3.PoolManager() + r = http.request('GET', url, preload_content=False) + with open(filepath, 'wb') as out: + while True: + data = r.read(65536) + if not data: + break + out.write(data) + r.release_conn() + + def _connect_api_client(self): + # connect to the API only once for this function request + Panoptes.connect(username='zwolf', password='[REDACTED-ROTATE-THIS-CREDENTIAL]', admin=True) + # Panoptes.connect( + # endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'), + # client_id=getenv('PANOPTES_CLIENT_ID'), + # client_secret=getenv('PANOPTES_CLIENT_SECRET') + # ) \ No newline at end of file diff --git a/panoptes_aggregation/routes.py b/panoptes_aggregation/routes.py index 4a54b7fd..004dea1b 100644 ---
a/panoptes_aggregation/routes.py +++ b/panoptes_aggregation/routes.py @@ -15,7 +15,7 @@ from panoptes_aggregation import reducers from panoptes_aggregation import extractors from panoptes_aggregation import running_reducers -from panoptes_aggregation import tasks +from panoptes_aggregation import batch_aggregation from panoptes_aggregation import __version__ import numpy as np from celery.result import AsyncResult @@ -117,21 +117,22 @@ def index(): for route, route_function in panoptes.panoptes.items(): application.route('/panoptes/{0}'.format(route), methods=['POST', 'PUT'])(lambda: route_function(request.args.to_dict(), request.get_json())) - @application.route('/tasks', methods=['POST']) - def run_task(): + @application.route('/run_aggregation', methods=['POST']) + def run_aggregation(): content = request.json - xx = content['x'] - yy = content['y'] - task = tasks.add.delay(xx, yy) + project_id = content['project_id'] + workflow_id = content['workflow_id'] + user_id = content['user_id'] + task = batch_aggregation.run_aggregation.delay(project_id, workflow_id, user_id) return jsonify({"task_id": task.id}), 202 - @application.route("/tasks/", methods=["GET"]) + @application.route('/tasks/', methods=['GET']) def get_status(task_id): task_result = AsyncResult(task_id) result = { - "task_id": task_id, - "task_status": task_result.status, - "task_result": task_result.result + 'task_id': task_id, + 'task_status': task_result.status, + 'task_result': task_result.result } return jsonify(result), 200 diff --git a/panoptes_aggregation/tasks.py b/panoptes_aggregation/tasks.py deleted file mode 100644 index 0dc4d8fc..00000000 --- a/panoptes_aggregation/tasks.py +++ /dev/null @@ -1,13 +0,0 @@ -from celery import Celery -import os -import time - -celery = Celery(__name__) -celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") -celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") - - 
-@celery.task(name="add") -def add(x, y): - time.sleep(5) - return x + y From e9d929f956455ef01f1607b75a7d4b2ac5e8ebaf Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Mon, 26 Feb 2024 18:57:17 -0600 Subject: [PATCH 15/37] Taking a swing at extraction --- panoptes_aggregation/batch_aggregation.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 93134ffc..16b02d6b 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -1,12 +1,13 @@ from celery import Celery -import csv +import json import pandas as pd import os import urllib3 -import numpy as np import pandas as pd from panoptes_client import Panoptes, Project, Workflow +from panoptes_aggregation.workflow_config import workflow_extractor_config, workflow_reducer_config +from panoptes_aggregation.scripts import batch_utils from panoptes_client.panoptes import PanoptesAPIException import logging @@ -25,6 +26,11 @@ def run_aggregation(project_id, workflow_id, user_id): wf_df = ba.process_wf_export(ba.wf_csv) cls_df = ba.process_cls_export(ba.cls_csv) + extractor_config = workflow_extractor_config(ba.tasks) + extracted_data = batch_utils.batch_extract(cls_df, extractor_config) + + reducer_config = workflow_reducer_config(extractor_config) + reduced_data = batch_utils.batch_reduce(extracted_data, reducer_config) class BatchAggregator: """ @@ -54,11 +60,14 @@ def process_wf_export(self, wf_csv): self.wf_df = pd.read_csv(wf_csv) self.wf_maj_version = self.wf_df.query(f'workflow_id == {self.workflow_id}')['version'].max() self.wf_min_version = self.wf_df.query(f'workflow_id == {self.workflow_id} & version == {self.wf_maj_version}')['minor_version'].max() + self.workflow_version = f'{self.wf_maj_version}.{self.wf_min_version}' + self.workflow_row = self.wf_df.query(f'workflow_id == {self.workflow_id} & minor_version == {self.wf_min_version}') + 
self.tasks = json.loads(self.workflow_row.iloc[0]['tasks']) return self.wf_df def process_cls_export(self, cls_csv): cls_df = pd.read_csv(cls_csv) - self.cls_df = cls_df.query(f'workflow_version == {self.wf_maj_version}.{self.wf_min_version}') + self.cls_df = cls_df.query(f'workflow_version == {self.workflow_version}') return self.cls_df def _download_export(self, url, filepath): From 15e34c97c42ab5b80ac482e95400031425582881 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Mon, 26 Feb 2024 19:08:06 -0600 Subject: [PATCH 16/37] oops --- panoptes_aggregation/batch_aggregation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 16b02d6b..83a68da0 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -83,7 +83,6 @@ def _download_export(self, url, filepath): def _connect_api_client(self): # connect to the API only once for this function request - Panoptes.connect(username='zwolf', password='[REDACTED-ROTATE-THIS-CREDENTIAL]', admin=True) # Panoptes.connect( From b780f030e9d1d0e073ab0af30cac44d6e32befac Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 27 Feb 2024 15:39:48 -0600 Subject: [PATCH 17/37] update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f35986e6..6c74e356 100644 --- a/.gitignore +++ b/.gitignore @@ -113,3 +113,4 @@ endpoints.yml .vscode/ .noseids +tmp/* \ No newline at end of file From ae7adea815c7c981ed332029d2c08c0d2274bf8c Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 5 Mar 2024 15:49:29 -0600 Subject: [PATCH 18/37] Remove deploy files --- .github/workflows/deploy_batch.yml | 44 ---- kubernetes/deployment-batchagg.tmpl | 336 -------------------------- kubernetes/deployment-production.tmpl | 120 --------- 3 files changed, 500
deletions(-) delete mode 100644 .github/workflows/deploy_batch.yml delete mode 100644 kubernetes/deployment-batchagg.tmpl delete mode 100644 kubernetes/deployment-production.tmpl diff --git a/.github/workflows/deploy_batch.yml b/.github/workflows/deploy_batch.yml deleted file mode 100644 index ae2e7baa..00000000 --- a/.github/workflows/deploy_batch.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Deploy to Batchagg - -on: - push: - tags: - - batch-aggregation - workflow_dispatch: - -jobs: - build_and_push_image: - name: Build and Push Image - uses: zooniverse/ci-cd/.github/workflows/build_and_push_image.yaml@main - with: - repo_name: aggregation-for-caesar - commit_id: ${{ github.sha }} - latest: false - build_args: "REVISION=${{ github.sha }}" - - deploy_batchagg: - name: Deploy to Batchagg - uses: zooniverse/ci-cd/.github/workflows/deploy_app.yaml@main - needs: build_and_push_image - with: - app_name: aggregation-caesar - repo_name: aggregation-for-caesar - commit_id: ${{ github.sha }} - environment: batchagg - deploy_check: false - secrets: - creds: ${{ secrets.AZURE_AKS }} - - slack_notification: - name: Slack notification - uses: zooniverse/ci-cd/.github/workflows/slack_notification.yaml@main - needs: deploy_batchagg - if: always() - with: - commit_id: ${{ github.sha }} - job_name: Deploy to Batchagg / deploy_app - status: ${{ needs.deploy_production.result }} - title: "Aggregation Batchagg deploy complete" - title_link: "https://batchagg-aggregation-caesar.zooniverse.org" - secrets: - slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/kubernetes/deployment-batchagg.tmpl b/kubernetes/deployment-batchagg.tmpl deleted file mode 100644 index 84a46ee1..00000000 --- a/kubernetes/deployment-batchagg.tmpl +++ /dev/null @@ -1,336 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: aggregation-caesar-batchagg - labels: - app: aggregation-caesar-batchagg -spec: - selector: - matchLabels: - app: aggregation-caesar-batchagg - template: - metadata: - 
labels: - app: aggregation-caesar-batchagg - spec: - containers: - - name: aggregation-caesar-batchagg-app - image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ - ports: - - containerPort: 80 - resources: - requests: - memory: "500Mi" - cpu: "500m" - limits: - memory: "1000Mi" - cpu: "1000m" - startupProbe: - httpGet: - path: / - port: 80 - # wait 6 * 10 seconds(default periodSeconds) for the container to start - # after this succeeds once the liveness probe takes over - failureThreshold: 6 - livenessProbe: - httpGet: - path: / - port: 80 - # allow a longer response time than 1s - timeoutSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 80 - # start checking for readiness after 20s (to serve traffic) - initialDelaySeconds: 20 - # allow a longer response time than 1s - timeoutSeconds: 10 - env: - - name: FLASK_ENV - value: production - - name: PANOPTES_URL - value: https://panoptes.zooniverse.org/ - - name: PANOPTES_CLIENT_ID - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_ID - - name: PANOPTES_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_SECRET - - name: MAST_AUTH_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_AUTH_TOKEN - - name: MAST_PROD_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_PROD_TOKEN - - name: SENTRY_DSN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: SENTRY_DSN - - name: NEW_RELIC_LICENSE_KEY - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: NEW_RELIC_LICENSE_KEY - - name: NEW_RELIC_APP_NAME - value: 'Aggregation Caesar' ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: aggregation-caesar-batchagg-worker - labels: - app: aggregation-caesar-batchagg-worker -spec: - selector: - matchLabels: - app: aggregation-caesar-batchagg-worker - template: - metadata: - labels: - 
app: aggregation-caesar-batchagg-worker - spec: - containers: - - name: aggregation-caesar-batchagg-worker - image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ - ports: - - containerPort: 80 - resources: - requests: - memory: "500Mi" - cpu: "500m" - limits: - memory: "1000Mi" - cpu: "1000m" - livenessProbe: - initialDelaySeconds: 45 - periodSeconds: 60 - timeoutSeconds: - exec: - command: - - "/bin/bash" - - "-c" - - "celery -A panoptes_aggregation.tasks status | grep -o ': OK'" - args: ["/usr/src/aggregation/scripts/start-celery.sh"] - env: - - name: FLASK_ENV - value: production - - name: PANOPTES_URL - value: https://panoptes.zooniverse.org/ - - name: PANOPTES_CLIENT_ID - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_ID - - name: PANOPTES_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_SECRET - - name: MAST_AUTH_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_AUTH_TOKEN - - name: MAST_PROD_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_PROD_TOKEN - - name: SENTRY_DSN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: SENTRY_DSN - - name: NEW_RELIC_LICENSE_KEY - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: NEW_RELIC_LICENSE_KEY - - name: NEW_RELIC_APP_NAME - value: 'Aggregation Caesar' ---- -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: aggregation-caesar-batchagg-redis -spec: - accessModes: - - ReadWriteOnce - storageClassName: azurefile - resources: - requests: - storage: 5Gi ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: aggregation-caesar-batchagg-redis - labels: - app: aggregation-caesar-batchagg-redis -spec: - replicas: 1 - strategy: - type: Recreate - selector: - matchLabels: - app: aggregation-caesar-batchagg-redis - template: - metadata: - labels: - app: 
aggregation-caesar-batchagg-redis - spec: - containers: - - name: aggregation-caesar-batchagg-redis - image: redis:6 - resources: - requests: - memory: "1500Mi" - cpu: "200m" - limits: - memory: "1500Mi" - cpu: "1500m" - volumeMounts: - - name: aggregation-caesar-batchagg-redis-data - mountPath: "/data" - volumes: - - name: aggregation-caesar-batchagg-redis-data - persistentVolumeClaim: - claimName: aggregation-caesar-batchagg-redis ---- -apiVersion: v1 -kind: Service -metadata: - name: aggregation-caesar-batchagg-redis -spec: - selector: - app: aggregation-caesar-batchagg-redis - ports: - - protocol: TCP - port: 6379 - targetPort: 6379 - type: NodePort ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: aggregation-caesar-batchagg-ingress - annotations: - kubernetes.io/ingress.class: nginx - cert-manager.io/cluster-issuer: letsencrypt-prod - nginx.ingress.kubernetes.io/proxy-buffer-size: "128k" - nginx.ingress.kubernetes.io/proxy-body-size: 20m - nginx.ingress.kubernetes.io/set-real-ip-from: "10.0.0.0/8" -spec: - tls: - - hosts: - - batchagg-aggregation.zooniverse.org - secretName: aggregation-caesar-batchagg-tls-secret - rules: - - host: batchagg-aggregation.zooniverse.org - http: - paths: - - pathType: Prefix - path: / - backend: - service: - name: aggregation-caesar-batchagg-app - port: - number: 80 ---- -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: batchagg-aggregation-tls -spec: - issuerRef: - name: letsencrypt-prod - kind: ClusterIssuer - secretName: batchagg-aggregation-tls - dnsNames: - - batchagg-aggregation.zooniverse.org ---- ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: aggregation-caesar-batchagg-flower - labels: - app: aggregation-caesar-batchagg-flower -spec: - selector: - matchLabels: - app: aggregation-caesar-batchagg-flower - template: - metadata: - labels: - app: aggregation-caesar-batchagg-flower - spec: - containers: - - name: aggregation-caesar-batchagg-flower - image: 
ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ - ports: - - containerPort: 80 - resources: - requests: - memory: "500Mi" - cpu: "500m" - limits: - memory: "1000Mi" - cpu: "1000m" - livenessProbe: - initialDelaySeconds: 45 - periodSeconds: 60 - timeoutSeconds: 20 - exec: - command: - - "/bin/bash" - - "-c" - - "celery -A panoptes_aggregation.tasks status | grep -o ': OK'" - args: ["/usr/src/aggregation/scripts/start-flower.sh"] - env: - - name: FLASK_ENV - value: production - - name: PANOPTES_URL - value: https://panoptes.zooniverse.org/ - - name: PANOPTES_CLIENT_ID - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_ID - - name: PANOPTES_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_SECRET - - name: MAST_AUTH_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_AUTH_TOKEN - - name: MAST_PROD_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_PROD_TOKEN - - name: SENTRY_DSN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: SENTRY_DSN - - name: NEW_RELIC_LICENSE_KEY - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: NEW_RELIC_LICENSE_KEY - - name: NEW_RELIC_APP_NAME - value: 'Aggregation Caesar' diff --git a/kubernetes/deployment-production.tmpl b/kubernetes/deployment-production.tmpl deleted file mode 100644 index e97d4b88..00000000 --- a/kubernetes/deployment-production.tmpl +++ /dev/null @@ -1,120 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: aggregation-caesar - labels: - app: aggregation-caesar -spec: - selector: - matchLabels: - app: aggregation-caesar - template: - metadata: - labels: - app: aggregation-caesar - spec: - containers: - - name: aggregation-caesar-app - image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ - ports: - - containerPort: 80 - resources: - requests: - memory: 
"500Mi" - cpu: "500m" - limits: - memory: "1000Mi" - cpu: "1000m" - startupProbe: - httpGet: - path: / - port: 80 - # wait 6 * 10 seconds(default periodSeconds) for the container to start - # after this succeeds once the liveness probe takes over - failureThreshold: 6 - livenessProbe: - httpGet: - path: / - port: 80 - # allow a longer response time than 1s - timeoutSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 80 - # start checking for readiness after 20s (to serve traffic) - initialDelaySeconds: 20 - # allow a longer response time than 1s - timeoutSeconds: 10 - env: - - name: FLASK_ENV - value: production - - name: PANOPTES_URL - value: https://panoptes.zooniverse.org/ - - name: PANOPTES_CLIENT_ID - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_ID - - name: PANOPTES_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: PANOPTES_CLIENT_SECRET - - name: MAST_AUTH_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_AUTH_TOKEN - - name: MAST_PROD_TOKEN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: MAST_PROD_TOKEN - - name: SENTRY_DSN - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: SENTRY_DSN - - name: NEW_RELIC_LICENSE_KEY - valueFrom: - secretKeyRef: - name: aggregation-for-caesar-environment - key: NEW_RELIC_LICENSE_KEY - - name: NEW_RELIC_APP_NAME - value: 'Aggregation Caesar' ---- -apiVersion: autoscaling/v1 -kind: HorizontalPodAutoscaler -metadata: - name: aggregation-caesar -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: aggregation-caesar - minReplicas: 2 - maxReplicas: 3 - targetCPUUtilizationPercentage: 80 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: aggregation-caesar -spec: - minAvailable: 50% - selector: - matchLabels: - app: aggregation-caesar ---- -apiVersion: v1 -kind: Service -metadata: - name: 
aggregation-caesar -spec: - selector: - app: aggregation-caesar - ports: - - protocol: TCP - port: 80 - targetPort: 80 From afc4523b1a990216fe8d284b9fb15c2f9f490cdd Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 5 Mar 2024 15:49:36 -0600 Subject: [PATCH 19/37] Update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6c74e356..040a57ca 100644 --- a/.gitignore +++ b/.gitignore @@ -113,4 +113,5 @@ endpoints.yml .vscode/ .noseids -tmp/* \ No newline at end of file +tmp/* +.DS_Store \ No newline at end of file From 916456c11bcffa5dc0b91f621eb7422f4c815f05 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 12 Mar 2024 17:05:33 -0500 Subject: [PATCH 20/37] Clean up test tests --- panoptes_aggregation/batch_aggregation.py | 14 ++++++-------- panoptes_aggregation/tests/tasks/__init__.py | 0 panoptes_aggregation/tests/tasks/test_tasks.py | 7 ------- 3 files changed, 6 insertions(+), 15 deletions(-) delete mode 100644 panoptes_aggregation/tests/tasks/__init__.py delete mode 100644 panoptes_aggregation/tests/tasks/test_tasks.py diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 83a68da0..1eff9584 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -3,7 +3,7 @@ import pandas as pd import os import urllib3 -import pandas as pd +from os import getenv from panoptes_client import Panoptes, Project, Workflow from panoptes_aggregation.workflow_config import workflow_extractor_config, workflow_reducer_config @@ -13,8 +13,6 @@ import logging panoptes_client_logger = logging.getLogger('panoptes_client').setLevel(logging.ERROR) -from panoptes_client import Panoptes, Project, Workflow - celery = Celery(__name__) celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", 
"redis://localhost:6379") @@ -83,8 +81,8 @@ def _download_export(self, url, filepath): def _connect_api_client(self): # connect to the API only once for this function request - # Panoptes.connect( - # endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'), - # client_id=getenv('PANOPTES_CLIENT_ID'), - # client_secret=getenv('PANOPTES_CLIENT_SECRET') - # ) \ No newline at end of file + Panoptes.connect( + endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'), + client_id=getenv('PANOPTES_CLIENT_ID'), + client_secret=getenv('PANOPTES_CLIENT_SECRET') + ) diff --git a/panoptes_aggregation/tests/tasks/__init__.py b/panoptes_aggregation/tests/tasks/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/panoptes_aggregation/tests/tasks/test_tasks.py b/panoptes_aggregation/tests/tasks/test_tasks.py deleted file mode 100644 index 20096dd1..00000000 --- a/panoptes_aggregation/tests/tasks/test_tasks.py +++ /dev/null @@ -1,7 +0,0 @@ -import unittest -from panoptes_aggregation.tasks import add - - -class TestAddTask(unittest.TestCase): - def test_add_task(self): - assert add.run(x=3, y=5) == 8 From 0b2322900e4875eeb2990bd403cd71ccd6b0e0b1 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 12 Mar 2024 17:25:06 -0500 Subject: [PATCH 21/37] Add router tests --- panoptes_aggregation/routes.py | 5 ++-- .../tests/router_tests/test_routes.py | 28 ++++++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/panoptes_aggregation/routes.py b/panoptes_aggregation/routes.py index 004dea1b..4a8e9daf 100644 --- a/panoptes_aggregation/routes.py +++ b/panoptes_aggregation/routes.py @@ -124,15 +124,14 @@ def run_aggregation(): workflow_id = content['workflow_id'] user_id = content['user_id'] task = batch_aggregation.run_aggregation.delay(project_id, workflow_id, user_id) - return jsonify({"task_id": task.id}), 202 + return json.dumps({"task_id": task.id}), 202 @application.route('/tasks/', methods=['GET']) def 
get_status(task_id): task_result = AsyncResult(task_id) result = { 'task_id': task_id, - 'task_status': task_result.status, - 'task_result': task_result.result + 'task_status': task_result.status } return jsonify(result), 200 diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index 13fad701..f57143f4 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -68,14 +68,28 @@ def test_one_running_reducer_route(self): running_reducer_name ) - @patch("panoptes_aggregation.tasks.add.run") - def test_mock_task(self, mock_run): - '''Test that the bg task gets called''' - assert panoptes_aggregation.tasks.add.run(1, 2) - with self.application.test_client() as client: - response = client.post('/tasks', json={'x': 1, 'y': 2}) + # Override json.dumps() for this test so it doesn't try to jsonify the mock + import json + @patch("json.dumps", return_value=json.dumps({'project_id': 1, 'workflow_id': 10, 'user_id': 100, 'task_id': 'asdf'})) + @patch("panoptes_aggregation.batch_aggregation.run_aggregation.delay") + def test_run_aggregation_route(self, mocked_task, mocked_json): + '''Test that the bg task gets called on batch aggregation route''' + with routes.make_application().test_client() as client: + mocked_task.id = 'asdf' + response = client.post('/run_aggregation', json={'project_id': 1, 'workflow_id': 10, 'user_id': 100}) + mocked_task.assert_called_once_with(1, 10, 100) self.assertEqual(response.status_code, 202) - panoptes_aggregation.tasks.add.run.assert_called_once_with(1, 2) + self.assertIn('"task_id": "asdf"', response.text) + + @patch("celery.result.AsyncResult") + def test_get_status(self, asyncresult): + '''Test task status works''' + with self.application.test_client() as client: + result = '"task_id": "asdf", "task_status": "PENDING"' + asyncresult.get = result + response = client.get('/tasks/asdf') + 
self.assertEqual(response.status_code, 200) + self.assertIn(result, response.text) @unittest.skipIf("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", "Skipping this test on Travis CI.") def test_docs_route(self): From 931eebab4e4c4985ae48d365babfb288fb51e2e5 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 12 Mar 2024 17:25:57 -0500 Subject: [PATCH 22/37] Extremely placeholder BA lib tests --- .../tests/batch_aggregation/__init__.py | 0 .../test_batch_aggregation.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 panoptes_aggregation/tests/batch_aggregation/__init__.py create mode 100644 panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py diff --git a/panoptes_aggregation/tests/batch_aggregation/__init__.py b/panoptes_aggregation/tests/batch_aggregation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py new file mode 100644 index 00000000..ef1ffb14 --- /dev/null +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -0,0 +1,19 @@ +import unittest +from panoptes_aggregation.batch_aggregation import run_aggregation + + +class TestBatchAggregation(unittest.TestCase): + def test_save_exports(self): + # Test that Panoptes calls are made and files are saved + assert 1 == 1 + + def test_process_wf_export(self): + # Test that: + # the wf export is parsed + # the version instance vars are set + # dataframe is retuned + assert 1 == 1 + + def test_process_cls_export(self): + # Test that the cls csv is parsed and a dataframe is returned + assert 1 == 1 From 501ce4d1bd92458014648e0f011f18e03dd63d8e Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 2 Apr 2024 17:57:35 -0500 Subject: [PATCH 23/37] Only override local import --- panoptes_aggregation/tests/router_tests/test_routes.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index f57143f4..5c81e2be 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -70,7 +70,7 @@ def test_one_running_reducer_route(self): # Override json.dumps() for this test so it doesn't try to jsonify the mock import json - @patch("json.dumps", return_value=json.dumps({'project_id': 1, 'workflow_id': 10, 'user_id': 100, 'task_id': 'asdf'})) + @patch("panoptes_aggregation.batch_aggregation.json.dumps", return_value=json.dumps({'project_id': 1, 'workflow_id': 10, 'user_id': 100, 'task_id': 'asdf'})) @patch("panoptes_aggregation.batch_aggregation.run_aggregation.delay") def test_run_aggregation_route(self, mocked_task, mocked_json): '''Test that the bg task gets called on batch aggregation route''' From 1f945fbdbbae5538b4284e1ede19425536a02ba9 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 2 Apr 2024 17:58:02 -0500 Subject: [PATCH 24/37] First few batch agg specs --- .../test_batch_aggregation.py | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index ef1ffb14..8f18a549 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -1,11 +1,43 @@ import unittest +from unittest.mock import patch, Mock, MagicMock +from panoptes_aggregation.scripts import batch_utils from panoptes_aggregation.batch_aggregation import run_aggregation - +from panoptes_aggregation import batch_aggregation as batch_agg class TestBatchAggregation(unittest.TestCase): - def test_save_exports(self): + @patch("panoptes_aggregation.batch_aggregation.workflow_extractor_config") + 
@patch("panoptes_aggregation.batch_aggregation.workflow_reducer_config") + @patch("panoptes_aggregation.batch_aggregation.BatchAggregator") + def test_run_aggregation(self, mock_aggregator, mock_wf_red_conf, mock_wf_ext_conf): + mock_aggregator.process_wf_export.return_value = MagicMock() + mock_aggregator.process_cls_export.return_value = MagicMock() + batch_utils.batch_extract = MagicMock() + batch_utils.batch_reduce = MagicMock() + run_aggregation(1, 10, 100) + mock_aggregator.assert_called_once_with(1, 10, 100) + mock_wf_ext_conf.assert_called_once() + mock_wf_red_conf.assert_called_once() + batch_utils.batch_extract.assert_called_once() + batch_utils.batch_reduce.assert_called_once() + + @patch("panoptes_aggregation.batch_aggregation.Workflow") + @patch("panoptes_aggregation.batch_aggregation.Project") + @patch("panoptes_aggregation.batch_aggregation.Panoptes.connect") + def test_save_exports(self, mock_client, mock_project, mock_workflow): # Test that Panoptes calls are made and files are saved - assert 1 == 1 + csv_dict = {'media': [ {'src': 'http://zooniverse.org/123.csv'} ] } + mock_project.return_value.describe_export.return_value = csv_dict + mock_workflow.return_value.describe_export.return_value = csv_dict + ba = batch_agg.BatchAggregator(1, 10, 100) + batch_agg.BatchAggregator._download_export = MagicMock(side_effect=[f'./cls_export.csv', f'./wf_export.csv']) + expected_response = {'cls_csv': 'tmp/10_cls_export.csv', 'wf_csv': 'tmp/1_workflow_export.csv'} + response = ba.save_exports() + self.assertEqual(response, expected_response) + mock_client.assert_called_once() + mock_project.assert_called_once_with(1) + mock_workflow.assert_called_once_with(10) + mock_project.return_value.describe_export.assert_called_once_with('workflows') + mock_workflow.return_value.describe_export.assert_called_once_with('classifications') def test_process_wf_export(self): # Test that: From c6e8ba9caaeb686fd52d325a6985bcc9aead4919 Mon Sep 17 00:00:00 2001 From: Zach 
Wolfenbarger Date: Wed, 22 May 2024 16:02:05 -0500 Subject: [PATCH 25/37] Updates to BatchAggregation & tests --- panoptes_aggregation/batch_aggregation.py | 74 +++++++++++--- .../tests/batch_aggregation/cls_export.csv | 8 ++ .../test_batch_aggregation.py | 99 +++++++++++++++---- .../tests/batch_aggregation/wf_export.csv | 7 ++ pyproject.toml | 2 + 5 files changed, 159 insertions(+), 31 deletions(-) create mode 100644 panoptes_aggregation/tests/batch_aggregation/cls_export.csv create mode 100644 panoptes_aggregation/tests/batch_aggregation/wf_export.csv diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 1eff9584..8db4e574 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -3,12 +3,14 @@ import pandas as pd import os import urllib3 -from os import getenv +from shutil import make_archive +import uuid + +from azure.storage.blob import BlobServiceClient from panoptes_client import Panoptes, Project, Workflow -from panoptes_aggregation.workflow_config import workflow_extractor_config, workflow_reducer_config +from panoptes_aggregation.workflow_config import workflow_extractor_config from panoptes_aggregation.scripts import batch_utils -from panoptes_client.panoptes import PanoptesAPIException import logging panoptes_client_logger = logging.getLogger('panoptes_client').setLevel(logging.ERROR) @@ -20,15 +22,34 @@ @celery.task(name="run_aggregation") def run_aggregation(project_id, workflow_id, user_id): ba = BatchAggregator(project_id, workflow_id, user_id) - exports = ba.save_exports() + ba.save_exports() + wf_df = ba.process_wf_export(ba.wf_csv) cls_df = ba.process_cls_export(ba.cls_csv) extractor_config = workflow_extractor_config(ba.tasks) extracted_data = batch_utils.batch_extract(cls_df, extractor_config) - reducer_config = workflow_reducer_config(extractor_config) - reduced_data = batch_utils.batch_reduce(extracted_data, reducer_config) + 
batch_standard_reducers = { + 'question_extractor': ['question_reducer', 'question_consensus_reducer'], + 'survey_extractor': ['survey_reducer'] + } + + for task_type, extract_df in extracted_data.items(): + extract_df.to_csv(f'{ba.output_path}/{ba.workflow_id}_{task_type}.csv') + reducer_list = batch_standard_reducers[task_type] + reduced_data = {} + + for reducer in reducer_list: + # This is an override. The workflow_reducer_config method returns a config object + # that is incompatible with the batch_utils batch_reduce method + reducer_config = {'reducer_config': {reducer: {}}} + reduced_data[reducer] = batch_utils.batch_reduce(extract_df, reducer_config) + filename = f'{ba.output_path}/{ba.workflow_id}_reductions.csv' + reduced_data[reducer].to_csv(filename, mode='a') + ba.upload_files() + + # hit up panoptes, let em know you're done class BatchAggregator: """ @@ -39,20 +60,26 @@ def __init__(self, project_id, workflow_id, user_id): self.project_id = project_id self.workflow_id = workflow_id self.user_id = user_id + self._generate_uuid() self._connect_api_client() def save_exports(self): + self.output_path = f'tmp/{self.workflow_id}' + os.mkdir(self.output_path) + cls_export = Workflow(self.workflow_id).describe_export('classifications') full_cls_url = cls_export['media'][0]['src'] + cls_file = f'{self.output_path}/{self.workflow_id}_cls_export.csv' + self._download_export(full_cls_url, cls_file) + wf_export = Project(self.project_id).describe_export('workflows') full_wf_url = wf_export['media'][0]['src'] - cls_file = f'tmp/{self.workflow_id}_cls_export.csv' - self._download_export(full_cls_url, cls_file) - wf_file = f'tmp/{self.project_id}_workflow_export.csv' + wf_file = f'{self.output_path}/{self.workflow_id}_workflow_export.csv' self._download_export(full_wf_url, wf_file) + self.cls_csv = cls_file self.wf_csv = wf_file - return {'cls_csv': cls_file, 'wf_csv': wf_file} + return {'classifications': cls_file, 'workflows': wf_file} def process_wf_export(self, 
wf_csv): self.wf_df = pd.read_csv(wf_csv) @@ -68,6 +95,27 @@ def process_cls_export(self, cls_csv): self.cls_df = cls_df.query(f'workflow_version == {self.workflow_version}') return self.cls_df + def connect_blob_storage(self): + connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING') + self.blob_service_client = BlobServiceClient.from_connection_string(connect_str) + self.blob_service_client.create_container(name=self.id) + + def upload_file_to_storage(self, container_name, filepath): + blob = filepath.split('/')[-1] + blob_client = self.blob_service_client.get_blob_client(container=container_name, blob=blob) + with open(file=filepath, mode="rb") as data: + blob_client.upload_blob(data, overwrite=True) + + def upload_files(self): + self.connect_blob_storage() + reductions_file = f'{self.output_path}/{self.workflow_id}_reductions.csv' + self.upload_file_to_storage(self.id, reductions_file) + zipfile = make_archive(f'tmp/{self.id}', 'zip', self.output_path) + self.upload_file_to_storage(self.id, zipfile) + + def _generate_uuid(self): + self.id = uuid.uuid4().hex + def _download_export(self, url, filepath): http = urllib3.PoolManager() r = http.request('GET', url, preload_content=False) @@ -82,7 +130,7 @@ def _download_export(self, url, filepath): def _connect_api_client(self): # connect to the API only once for this function request Panoptes.connect( - endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'), - client_id=getenv('PANOPTES_CLIENT_ID'), - client_secret=getenv('PANOPTES_CLIENT_SECRET') + endpoint=os.getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'), + client_id=os.getenv('PANOPTES_CLIENT_ID'), + client_secret=os.getenv('PANOPTES_CLIENT_SECRET') ) diff --git a/panoptes_aggregation/tests/batch_aggregation/cls_export.csv b/panoptes_aggregation/tests/batch_aggregation/cls_export.csv new file mode 100644 index 00000000..dd2ed4e4 --- /dev/null +++ b/panoptes_aggregation/tests/batch_aggregation/cls_export.csv @@ -0,0 +1,8 @@ 
+classification_id,user_name,user_id,user_ip,workflow_id,workflow_name,workflow_version,created_at,gold_standard,expert,metadata,annotations,subject_data,subject_ids +543695319,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:17:42 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:16:35.085Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:17:42.334Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":150,""clientHeight"":150,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""Yes""},{""task"":""T1"",""task_label"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""value"":""Yes""}]","{""96588114"":{""retired"":{""id"":125510348,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:44:28.745Z"",""updated_at"":""2024-02-23T16:17:42.488Z"",""retired_at"":""2024-02-23T16:17:42.479Z"",""subject_id"":96588114,""retirement_reason"":""classification_count""},""ramean"":123.7681641625,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23abjrdem"",""decmean"":57.182124325000004,""objectId"":""ZTF23abjrdem""}}",96588114 +543695340,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:17:48 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:17:42.351Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:17:48.539Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588105"":{""retired"":{""id"":125510263,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:24.074Z"",""updated_at"":""2024-02-23T16:17:48.696Z"",""retired_at"":""2024-02-23T16:17:48.686Z"",""subject_id"":96588105,""retirement_reason"":""classification_count""},""ramean"":157.50762885625,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF22abycniv"",""decmean"":8.21724599375,""objectId"":""ZTF22abycniv""}}",96588105 +543695374,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:02 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:17:48.559Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:02.264Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""Yes""},{""task"":""T1"",""task_label"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""value"":""No""}]","{""96588126"":{""retired"":{""id"":125510270,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:25.834Z"",""updated_at"":""2024-02-23T16:18:02.396Z"",""retired_at"":""2024-02-23T16:18:02.389Z"",""subject_id"":96588126,""retirement_reason"":""classification_count""},""ramean"":98.49884808888889,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF22abfnkve"",""decmean"":58.67660070000001,""objectId"":""ZTF22abfnkve""}}",96588126 +543695390,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:09 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:02.283Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:09.532Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588128"":{""retired"":{""id"":125510290,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:36.461Z"",""updated_at"":""2024-02-23T16:18:09.674Z"",""retired_at"":""2024-02-23T16:18:09.667Z"",""subject_id"":96588128,""retirement_reason"":""classification_count""},""ramean"":41.573462775,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23aavvcjd"",""decmean"":-5.001660237499999,""objectId"":""ZTF23aavvcjd""}}",96588128 +543695425,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:24 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:09.551Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:24.225Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""Yes""},{""task"":""T1"",""task_label"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""value"":""No""}]","{""96588109"":{""retired"":{""id"":125510335,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:44:14.501Z"",""updated_at"":""2024-02-23T16:18:24.390Z"",""retired_at"":""2024-02-23T16:18:24.378Z"",""subject_id"":96588109,""retirement_reason"":""classification_count""},""ramean"":11.719328585714285,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23aatzhso"",""decmean"":42.02810038571429,""objectId"":""ZTF23aatzhso""}}",96588109 +543695436,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:27 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:24.243Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:27.892Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588106"":{""retired"":{""id"":125510296,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:44.966Z"",""updated_at"":""2024-02-23T17:54:11.466Z"",""retired_at"":""2024-02-23T17:54:11.458Z"",""subject_id"":96588106,""retirement_reason"":""classification_count""},""ramean"":47.78652812,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23aauyuay"",""decmean"":73.76492526000001,""objectId"":""ZTF23aauyuay""}}",96588106 +543695453,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:35 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:27.902Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:35.478Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588131"":{""retired"":{""id"":125510305,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:48.500Z"",""updated_at"":""2024-02-23T18:31:31.686Z"",""retired_at"":""2024-02-23T18:31:31.677Z"",""subject_id"":96588131,""retirement_reason"":""classification_count""},""ramean"":158.6458489125,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23absjgik"",""decmean"":-27.650916137499998,""objectId"":""ZTF23absjgik""}}",96588131 diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 8f18a549..3e79914b 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -1,51 +1,114 @@ import unittest -from unittest.mock import patch, Mock, MagicMock +from unittest.mock import patch, Mock, MagicMock, call from panoptes_aggregation.scripts import batch_utils from panoptes_aggregation.batch_aggregation import run_aggregation from panoptes_aggregation import batch_aggregation as batch_agg +wf_export = f'panoptes_aggregation/tests/batch_aggregation/wf_export.csv' +cls_export = f'panoptes_aggregation/tests/batch_aggregation/cls_export.csv' + +@patch("panoptes_aggregation.batch_aggregation.BatchAggregator._connect_api_client", new=MagicMock()) class TestBatchAggregation(unittest.TestCase): @patch("panoptes_aggregation.batch_aggregation.workflow_extractor_config") - @patch("panoptes_aggregation.batch_aggregation.workflow_reducer_config") @patch("panoptes_aggregation.batch_aggregation.BatchAggregator") - def test_run_aggregation(self, mock_aggregator, mock_wf_red_conf, mock_wf_ext_conf): + def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): mock_aggregator.process_wf_export.return_value = MagicMock() mock_aggregator.process_cls_export.return_value = MagicMock() 
- batch_utils.batch_extract = MagicMock() - batch_utils.batch_reduce = MagicMock() + + mock_df = MagicMock() + test_extracts = { 'question_extractor': mock_df } + batch_utils.batch_extract = MagicMock(return_value=test_extracts) + mock_reducer = MagicMock() + batch_utils.batch_reduce = mock_reducer + run_aggregation(1, 10, 100) mock_aggregator.assert_called_once_with(1, 10, 100) mock_wf_ext_conf.assert_called_once() - mock_wf_red_conf.assert_called_once() batch_utils.batch_extract.assert_called_once() - batch_utils.batch_reduce.assert_called_once() + mock_df.to_csv.assert_called() + batch_utils.batch_reduce.assert_called() + self.assertEqual(mock_reducer.call_count, 2) + + # The reducer's call list includes subsequent calls to to_csv, but the args are methods called on the mock + # rather than use the set values i.e. "" + # mock_aggregator.workflow_id = '10' + # mock_aggregator.output_path = 'tmp/10' + # mock_reducer.assert_has_calls([ + # call(mock_df, {'reducer_config': {'question_reducer': {}}}), + # call().to_csv('tmp/10/10_reducers.csv', mode='a'), + # call(mock_df, {'reducer_config': {'question_consensus_reducer': {}}}), + # call().to_csv('tmp/10/10_reducers.csv', mode='a'), + # ]) + # How do I test the specific instance of BatchAggregator rather than the mocked class? 
+ # mock_aggregator.upload_files.assert_called_once() + + @patch("panoptes_aggregation.batch_aggregation.os.mkdir") @patch("panoptes_aggregation.batch_aggregation.Workflow") @patch("panoptes_aggregation.batch_aggregation.Project") @patch("panoptes_aggregation.batch_aggregation.Panoptes.connect") - def test_save_exports(self, mock_client, mock_project, mock_workflow): + def test_save_exports(self, mock_client, mock_project, mock_workflow, mock_mkdir): # Test that Panoptes calls are made and files are saved csv_dict = {'media': [ {'src': 'http://zooniverse.org/123.csv'} ] } mock_project.return_value.describe_export.return_value = csv_dict mock_workflow.return_value.describe_export.return_value = csv_dict ba = batch_agg.BatchAggregator(1, 10, 100) - batch_agg.BatchAggregator._download_export = MagicMock(side_effect=[f'./cls_export.csv', f'./wf_export.csv']) - expected_response = {'cls_csv': 'tmp/10_cls_export.csv', 'wf_csv': 'tmp/1_workflow_export.csv'} + batch_agg.BatchAggregator._download_export = MagicMock(side_effect=['./cls_export.csv', './wf_export.csv']) + mock_uuidgen = MagicMock(side_effect=ba._generate_uuid()) + ba._generate_uuid = mock_uuidgen + expected_response = {'classifications': 'tmp/10/10_cls_export.csv', 'workflows': 'tmp/10/10_workflow_export.csv'} + response = ba.save_exports() + + # Why do these mocked methods called in __init__ not get counted as called? 
+ # They are def getting called as the attributes are set + # mock_uuidgen.assert_called_once() + # mock_client.assert_called_once() + self.assertEqual(response, expected_response) - mock_client.assert_called_once() + mock_mkdir.assert_called_once() mock_project.assert_called_once_with(1) mock_workflow.assert_called_once_with(10) mock_project.return_value.describe_export.assert_called_once_with('workflows') mock_workflow.return_value.describe_export.assert_called_once_with('classifications') def test_process_wf_export(self): - # Test that: - # the wf export is parsed - # the version instance vars are set - # dataframe is retuned - assert 1 == 1 + ba = batch_agg.BatchAggregator(1, 10, 100) + result = ba.process_wf_export(wf_export) + self.assertEqual(ba.wf_maj_version, 16) + self.assertEqual(ba.wf_min_version, 55) + self.assertEqual(ba.workflow_version, '16.55') + self.assertEqual(result.__class__.__name__, 'DataFrame') def test_process_cls_export(self): - # Test that the cls csv is parsed and a dataframe is returned - assert 1 == 1 + ba = batch_agg.BatchAggregator(1, 10, 100) + ba.workflow_version = '16.55' + result = ba.process_cls_export(cls_export) + self.assertEqual(result.__class__.__name__, 'DataFrame') + + @patch("panoptes_aggregation.batch_aggregation.BatchAggregator.connect_blob_storage") + @patch("panoptes_aggregation.batch_aggregation.make_archive") + def test_upload_files(self, archive_mock, client_mock): + zipped_mock = MagicMock() + archive_mock.return_value = zipped_mock + ba = batch_agg.BatchAggregator(1, 10, 100) + ba.upload_file_to_storage = MagicMock() + ba.output_path = 'tmp/10' + reductions_file = 'tmp/10/10_reductions.csv' + ba.upload_files() + client_mock.assert_called_once() + archive_mock.assert_called_once() + ba.upload_file_to_storage.assert_has_calls([call(ba.id, reductions_file), call(ba.id, zipped_mock)]) + + def test_upload_file_to_storage(self): + ba = batch_agg.BatchAggregator(1, 10, 100) + mock_client = MagicMock() + 
ba.blob_service_client = MagicMock(return_value=mock_client) + ba.upload_file_to_storage('container', cls_export) + mock_client.upload_blob.assert_called_once + + @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") + def test_connect_blob_storage(self, mock_client): + ba = batch_agg.BatchAggregator(1, 10, 100) + ba.connect_blob_storage() + ba.blob_service_client.create_container.assert_called_once_with(name=ba.id) diff --git a/panoptes_aggregation/tests/batch_aggregation/wf_export.csv b/panoptes_aggregation/tests/batch_aggregation/wf_export.csv new file mode 100644 index 00000000..14e7fe45 --- /dev/null +++ b/panoptes_aggregation/tests/batch_aggregation/wf_export.csv @@ -0,0 +1,7 @@ +workflow_id,display_name,version,active,classifications_count,pairwise,grouped,prioritized,primary_language,first_task,tutorial_subject_id,retired_set_member_subjects_count,tasks,retirement,aggregation,strings,minor_version +10,Superluminous Supernovae,14,true,105171,false,false,false,en,T0,,9056,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""}],""question"":""T0.question""},""T1"":{""help"":""T1.help"",""type"":""single"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""}],""question"":""T1.question""}}","{""options"":{""count"":10},""criteria"":""classification_count""}",{},"{""T0.help"":""The lightcurve is the plot showing brightness over time. The numbers on the x-axis are in days. Look to see if the brightness has been increasing over a period of more than 20 days.\n\nThe blue diamonds represent how bright the supernova is in blue light (at around 450 nanometres)\nand red circles show how bright it is in red light (about 610 nanometres). 
Often supernovae start blue and\nbecome red as they cool.\n\nHere are some examples of lightcurves that have been rising for over 20 days:\n\n![ZTF20abobpcb_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/d04087a4-ea8d-48f2-b297-401a58a091dd.jpeg)\n\n![ZTF20aadcbvz_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/a8dde630-fcc6-4577-afbc-6c1b7f451857.jpeg)\n\nEven though we observed this object well into its decline, the start of it still has a rise more than 20 days long, so it is a good candidate.\n\nRemember that the \n"",""T1.help"":""Here are some examples of what a faint, fuzzy galaxy hosting a superluminous supernova looks like (Please note that the supernova is not visible in these images. The cross-hair marks where the candidate supernova was detected):\n\n![ZTF19abxekxi-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/928be57f-844b-4947-b30f-28e70dfeb365.png)\n\n![ZTF19abpbopt-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/1f125022-f2f8-411b-a1c4-b1d825d06d9f.png)"",""T0.question"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""T1.question"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T1.answers.0.label"":""Yes"",""T1.answers.1.label"":""No""}",52 +10,Superluminous Supernovae,14,true,105171,false,false,false,en,T0,,9056,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""}],""question"":""T0.question""},""T1"":{""help"":""T1.help"",""type"":""single"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""}],""question"":""T1.question""}}","{""options"":{""count"":10},""criteria"":""classification_count""}",{},"{""T0.help"":""The lightcurve is the plot showing brightness over time. The numbers on the x-axis are in days. Look to see if the brightness has been increasing over a period of more than 20 days.\n\nThe blue diamonds represent how bright the supernova is in blue light (at around 450 nanometres)\nand red circles show how bright it is in red light (about 610 nanometres). Often supernovae start blue and\nbecome red as they cool.\n\nHere are some examples of lightcurves that have been rising for over 20 days:\n\n![ZTF20abobpcb_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/d04087a4-ea8d-48f2-b297-401a58a091dd.jpeg)\n\n![ZTF20aadcbvz_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/a8dde630-fcc6-4577-afbc-6c1b7f451857.jpeg)\n\nEven though we observed this object well into its decline, the start of it still has a rise more than 20 days long, so it is a good candidate.\n\nRemember that the size of a 20-day period will change depending on the number of observations, so you need to look at the axis. 
\n"",""T1.help"":""Here are some examples of what a faint, fuzzy galaxy hosting a superluminous supernova looks like (Please note that the supernova is not visible in these images. The cross-hair marks where the candidate supernova was detected):\n\n![ZTF19abxekxi-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/928be57f-844b-4947-b30f-28e70dfeb365.png)\n\n![ZTF19abpbopt-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/1f125022-f2f8-411b-a1c4-b1d825d06d9f.png)"",""T0.question"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""T1.question"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T1.answers.0.label"":""Yes"",""T1.answers.1.label"":""No""}",53 +10,Superluminous Supernovae,14,true,105171,false,false,false,en,T0,,9056,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""}],""question"":""T0.question""},""T1"":{""help"":""T1.help"",""type"":""single"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""}],""question"":""T1.question""}}","{""options"":{""count"":10},""criteria"":""classification_count""}",{},"{""T0.help"":""The lightcurve is the plot showing brightness over time. The numbers on the x-axis are in days. Look to see if the brightness has been increasing over a period of more than 20 days.\n\nThe blue diamonds represent how bright the supernova is in blue light (at around 450 nanometres)and red circles show how bright it is in red light (about 610 nanometres). 
Often supernovae start blue and\nbecome red as they cool.\n\nHere are some examples of lightcurves that have been rising for over 20 days:\n\n![ZTF20abobpcb_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/d04087a4-ea8d-48f2-b297-401a58a091dd.jpeg)\n\n![ZTF20aadcbvz_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/a8dde630-fcc6-4577-afbc-6c1b7f451857.jpeg)\n\nEven though we observed this object well into its decline, the start of it still has a rise more than 20 days long, so it is a good candidate.\n\nRemember that the size of a 20-day period will change depending on the number of observations, so you need to look at the axis. \n"",""T1.help"":""Here are some examples of what a faint, fuzzy galaxy hosting a superluminous supernova looks like (Please note that the supernova is not visible in these images. The cross-hair marks where the candidate supernova was detected):\n\n![ZTF19abxekxi-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/928be57f-844b-4947-b30f-28e70dfeb365.png)\n\n![ZTF19abpbopt-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/1f125022-f2f8-411b-a1c4-b1d825d06d9f.png)"",""T0.question"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""T1.question"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T1.answers.0.label"":""Yes"",""T1.answers.1.label"":""No""}",54 +10,Superluminous Supernovae,14,true,105171,false,false,false,en,T0,,9056,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""}],""question"":""T0.question""},""T1"":{""help"":""T1.help"",""type"":""single"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""}],""question"":""T1.question""}}","{""options"":{""count"":10},""criteria"":""classification_count""}",{},"{""T0.help"":""The lightcurve is the plot showing brightness over time. The numbers on the x-axis are in days. Look to see if the brightness has been increasing over a period of more than 20 days.\n\nThe blue diamonds represent how bright the supernova is in blue light (at around 450 nanometres)and red circles show how bright it is in red light (about 610 nanometres). Often supernovae start blue and become red as they cool.\n\nHere are some examples of lightcurves that have been rising for over 20 days:\n\n![ZTF20abobpcb_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/d04087a4-ea8d-48f2-b297-401a58a091dd.jpeg)\n\n![ZTF20aadcbvz_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/a8dde630-fcc6-4577-afbc-6c1b7f451857.jpeg)\n\nEven though we observed this object well into its decline, the start of it still has a rise more than 20 days long, so it is a good candidate.\n\nRemember that the size of a 20-day period will change depending on the number of observations, so you need to look at the axis. 
\n"",""T1.help"":""Here are some examples of what a faint, fuzzy galaxy hosting a superluminous supernova looks like (Please note that the supernova is not visible in these images. The cross-hair marks where the candidate supernova was detected):\n\n![ZTF19abxekxi-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/928be57f-844b-4947-b30f-28e70dfeb365.png)\n\n![ZTF19abpbopt-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/1f125022-f2f8-411b-a1c4-b1d825d06d9f.png)"",""T0.question"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""T1.question"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T1.answers.0.label"":""Yes"",""T1.answers.1.label"":""No""}",55 +10,Superluminous Supernovae,15,true,105171,false,false,false,en,T0,,9056,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""}],""question"":""T0.question"",""required"":true},""T1"":{""help"":""T1.help"",""type"":""single"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""}],""question"":""T1.question""}}","{""options"":{""count"":10},""criteria"":""classification_count""}",{},"{""T0.help"":""The lightcurve is the plot showing brightness over time. The numbers on the x-axis are in days. Look to see if the brightness has been increasing over a period of more than 20 days.\n\nThe blue diamonds represent how bright the supernova is in blue light (at around 450 nanometres)and red circles show how bright it is in red light (about 610 nanometres). 
Often supernovae start blue and become red as they cool.\n\nHere are some examples of lightcurves that have been rising for over 20 days:\n\n![ZTF20abobpcb_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/d04087a4-ea8d-48f2-b297-401a58a091dd.jpeg)\n\n![ZTF20aadcbvz_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/a8dde630-fcc6-4577-afbc-6c1b7f451857.jpeg)\n\nEven though we observed this object well into its decline, the start of it still has a rise more than 20 days long, so it is a good candidate.\n\nRemember that the size of a 20-day period will change depending on the number of observations, so you need to look at the axis. \n"",""T1.help"":""Here are some examples of what a faint, fuzzy galaxy hosting a superluminous supernova looks like (Please note that the supernova is not visible in these images. The cross-hair marks where the candidate supernova was detected):\n\n![ZTF19abxekxi-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/928be57f-844b-4947-b30f-28e70dfeb365.png)\n\n![ZTF19abpbopt-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/1f125022-f2f8-411b-a1c4-b1d825d06d9f.png)"",""T0.question"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. 
In astronomy smaller magnitudes are brighter!"",""T1.question"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T1.answers.0.label"":""Yes"",""T1.answers.1.label"":""No""}",55 +10,Superluminous Supernovae,16,true,105171,false,false,false,en,T0,,9056,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""}],""question"":""T0.question"",""required"":true},""T1"":{""help"":""T1.help"",""type"":""single"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""}],""question"":""T1.question"",""required"":true}}","{""options"":{""count"":10},""criteria"":""classification_count""}",{},"{""T0.help"":""The lightcurve is the plot showing brightness over time. The numbers on the x-axis are in days. Look to see if the brightness has been increasing over a period of more than 20 days.\n\nThe blue diamonds represent how bright the supernova is in blue light (at around 450 nanometres)and red circles show how bright it is in red light (about 610 nanometres). Often supernovae start blue and become red as they cool.\n\nHere are some examples of lightcurves that have been rising for over 20 days:\n\n![ZTF20abobpcb_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/d04087a4-ea8d-48f2-b297-401a58a091dd.jpeg)\n\n![ZTF20aadcbvz_light_curve.jpeg](https://panoptes-uploads.zooniverse.org/production/project_attached_image/a8dde630-fcc6-4577-afbc-6c1b7f451857.jpeg)\n\nEven though we observed this object well into its decline, the start of it still has a rise more than 20 days long, so it is a good candidate.\n\nRemember that the size of a 20-day period will change depending on the number of observations, so you need to look at the axis. 
\n"",""T1.help"":""Here are some examples of what a faint, fuzzy galaxy hosting a superluminous supernova looks like (Please note that the supernova is not visible in these images. The cross-hair marks where the candidate supernova was detected):\n\n![ZTF19abxekxi-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/928be57f-844b-4947-b30f-28e70dfeb365.png)\n\n![ZTF19abpbopt-image.png](https://panoptes-uploads.zooniverse.org/production/project_attached_image/1f125022-f2f8-411b-a1c4-b1d825d06d9f.png)"",""T0.question"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""T1.question"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T1.answers.0.label"":""Yes"",""T1.answers.1.label"":""No""}",55 diff --git a/pyproject.toml b/pyproject.toml index a5be1a7c..28567368 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,8 @@ dependencies = [ [project.optional-dependencies] online = [ + "azure-identity>=1,<2", + "azure-storage-blob>=12,<13", "celery>=5.3,<5.4", "redis>=5,<6", "flower>2,<3", From 71528fe43be6e5043f022c4c600bd1308b5a110f Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Wed, 22 May 2024 16:06:37 -0500 Subject: [PATCH 26/37] less flake8y --- panoptes_aggregation/batch_aggregation.py | 4 +++- .../tests/batch_aggregation/test_batch_aggregation.py | 11 ++++++----- .../tests/router_tests/test_routes.py | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 8db4e574..bf7f3c96 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -19,12 +19,13 @@ celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") celery.conf.result_backend 
= os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") + @celery.task(name="run_aggregation") def run_aggregation(project_id, workflow_id, user_id): ba = BatchAggregator(project_id, workflow_id, user_id) ba.save_exports() - wf_df = ba.process_wf_export(ba.wf_csv) + ba.process_wf_export(ba.wf_csv) cls_df = ba.process_cls_export(ba.cls_csv) extractor_config = workflow_extractor_config(ba.tasks) @@ -51,6 +52,7 @@ def run_aggregation(project_id, workflow_id, user_id): # hit up panoptes, let em know you're done + class BatchAggregator: """ Bunch of stuff to manage a batch aggregation run diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 3e79914b..56ee9e08 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -1,11 +1,12 @@ import unittest -from unittest.mock import patch, Mock, MagicMock, call +from unittest.mock import patch, MagicMock, call from panoptes_aggregation.scripts import batch_utils from panoptes_aggregation.batch_aggregation import run_aggregation from panoptes_aggregation import batch_aggregation as batch_agg -wf_export = f'panoptes_aggregation/tests/batch_aggregation/wf_export.csv' -cls_export = f'panoptes_aggregation/tests/batch_aggregation/cls_export.csv' +wf_export = 'panoptes_aggregation/tests/batch_aggregation/wf_export.csv' +cls_export = 'panoptes_aggregation/tests/batch_aggregation/cls_export.csv' + @patch("panoptes_aggregation.batch_aggregation.BatchAggregator._connect_api_client", new=MagicMock()) class TestBatchAggregation(unittest.TestCase): @@ -16,7 +17,7 @@ def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): mock_aggregator.process_cls_export.return_value = MagicMock() mock_df = MagicMock() - test_extracts = { 'question_extractor': mock_df } + test_extracts = {'question_extractor': mock_df} 
batch_utils.batch_extract = MagicMock(return_value=test_extracts) mock_reducer = MagicMock() batch_utils.batch_reduce = mock_reducer @@ -49,7 +50,7 @@ def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): @patch("panoptes_aggregation.batch_aggregation.Panoptes.connect") def test_save_exports(self, mock_client, mock_project, mock_workflow, mock_mkdir): # Test that Panoptes calls are made and files are saved - csv_dict = {'media': [ {'src': 'http://zooniverse.org/123.csv'} ] } + csv_dict = {'media': [{'src': 'http://zooniverse.org/123.csv'}]} mock_project.return_value.describe_export.return_value = csv_dict mock_workflow.return_value.describe_export.return_value = csv_dict ba = batch_agg.BatchAggregator(1, 10, 100) diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index 5c81e2be..70db0aa5 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -70,6 +70,7 @@ def test_one_running_reducer_route(self): # Override json.dumps() for this test so it doesn't try to jsonify the mock import json + @patch("panoptes_aggregation.batch_aggregation.json.dumps", return_value=json.dumps({'project_id': 1, 'workflow_id': 10, 'user_id': 100, 'task_id': 'asdf'})) @patch("panoptes_aggregation.batch_aggregation.run_aggregation.delay") def test_run_aggregation_route(self, mocked_task, mocked_json): From 1f8b51ffff22b0123299283c1e9a5bf8690c946e Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 28 May 2024 18:29:30 -0500 Subject: [PATCH 27/37] Add final POST message to Panoptes --- panoptes_aggregation/batch_aggregation.py | 18 ++++++++++++++++++ .../test_batch_aggregation.py | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index bf7f3c96..fe12ab42 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ 
b/panoptes_aggregation/batch_aggregation.py @@ -51,6 +51,9 @@ def run_aggregation(project_id, workflow_id, user_id): ba.upload_files() # hit up panoptes, let em know you're done + # This could catch PanoptesAPIException, but what to do if it fails? + ba.create_run_in_panoptes() + class BatchAggregator: @@ -115,6 +118,21 @@ def upload_files(self): zipfile = make_archive(f'tmp/{self.id}', 'zip', self.output_path) self.upload_file_to_storage(self.id, zipfile) + def create_run_in_panoptes(self): + Panoptes.client().post( + '/aggregations/', + json={ + 'aggregations': { + 'uuid': self.id, + 'status': 'completed', + 'links': { + 'workflow': self.workflow_id, + 'user': self.user_id + } + } + } + ) + def _generate_uuid(self): self.id = uuid.uuid4().hex diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 56ee9e08..e7b4efe9 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -108,6 +108,12 @@ def test_upload_file_to_storage(self): ba.upload_file_to_storage('container', cls_export) mock_client.upload_blob.assert_called_once + @patch("panoptes_aggregation.batch_aggregation.Panoptes.post") + def test_create_run_in_panoptes(self, mock_poster): + ba = batch_agg.BatchAggregator(1, 10, 100) + ba.create_run_in_panoptes() + mock_poster.assert_called_with('/aggregations/', json={ 'aggregations': { 'uuid': ba.id, 'status': 'completed', 'links': { 'workflow': 10, 'user': 100 } } }) + @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") def test_connect_blob_storage(self, mock_client): ba = batch_agg.BatchAggregator(1, 10, 100) From db9591341549fd60c1ed0348af477b672a51f18f Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 28 May 2024 19:41:01 -0500 Subject: [PATCH 28/37] Flake --- panoptes_aggregation/batch_aggregation.py | 1 - 
.../tests/batch_aggregation/test_batch_aggregation.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index fe12ab42..1554b87c 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -55,7 +55,6 @@ def run_aggregation(project_id, workflow_id, user_id): ba.create_run_in_panoptes() - class BatchAggregator: """ Bunch of stuff to manage a batch aggregation run diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index e7b4efe9..67f6dd7f 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -112,7 +112,7 @@ def test_upload_file_to_storage(self): def test_create_run_in_panoptes(self, mock_poster): ba = batch_agg.BatchAggregator(1, 10, 100) ba.create_run_in_panoptes() - mock_poster.assert_called_with('/aggregations/', json={ 'aggregations': { 'uuid': ba.id, 'status': 'completed', 'links': { 'workflow': 10, 'user': 100 } } }) + mock_poster.assert_called_with('/aggregations/', json={'aggregations': {'uuid': ba.id, 'status': 'completed', 'links': {'workflow': 10, 'user': 100 }}}) @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") def test_connect_blob_storage(self, mock_client): From d4635b9a2e4cc2a85ef88e134489de5610c082ca Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 28 May 2024 19:41:58 -0500 Subject: [PATCH 29/37] flake --- .../tests/batch_aggregation/test_batch_aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 67f6dd7f..fab09ac1 100644 --- 
a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -112,7 +112,7 @@ def test_upload_file_to_storage(self): def test_create_run_in_panoptes(self, mock_poster): ba = batch_agg.BatchAggregator(1, 10, 100) ba.create_run_in_panoptes() - mock_poster.assert_called_with('/aggregations/', json={'aggregations': {'uuid': ba.id, 'status': 'completed', 'links': {'workflow': 10, 'user': 100 }}}) + mock_poster.assert_called_with('/aggregations/', json={'aggregations': {'uuid': ba.id, 'status': 'completed', 'links': {'workflow': 10, 'user': 100}}}) @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") def test_connect_blob_storage(self, mock_client): From 194b0ae3b8611f95ebbf4c9c885adbb832a36227 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 28 May 2024 20:41:03 -0500 Subject: [PATCH 30/37] Pull etag before atempting update --- panoptes_aggregation/batch_aggregation.py | 19 ++++++++++--------- .../test_batch_aggregation.py | 11 +++++++---- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 1554b87c..fe78edb0 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -50,9 +50,8 @@ def run_aggregation(project_id, workflow_id, user_id): reduced_data[reducer].to_csv(filename, mode='a') ba.upload_files() - # hit up panoptes, let em know you're done # This could catch PanoptesAPIException, but what to do if it fails? 
- ba.create_run_in_panoptes() + ba.update_panoptes() class BatchAggregator: @@ -117,17 +116,19 @@ def upload_files(self): zipfile = make_archive(f'tmp/{self.id}', 'zip', self.output_path) self.upload_file_to_storage(self.id, zipfile) - def create_run_in_panoptes(self): - Panoptes.client().post( + def update_panoptes(self): + # An Aggregation class can be added to the python client to avoid doing this manually + params = {'workflow_id': self.workflow_id, 'user_id': self.user_id} + response = Panoptes.client().get('/aggregations/', params=params) + fresh_etag = response[1] + + Panoptes.client().put( '/aggregations/', + etag=fresh_etag, json={ 'aggregations': { 'uuid': self.id, - 'status': 'completed', - 'links': { - 'workflow': self.workflow_id, - 'user': self.user_id - } + 'status': 'completed' } } ) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index fab09ac1..e6c8c431 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -108,11 +108,14 @@ def test_upload_file_to_storage(self): ba.upload_file_to_storage('container', cls_export) mock_client.upload_blob.assert_called_once - @patch("panoptes_aggregation.batch_aggregation.Panoptes.post") - def test_create_run_in_panoptes(self, mock_poster): + @patch("panoptes_aggregation.batch_aggregation.Panoptes.put") + @patch("panoptes_aggregation.batch_aggregation.Panoptes.get") + def test_update_panoptes(self, mock_get, mock_put): ba = batch_agg.BatchAggregator(1, 10, 100) - ba.create_run_in_panoptes() - mock_poster.assert_called_with('/aggregations/', json={'aggregations': {'uuid': ba.id, 'status': 'completed', 'links': {'workflow': 10, 'user': 100}}}) + mock_get.return_value = ({}, 'thisisanetag') + ba.update_panoptes() + mock_get.assert_called_with('/aggregations/', params={'workflow_id': 10, 'user_id': 100}) 
+ mock_put.assert_called_with('/aggregations/', etag='thisisanetag', json={'aggregations': {'uuid': ba.id, 'status': 'completed'}}) @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") def test_connect_blob_storage(self, mock_client): From 55fec657c45f745412c77aaa004a82583969e1ed Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Mon, 3 Jun 2024 15:47:04 -0500 Subject: [PATCH 31/37] Remove unnecessary mocks --- .../tests/batch_aggregation/test_batch_aggregation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index e6c8c431..fe2c90e7 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -13,9 +13,6 @@ class TestBatchAggregation(unittest.TestCase): @patch("panoptes_aggregation.batch_aggregation.workflow_extractor_config") @patch("panoptes_aggregation.batch_aggregation.BatchAggregator") def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): - mock_aggregator.process_wf_export.return_value = MagicMock() - mock_aggregator.process_cls_export.return_value = MagicMock() - mock_df = MagicMock() test_extracts = {'question_extractor': mock_df} batch_utils.batch_extract = MagicMock(return_value=test_extracts) From 5285b23cda20786bf99497a292259076024cd131 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Mon, 3 Jun 2024 19:10:01 -0500 Subject: [PATCH 32/37] Assert result set, not method called --- .../tests/batch_aggregation/test_batch_aggregation.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index fe2c90e7..c5079ed4 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ 
b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -44,8 +44,7 @@ def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): @patch("panoptes_aggregation.batch_aggregation.os.mkdir") @patch("panoptes_aggregation.batch_aggregation.Workflow") @patch("panoptes_aggregation.batch_aggregation.Project") - @patch("panoptes_aggregation.batch_aggregation.Panoptes.connect") - def test_save_exports(self, mock_client, mock_project, mock_workflow, mock_mkdir): + def test_save_exports(self, mock_project, mock_workflow, mock_mkdir): # Test that Panoptes calls are made and files are saved csv_dict = {'media': [{'src': 'http://zooniverse.org/123.csv'}]} mock_project.return_value.describe_export.return_value = csv_dict @@ -58,11 +57,7 @@ def test_save_exports(self, mock_client, mock_project, mock_workflow, mock_mkdir response = ba.save_exports() - # Why do these mocked methods called in __init__ not get counted as called? - # They are def getting called as the attributes are set - # mock_uuidgen.assert_called_once() - # mock_client.assert_called_once() - + assert ba.id is not None self.assertEqual(response, expected_response) mock_mkdir.assert_called_once() mock_project.assert_called_once_with(1) From 8b3db20baa165292c64f66d4d221deb86be61b80 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Mon, 3 Jun 2024 19:16:44 -0500 Subject: [PATCH 33/37] clean up spec mocks --- .../tests/batch_aggregation/test_batch_aggregation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index c5079ed4..5e4d23de 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -51,8 +51,6 @@ def test_save_exports(self, mock_project, mock_workflow, mock_mkdir): mock_workflow.return_value.describe_export.return_value = csv_dict 
ba = batch_agg.BatchAggregator(1, 10, 100) batch_agg.BatchAggregator._download_export = MagicMock(side_effect=['./cls_export.csv', './wf_export.csv']) - mock_uuidgen = MagicMock(side_effect=ba._generate_uuid()) - ba._generate_uuid = mock_uuidgen expected_response = {'classifications': 'tmp/10/10_cls_export.csv', 'workflows': 'tmp/10/10_workflow_export.csv'} response = ba.save_exports() From c2ec0ceca25457f7f96d64e1fa534aef750d4c8a Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 4 Jun 2024 17:47:05 -0500 Subject: [PATCH 34/37] Add permissions checking, fix some specs, refactor Panoptes update --- panoptes_aggregation/batch_aggregation.py | 41 ++++++---- .../test_batch_aggregation.py | 80 ++++++++++++++----- 2 files changed, 87 insertions(+), 34 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index fe78edb0..c3d26139 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -2,6 +2,7 @@ import json import pandas as pd import os +import sys import urllib3 from shutil import make_archive import uuid @@ -12,9 +13,6 @@ from panoptes_aggregation.workflow_config import workflow_extractor_config from panoptes_aggregation.scripts import batch_utils -import logging -panoptes_client_logger = logging.getLogger('panoptes_client').setLevel(logging.ERROR) - celery = Celery(__name__) celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") @@ -23,6 +21,12 @@ @celery.task(name="run_aggregation") def run_aggregation(project_id, workflow_id, user_id): ba = BatchAggregator(project_id, workflow_id, user_id) + + if not ba.check_permission(): + print(f'Batch Aggregation: Unauthorized attempt by user {user_id} to aggregate workflow {workflow_id}') + # Exit the task gracefully without retrying or erroring + sys.exit() + ba.save_exports() 
ba.process_wf_export(ba.wf_csv) @@ -48,11 +52,16 @@ def run_aggregation(project_id, workflow_id, user_id): reduced_data[reducer] = batch_utils.batch_reduce(extract_df, reducer_config) filename = f'{ba.output_path}/{ba.workflow_id}_reductions.csv' reduced_data[reducer].to_csv(filename, mode='a') + + # Upload zip & reduction files to blob storage ba.upload_files() # This could catch PanoptesAPIException, but what to do if it fails? - ba.update_panoptes() + success_attrs = {'uuid': ba.id, 'status': 'completed'} + ba.update_panoptes(success_attrs) + # STDOUT messages get printed to kubernetes logs + print(f'Batch Aggregation: Run successful for workflow {workflow_id} by user {user_id}') class BatchAggregator: """ @@ -116,23 +125,27 @@ def upload_files(self): zipfile = make_archive(f'tmp/{self.id}', 'zip', self.output_path) self.upload_file_to_storage(self.id, zipfile) - def update_panoptes(self): + def update_panoptes(self, body_attributes): # An Aggregation class can be added to the python client to avoid doing this manually - params = {'workflow_id': self.workflow_id, 'user_id': self.user_id} - response = Panoptes.client().get('/aggregations/', params=params) + params = {'workflow_id': self.workflow_id} + response = Panoptes.client().get('/aggregations', params=params) + agg_id = response[0]['aggregations'][0]['id'] fresh_etag = response[1] Panoptes.client().put( - '/aggregations/', + f'/aggregations/{agg_id}', etag=fresh_etag, - json={ - 'aggregations': { - 'uuid': self.id, - 'status': 'completed' - } - } + json={'aggregations': body_attributes} ) + def check_permission(self): + project = Project.find(self.project_id) + permission = False + for user in project.collaborators(): + if user.id == self.user_id: + permission = True + return permission + def _generate_uuid(self): self.id = uuid.uuid4().hex diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 
5e4d23de..d2bd8b42 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -10,9 +10,21 @@ @patch("panoptes_aggregation.batch_aggregation.BatchAggregator._connect_api_client", new=MagicMock()) class TestBatchAggregation(unittest.TestCase): + @patch("panoptes_aggregation.batch_aggregation.BatchAggregator") + def test_run_aggregation_permission_failure(self, mock_aggregator): + mock_aggregator_instance = mock_aggregator.return_value + mock_aggregator_instance.check_permission.return_value = False + + with self.assertRaises(SystemExit) as leaver: + run_aggregation(1, 10, 100) + mock_aggregator_instance.update_panoptes.assert_not_called() + @patch("panoptes_aggregation.batch_aggregation.workflow_extractor_config") @patch("panoptes_aggregation.batch_aggregation.BatchAggregator") - def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): + def test_run_aggregation_success(self, mock_aggregator, mock_wf_ext_conf): + mock_aggregator_instance = mock_aggregator.return_value + mock_aggregator_instance.check_permission.return_value = True + mock_df = MagicMock() test_extracts = {'question_extractor': mock_df} batch_utils.batch_extract = MagicMock(return_value=test_extracts) @@ -20,26 +32,15 @@ def test_run_aggregation(self, mock_aggregator, mock_wf_ext_conf): batch_utils.batch_reduce = mock_reducer run_aggregation(1, 10, 100) + mock_aggregator_instance.check_permission.assert_called_once() mock_aggregator.assert_called_once_with(1, 10, 100) mock_wf_ext_conf.assert_called_once() batch_utils.batch_extract.assert_called_once() mock_df.to_csv.assert_called() batch_utils.batch_reduce.assert_called() self.assertEqual(mock_reducer.call_count, 2) - - # The reducer's call list includes subsequent calls to to_csv, but the args are methods called on the mock - # rather than use the set values i.e. 
"" - # mock_aggregator.workflow_id = '10' - # mock_aggregator.output_path = 'tmp/10' - # mock_reducer.assert_has_calls([ - # call(mock_df, {'reducer_config': {'question_reducer': {}}}), - # call().to_csv('tmp/10/10_reducers.csv', mode='a'), - # call(mock_df, {'reducer_config': {'question_consensus_reducer': {}}}), - # call().to_csv('tmp/10/10_reducers.csv', mode='a'), - # ]) - - # How do I test the specific instance of BatchAggregator rather than the mocked class? - # mock_aggregator.upload_files.assert_called_once() + mock_aggregator_instance.upload_files.assert_called_once() + mock_aggregator_instance.update_panoptes.assert_called_once() @patch("panoptes_aggregation.batch_aggregation.os.mkdir") @patch("panoptes_aggregation.batch_aggregation.Workflow") @@ -98,14 +99,53 @@ def test_upload_file_to_storage(self): ba.upload_file_to_storage('container', cls_export) mock_client.upload_blob.assert_called_once + @patch("panoptes_aggregation.batch_aggregation.Project") + def test_check_permission_success(self, mock_project): + mock_user = MagicMock() + mock_user.id = 100 + mock_project.find().collaborators.return_value = [mock_user] + + ba = batch_agg.BatchAggregator(1, 10, 100) + ba.check_permission() + mock_project.find.assert_called_with(1) + mock_project.find().collaborators.assert_called() + self.assertEqual(ba.check_permission(), True) + + @patch("panoptes_aggregation.batch_aggregation.Project") + def test_check_permission_failure(self, mock_project): + mock_user = MagicMock() + + # List of collaborators does not include initiating user + mock_user.id = 999 + mock_project.find().collaborators.return_value = [mock_user] + + ba = batch_agg.BatchAggregator(1, 10, 100) + ba.update_panoptes = MagicMock() + ba.check_permission() + mock_project.find.assert_called_with(1) + mock_project.find().collaborators.assert_called() + self.assertEqual(ba.check_permission(), False) + ba.update_panoptes.assert_not_called() + + 
@patch("panoptes_aggregation.batch_aggregation.Panoptes.put") + @patch("panoptes_aggregation.batch_aggregation.Panoptes.get") + def test_update_panoptes_success(self, mock_get, mock_put): + ba = batch_agg.BatchAggregator(1, 10, 100) + mock_get.return_value = ({'aggregations': [{'id': 5555}]}, 'thisisanetag') + body = {'uuid': ba.id, 'status': 'completed'} + ba.update_panoptes(body) + mock_get.assert_called_with('/aggregations', params={'workflow_id': 10}) + mock_put.assert_called_with('/aggregations/5555', etag='thisisanetag', json={'aggregations': body }) + @patch("panoptes_aggregation.batch_aggregation.Panoptes.put") @patch("panoptes_aggregation.batch_aggregation.Panoptes.get") - def test_update_panoptes(self, mock_get, mock_put): + def test_update_panoptes_failure(self, mock_get, mock_put): ba = batch_agg.BatchAggregator(1, 10, 100) - mock_get.return_value = ({}, 'thisisanetag') - ba.update_panoptes() - mock_get.assert_called_with('/aggregations/', params={'workflow_id': 10, 'user_id': 100}) - mock_put.assert_called_with('/aggregations/', etag='thisisanetag', json={'aggregations': {'uuid': ba.id, 'status': 'completed'}}) + mock_get.return_value = ({'aggregations': [{'id': 5555}]}, 'thisisanetag') + body = {'status': 'failure'} + ba.update_panoptes(body) + mock_get.assert_called_with('/aggregations', params={'workflow_id': 10}) + mock_put.assert_called_with('/aggregations/5555', etag='thisisanetag', json={'aggregations': body }) @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") def test_connect_blob_storage(self, mock_client): From 76221bdb8359796099908b7e8b2785f3c5c66dc2 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 4 Jun 2024 17:49:36 -0500 Subject: [PATCH 35/37] Flake --- panoptes_aggregation/batch_aggregation.py | 1 + .../tests/batch_aggregation/test_batch_aggregation.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py 
index c3d26139..74fa9571 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -63,6 +63,7 @@ def run_aggregation(project_id, workflow_id, user_id): # STDOUT messages get printed to kubernetes logs print(f'Batch Aggregation: Run successful for workflow {workflow_id} by user {user_id}') + class BatchAggregator: """ Bunch of stuff to manage a batch aggregation run diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index d2bd8b42..8050c359 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -15,7 +15,7 @@ def test_run_aggregation_permission_failure(self, mock_aggregator): mock_aggregator_instance = mock_aggregator.return_value mock_aggregator_instance.check_permission.return_value = False - with self.assertRaises(SystemExit) as leaver: + with self.assertRaises(SystemExit): run_aggregation(1, 10, 100) mock_aggregator_instance.update_panoptes.assert_not_called() @@ -135,7 +135,7 @@ def test_update_panoptes_success(self, mock_get, mock_put): body = {'uuid': ba.id, 'status': 'completed'} ba.update_panoptes(body) mock_get.assert_called_with('/aggregations', params={'workflow_id': 10}) - mock_put.assert_called_with('/aggregations/5555', etag='thisisanetag', json={'aggregations': body }) + mock_put.assert_called_with('/aggregations/5555', etag='thisisanetag', json={'aggregations': body}) @patch("panoptes_aggregation.batch_aggregation.Panoptes.put") @patch("panoptes_aggregation.batch_aggregation.Panoptes.get") @@ -145,7 +145,7 @@ def test_update_panoptes_failure(self, mock_get, mock_put): body = {'status': 'failure'} ba.update_panoptes(body) mock_get.assert_called_with('/aggregations', params={'workflow_id': 10}) - mock_put.assert_called_with('/aggregations/5555', etag='thisisanetag', json={'aggregations': body 
}) + mock_put.assert_called_with('/aggregations/5555', etag='thisisanetag', json={'aggregations': body}) @patch("panoptes_aggregation.batch_aggregation.BlobServiceClient") def test_connect_blob_storage(self, mock_client): From de7bc9997330bafd3bd8b0b1974f88c76f6c1098 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 4 Jun 2024 18:07:28 -0500 Subject: [PATCH 36/37] Use os.path for platform independence --- panoptes_aggregation/batch_aggregation.py | 15 +++++++++------ .../batch_aggregation/test_batch_aggregation.py | 5 +++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/panoptes_aggregation/batch_aggregation.py b/panoptes_aggregation/batch_aggregation.py index 74fa9571..330b11f2 100644 --- a/panoptes_aggregation/batch_aggregation.py +++ b/panoptes_aggregation/batch_aggregation.py @@ -50,7 +50,8 @@ def run_aggregation(project_id, workflow_id, user_id): # that is incompatible with the batch_utils batch_reduce method reducer_config = {'reducer_config': {reducer: {}}} reduced_data[reducer] = batch_utils.batch_reduce(extract_df, reducer_config) - filename = f'{ba.output_path}/{ba.workflow_id}_reductions.csv' + # filename = f'{ba.output_path}/{ba.workflow_id}_reductions.csv' + filename = os.path.join(ba.output_path, f'{ba.workflow_id}_reductions.csv') reduced_data[reducer].to_csv(filename, mode='a') # Upload zip & reduction files to blob storage @@ -77,17 +78,18 @@ def __init__(self, project_id, workflow_id, user_id): self._connect_api_client() def save_exports(self): - self.output_path = f'tmp/{self.workflow_id}' + self.output_path = os.path.join('tmp', str(self.workflow_id)) os.mkdir(self.output_path) cls_export = Workflow(self.workflow_id).describe_export('classifications') full_cls_url = cls_export['media'][0]['src'] - cls_file = f'{self.output_path}/{self.workflow_id}_cls_export.csv' + cls_file = os.path.join(self.output_path, f'{self.workflow_id}_cls_export.csv') + self._download_export(full_cls_url, cls_file) wf_export =
Project(self.project_id).describe_export('workflows') full_wf_url = wf_export['media'][0]['src'] - wf_file = f'{self.output_path}/{self.workflow_id}_workflow_export.csv' + wf_file = os.path.join(self.output_path, f'{self.workflow_id}_workflow_export.csv') self._download_export(full_wf_url, wf_file) self.cls_csv = cls_file @@ -121,9 +123,10 @@ def upload_file_to_storage(self, container_name, filepath): def upload_files(self): self.connect_blob_storage() - reductions_file = f'{self.output_path}/{self.workflow_id}_reductions.csv' + reductions_file = os.path.join(self.output_path, f'{self.workflow_id}_reductions.csv') self.upload_file_to_storage(self.id, reductions_file) - zipfile = make_archive(f'tmp/{self.id}', 'zip', self.output_path) + zippath = os.path.join('tmp', self.id) + zipfile = make_archive(zippath, 'zip', self.output_path) self.upload_file_to_storage(self.id, zipfile) def update_panoptes(self, body_attributes): diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 8050c359..d68ca476 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -1,4 +1,5 @@ import unittest +import os from unittest.mock import patch, MagicMock, call from panoptes_aggregation.scripts import batch_utils from panoptes_aggregation.batch_aggregation import run_aggregation @@ -85,8 +86,8 @@ def test_upload_files(self, archive_mock, client_mock): archive_mock.return_value = zipped_mock ba = batch_agg.BatchAggregator(1, 10, 100) ba.upload_file_to_storage = MagicMock() - ba.output_path = 'tmp/10' - reductions_file = 'tmp/10/10_reductions.csv' + ba.output_path = os.path.join('tmp', '10') + reductions_file = os.path.join('tmp', '10', '10_reductions.csv') ba.upload_files() client_mock.assert_called_once() archive_mock.assert_called_once() From 
ad9682bec94e6929591741cfeedf9e61dfde9c83 Mon Sep 17 00:00:00 2001 From: Zach Wolfenbarger Date: Tue, 4 Jun 2024 18:23:07 -0500 Subject: [PATCH 37/37] Undeleting deploy template --- kubernetes/deployment-production.tmpl | 120 ++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 kubernetes/deployment-production.tmpl diff --git a/kubernetes/deployment-production.tmpl b/kubernetes/deployment-production.tmpl new file mode 100644 index 00000000..c54084fe --- /dev/null +++ b/kubernetes/deployment-production.tmpl @@ -0,0 +1,120 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aggregation-caesar + labels: + app: aggregation-caesar +spec: + selector: + matchLabels: + app: aggregation-caesar + template: + metadata: + labels: + app: aggregation-caesar + spec: + containers: + - name: aggregation-caesar-app + image: ghcr.io/zooniverse/aggregation-for-caesar:__IMAGE_TAG__ + ports: + - containerPort: 80 + resources: + requests: + memory: "500Mi" + cpu: "500m" + limits: + memory: "1000Mi" + cpu: "1000m" + startupProbe: + httpGet: + path: / + port: 80 + # wait 6 * 10 seconds(default periodSeconds) for the container to start + # after this succeeds once the liveness probe takes over + failureThreshold: 6 + livenessProbe: + httpGet: + path: / + port: 80 + # allow a longer response time than 1s + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: / + port: 80 + # start checking for readiness after 20s (to serve traffic) + initialDelaySeconds: 20 + # allow a longer response time than 1s + timeoutSeconds: 10 + env: + - name: FLASK_ENV + value: production + - name: PANOPTES_URL + value: https://panoptes.zooniverse.org/ + - name: PANOPTES_CLIENT_ID + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_ID + - name: PANOPTES_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: PANOPTES_CLIENT_SECRET + - name: MAST_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: 
aggregation-for-caesar-environment + key: MAST_AUTH_TOKEN + - name: MAST_PROD_TOKEN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: MAST_PROD_TOKEN + - name: SENTRY_DSN + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: SENTRY_DSN + - name: NEW_RELIC_LICENSE_KEY + valueFrom: + secretKeyRef: + name: aggregation-for-caesar-environment + key: NEW_RELIC_LICENSE_KEY + - name: NEW_RELIC_APP_NAME + value: 'Aggregation Caesar' +--- +apiVersion: autoscaling/v1 +kind: HorizontalPodAutoscaler +metadata: + name: aggregation-caesar +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aggregation-caesar + minReplicas: 2 + maxReplicas: 3 + targetCPUUtilizationPercentage: 80 +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aggregation-caesar +spec: + minAvailable: 50% + selector: + matchLabels: + app: aggregation-caesar +--- +apiVersion: v1 +kind: Service +metadata: + name: aggregation-caesar +spec: + selector: + app: aggregation-caesar + ports: + - protocol: TCP + port: 80 + targetPort: 80 \ No newline at end of file