From 5ce5dc200311300c8d22d6392dc080fc81a228a4 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Thu, 31 Aug 2023 22:44:57 +0000 Subject: [PATCH 01/40] add a devcontainer [PTV-1888] --- .devcontainer/Dockerfile | 34 +++++++++++++++++++++++++++++ .devcontainer/devcontainer.json | 37 ++++++++++++++++++++++++++++++++ .devcontainer/docker-compose.yml | 24 +++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-compose.yml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..f1e42edf --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,34 @@ +FROM python:3.11.5-slim-bookworm +# ----------------------------------------- +# We use the "stable" debian release; python does not seem to release current +# versions on LTS debian releases. + +# We don't upgrade, and we pin all os dependency versions +RUN apt-get update && \ + apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 git=1:2.39.2-1.1 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +SHELL ["/bin/bash", "-c"] + +RUN mkdir -p /app + +WORKDIR /app + +# Standard simplified python setup. +COPY ./requirements.txt /app/requirements.txt +COPY ./requirements_dev.txt /app/requirements_dev.txt +RUN python -m pip install "pip==23.2.1" && pip install -r /app/requirements.txt -r /app/requirements_dev.txt + +ENV PATH="/app:${PATH}" + +# Run as a regular user, not root. +# RUN addgroup --system kbapp && \ +# adduser --system --ingroup kbapp kbapp && \ +# chown -R kbapp:kbapp /app + +# USER kbapp + +# ENTRYPOINT [ "/app/development/tools/runner/entrypoint.sh" ] + +# CMD [ "bash" ] \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..7289d2f5 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,37 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile +{ + "name": "staging service devcontainer", + "dockerComposeFile": [ + "docker-compose.yml" + ], + // The 'service' property is the name of the service for the container that VS Code should + // use. Update this value and .devcontainer/docker-compose.yml to the real service name. + "service": "staging_service", + // "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached", + // The optional 'workspaceFolder' property is the path VS Code should open by default when + // connected. This is typically a file mount in .devcontainer/docker-compose.yml + "workspaceFolder": "/workspace", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "tamasfe.even-better-toml", + "mhutchie.git-graph", + "ms-python.black-formatter", + "ms-python.flake8", + "mikoz.autoflake-extension" + ] + } + } + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // Uncomment the next line to run commands after the container is created. + // "postCreateCommand": "cat /etc/os-release", + // Configure tool-specific properties. 
+ // "customizations": {}, + // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "devcontainer" +} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 00000000..7498be19 --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,24 @@ +version: '3.6' +networks: + kbase-dev: + name: kbase-dev +services: + staging_service: + build: + context: .. + dockerfile: .devcontainer/Dockerfile + + image: kbase/staging_server_devcontainer:dev + + container_name: staging_server_devcontainer + dns: 8.8.8.8 + volumes: + - ..:/workspace:cached + networks: + - kbase-dev + # Required for a devcontainer -- keeps the container running. + # Don't worry, our main interaction with the container is through the + # VSC terminal, which for a devcontainer opens a shell within the + # container. + command: /bin/sh -c "while sleep 1000; do :; done" + \ No newline at end of file From b168c680722c90f318479620160b3bca252955d6 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Thu, 31 Aug 2023 22:46:38 +0000 Subject: [PATCH 02/40] update Dockerfiles to use python 3.11.5, bullseye, and fix os dependencies [PTV-1888] python 3.11 no longer supports debian "buster". --- Dockerfile | 6 ++--- development/tools/runner/Dockerfile | 34 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 development/tools/runner/Dockerfile diff --git a/Dockerfile b/Dockerfile index c8d57bf4..7773cab7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,13 @@ -FROM python:3.11.4-slim-buster +FROM python:3.11.5-slim-bullseye # ----------------------------------------- RUN mkdir -p /kb/deployment/lib RUN apt-get update && \ - apt-get install -y --no-install-recommends zip=3.0-11+b1 unzip=6.0-23+deb10u3 bzip2=1.0.6-9.2~deb10u2 libmagic-dev=1:5.35-4+deb10u2 htop=2.2.0-1+b1 wget=1.20.1-1.1 && \ + apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* COPY ./requirements.txt /requirements.txt -RUN python -m pip install "pip==23.1.2" && pip install -r /requirements.txt && rm /requirements.txt +RUN python -m pip install "pip==23.2.1" && pip install -r /requirements.txt && rm /requirements.txt COPY ./globus.cfg /etc/globus.cfg RUN touch /var/log/globus.log && chmod 777 /var/log/globus.log diff --git a/development/tools/runner/Dockerfile b/development/tools/runner/Dockerfile new file mode 100644 index 00000000..30035426 --- /dev/null +++ b/development/tools/runner/Dockerfile @@ -0,0 +1,34 @@ +FROM python:3.11.5-slim-bookworm +# ----------------------------------------- +# We use the "stable" debian release; python does not seem to release current +# versions on LTS debian releases. + +# We don't upgrade, and we pin all os dependency versions +RUN apt-get update && \ + apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +SHELL ["/bin/bash", "-c"] + +RUN mkdir -p /app + +WORKDIR /app + +# Standard simplified python setup. 
+COPY ./requirements.txt /app/requirements.txt +COPY ./requirements_dev.txt /app/requirements_dev.txt +RUN python -m pip install "pip==23.2.1" && pip install -r /app/requirements.txt -r /app/requirements_dev.txt + +ENV PATH="/app:${PATH}" + +# Run as a regular user, not root. +RUN addgroup --system kbapp && \ + adduser --system --ingroup kbapp kbapp && \ + chown -R kbapp:kbapp /app + +USER kbapp + +ENTRYPOINT [ "/app/development/tools/runner/entrypoint.sh" ] + +CMD [ "bash" ] \ No newline at end of file From dee74d3eff6921f083b9feb1cf955ef48948d218 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Thu, 31 Aug 2023 22:47:42 +0000 Subject: [PATCH 03/40] add development tools [PTV-1888] --- development/docker-compose-kbase-ui.yml | 30 ++++++++++++++++ development/scripts/README.md | 35 +++++++++++++++++++ development/scripts/generate-data-files.py | 15 ++++++++ .../scripts/generate-random-strings.py | 21 +++++++++++ development/scripts/list-uploads.sh | 13 +++++++ development/scripts/run | 8 +++++ development/scripts/run-dev-server.sh | 7 ++++ development/tools/runner/docker-compose.yml | 10 ++++++ development/tools/runner/entrypoint.sh | 17 +++++++++ 9 files changed, 156 insertions(+) create mode 100644 development/docker-compose-kbase-ui.yml create mode 100644 development/scripts/README.md create mode 100644 development/scripts/generate-data-files.py create mode 100644 development/scripts/generate-random-strings.py create mode 100755 development/scripts/list-uploads.sh create mode 100755 development/scripts/run create mode 100755 development/scripts/run-dev-server.sh create mode 100644 development/tools/runner/docker-compose.yml create mode 100755 development/tools/runner/entrypoint.sh diff --git a/development/docker-compose-kbase-ui.yml b/development/docker-compose-kbase-ui.yml new file mode 100644 index 00000000..170a4036 --- /dev/null +++ b/development/docker-compose-kbase-ui.yml @@ -0,0 +1,30 @@ +# This docker compose file works with kbase-ui - specifically the kbase-dev +# network that allows proxying service requests from kbase-ui and the narrative. +version: "3.8" +networks: + kbase-dev: + name: kbase-dev + external: true +services: + staging_service: + hostname: staging_service + build: + context: . + ports: + # TODO: should be port 5000 to match other kbase services + - "3010:3000" + volumes: + - "./data:/kb/deployment/lib/src/data" + - "./staging_service:/kb/deployment/lib/staging_service" + networks: + - kbase-dev + dns: + - 8.8.8.8 + environment: + - KB_DEPLOYMENT_CONFIG=/kb/deployment/conf/deployment.cfg + - FILE_LIFETIME=90 + # - SAVE_STRATEGY=SAVE_STRATEGY_TEMP_THEN_COPY + # - SAVE_STRATEGY=SAVE_STRATEGY_SAVE_TO_DESTINATION + # - CHUNK_SIZE= + entrypoint: + ["/kb/deployment/bin/entrypoint-dev.sh"] diff --git a/development/scripts/README.md b/development/scripts/README.md new file mode 100644 index 00000000..44542f6f --- /dev/null +++ b/development/scripts/README.md @@ -0,0 +1,35 @@ +# Scripts + +## `generate-data-files.py` + +A small tool, an example function really, to generate files for manual testing of upload and download. I use it by copy/pasting into a Python REPL session in the directory in which I want to generate files, and just run the function with the required parameters. + +## `list-uploads.sh` + +Handy for listing the contents of the `./data` directory while developing the `/upload` endpoint of the server locally. It simply lists the directory every 0.5s. + +## `run` + +Runs the Docker container located in /development/tools/runner with docker compose. 
This is a "runner container" designed to run things inside a container which is very similar to the service container. It is used to run tests, and can be used to run code quality tools also like black and mypy (not yet used in this project.) + +The advantage of using `run` is that one does not have to arrange the precise version of Python and OS dependencies in a virtual environment. + +Here are some usages of it: + +To run mypy against the codebase: + +```shell +./development/run mypy staging_service +``` + +To run black against the codebase: + +```shell +./development/run black staging_service +``` + +To run black against the codebase and apply formatting fixes: + +```shell +./development/run black staging_service --fix +``` \ No newline at end of file diff --git a/development/scripts/generate-data-files.py b/development/scripts/generate-data-files.py new file mode 100644 index 00000000..ceebe63d --- /dev/null +++ b/development/scripts/generate-data-files.py @@ -0,0 +1,15 @@ +import io + + +def generate_null_file(size: int, name: str): + """ + Generate a file full of 0 bytes, for testing different + file size scenarios. + + e.g. generate_null_file(5100000000, "5.1g.out") + """ + with open(name, "wb") as out: + bw = io.BufferedWriter(out, 10000000) + for _ in range(size): + bw.write(b"\0") + bw.flush() diff --git a/development/scripts/generate-random-strings.py b/development/scripts/generate-random-strings.py new file mode 100644 index 00000000..ce754add --- /dev/null +++ b/development/scripts/generate-random-strings.py @@ -0,0 +1,21 @@ +""" +Generate a set of text files for testing filled with random ascii characters. +""" +import random +import string + + +def make_random_string(string_length: int) -> str: + """ + Generate a string of random ascii letters of the given length + """ + possible_letters = string.ascii_letters + return "".join(random.choice(possible_letters) for i in range(string_length)) + + +if __name__ == "__main__": + for file_length in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]: + filename = f"sample-text-file-{file_length}" + print(f"generating {filename}") + with open(f"sample-text-file-{file_length}", "w", encoding="utf-8") as out: + out.write(make_random_string(file_length)) diff --git a/development/scripts/list-uploads.sh b/development/scripts/list-uploads.sh new file mode 100755 index 00000000..10f87f8e --- /dev/null +++ b/development/scripts/list-uploads.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# +# A handy script to display the contents of the data directory in a +# terminal window, refreshing every 0.5 seconds and excluding +# any macOS .DS_Store Desktop Services files. +# +while : +do + clear + find data -type f \( ! -name ".DS_Store" \) -ls + sleep 0.5 +done \ No newline at end of file diff --git a/development/scripts/run b/development/scripts/run new file mode 100755 index 00000000..219486f2 --- /dev/null +++ b/development/scripts/run @@ -0,0 +1,8 @@ +#!/bin/sh +PROJECT_DIRECTORY=${DIR:-$PWD} +echo "Project Directory: ${PROJECT_DIRECTORY}" +docker compose \ + -f "${PROJECT_DIRECTORY}/development/tools/runner/docker-compose.yml" \ + --project-directory "${PROJECT_DIRECTORY}" \ + --project-name tools \ + run --rm runner "${@:1}" \ No newline at end of file diff --git a/development/scripts/run-dev-server.sh b/development/scripts/run-dev-server.sh new file mode 100755 index 00000000..6c2f6d65 --- /dev/null +++ b/development/scripts/run-dev-server.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "Running server in development..." 
+docker compose \ + -f development/docker-compose-kbase-ui.yml \ + --project-directory "${PWD}" \ + run --rm staging_service \ No newline at end of file diff --git a/development/tools/runner/docker-compose.yml b/development/tools/runner/docker-compose.yml new file mode 100644 index 00000000..f1b8e1ea --- /dev/null +++ b/development/tools/runner/docker-compose.yml @@ -0,0 +1,10 @@ +version: '3.8' +services: + runner: + build: + context: . + dockerfile: development/tools/runner/Dockerfile + volumes: + - .:/app + command: + - bash diff --git a/development/tools/runner/entrypoint.sh b/development/tools/runner/entrypoint.sh new file mode 100755 index 00000000..2ba1545b --- /dev/null +++ b/development/tools/runner/entrypoint.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e + +# Nice to set this up for all future python code being run. +export PYTHONPATH="${PWD}/src" + +# +# This execs whatever is provided as a COMMAND to the container. By default, as established +# in the image via the Dockerfile, it is scripts/start-server.sh +# This mechanism, though, allows us to run anything inside the container. We use this +# for running tests, mypy, black, etc. through docker compose. +# Note that surrounding $@ with double quote causes the expansion (which starts with +# parameter 1 not 0) to result in each parameter being retained without splitting. +# Sort of like "$@" -> "$1" "$2" "$3" .. +# +exec "$@" From 7b070cdf2ebd84e17cb33bd3bc05360b8b2bc914 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Thu, 31 Aug 2023 22:48:19 +0000 Subject: [PATCH 04/40] ignore jetbrains idea directory and temp dir [PTV-1888] being nice to developers --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 00b31a50..9f564edd 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,10 @@ data/ .DS_Store .virtualenvs/ test.env -run_tests_single.sh \ No newline at end of file +run_tests_single.sh +_attic + +# IDEs + +# jetbrains +.idea From d4905acbce4b5ba3bb0b8f64b7046eacdcb75f20 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Thu, 31 Aug 2023 22:49:00 +0000 Subject: [PATCH 05/40] split Python dependencies into service and development, and update them [PTV-1888] --- requirements.txt | 16 ++++++---------- requirements_dev.txt | 11 +++++++++++ 2 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 requirements_dev.txt diff --git a/requirements.txt b/requirements.txt index 82291acf..974f1eb7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,14 @@ -aiofiles==23.1.0 +aiofiles==23.2.1 +aiohttp==3.8.5 aiohttp_cors==0.7.0 -aiohttp==3.8.4 -coverage==7.2.7 defusedxml==0.7.1 frozendict==2.3.8 -globus-sdk==3.23.0 -hypothesis==6.81.1 +globus-sdk==3.28.0 +gunicorn==21.2.0 openpyxl==3.1.2 -pandas==2.0.3 -pytest-aiohttp==1.0.4 -pytest-asyncio==0.21.0 -pytest-cov==4.1.0 -pytest==7.4.0 +pandas==2.1.0 python-dotenv==1.0.0 python-magic==0.4.27 +types-openpyxl===3.1.0.17 uvloop==0.17.0 xlrd==2.0.1 # fairly out of date \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 00000000..cfd35eb6 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,11 @@ +autoflake==2.2.0 +black==23.7.0 +coverage==7.3.0 +hypothesis==6.82.7 +mypy==1.5.1 +pymarkdownlnt==0.9.12 +pylint==2.17.5 +pytest-aiohttp==1.0.4 +pytest-asyncio==0.21.1 +pytest-cov==4.1.0 +pytest==7.4.0 From a7fa5d4f207484c38c91f84261f96fc4c5caeee0 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Thu, 31 Aug 2023 22:50:58 +0000 Subject: [PATCH 06/40] fix flaky 
tests [PTV-1888] they were broken for a few reasons: ordering of fileutil & app client, invalid time comparisons, and an async workaround for hypothesis is no longer necessary --- tests/test_app.py | 216 +++++++++++++++++++++------------------------- 1 file changed, 97 insertions(+), 119 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index d002fd27..71a3e6ec 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -14,7 +14,6 @@ import openpyxl import pandas -import pytest from aiohttp import FormData, test_utils from hypothesis import given, settings from hypothesis import strategies as st @@ -46,22 +45,6 @@ utils.Path._META_DIR = META_DIR -def asyncgiven(**kwargs): - """alternative to hypothesis.given decorator for async""" - - def real_decorator(fn): - @given(**kwargs) - def aio_wrapper(*args, **kwargs): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - future = asyncio.wait_for(fn(*args, **kwargs), timeout=60) - loop.run_until_complete(future) - - return aio_wrapper - - return real_decorator - - def mock_auth_app(): application = app.app_factory(config) @@ -98,6 +81,10 @@ def __init__(self, base_dir=DATA_DIR): self.base_dir = base_dir def __enter__(self): + """ + Removes the base directory and contents, if it exists, then creates it so that + it provides a blank slate. + """ os.makedirs(self.base_dir, exist_ok=True) shutil.rmtree(self.base_dir) os.makedirs(self.base_dir, exist_ok=False) @@ -185,8 +172,7 @@ def test_path_sanitation(username_first, username_rest, path): assert validated.metadata_path.find("../") == -1 -@asyncgiven(txt=st.text()) - +@given(txt=st.text()) async def test_cmd(txt): with FileUtil(DATA_DIR) as fs: d = fs.make_dir("test") @@ -204,7 +190,6 @@ async def test_cmd(txt): assert md5 == expected_md5 - async def test_auth(): async with AppClient(config) as cli: resp = await cli.get("/test-auth") @@ -213,7 +198,6 @@ async def test_auth(): assert "I'm authenticated as" in text - async def test_service(): async with AppClient(config) as cli: resp = await cli.get("/test-service") @@ -222,14 +206,13 @@ async def test_service(): assert "staging service version" in text - async def test_jbi_metadata(): txt = "testing text\n" username = "testuser" jbi_metadata = '{"file_owner": "sdm", "added_date": "2013-08-12T00:21:53.844000"}' - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_jgi.fastq"), txt) fs.make_file( @@ -255,12 +238,11 @@ async def test_jbi_metadata(): assert res1.status == 404 - async def test_metadata(): txt = "testing text\n" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) res1 = await cli.get( @@ -356,12 +338,11 @@ async def test_metadata(): assert not json.get("isFolder") - async def test_define_upa(): txt = "testing text\n" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) # generating metadata file @@ -438,12 +419,11 @@ async def test_define_upa(): assert res6.status == 400 - async def 
test_mv(): txt = "testing text\n" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) @@ -513,12 +493,11 @@ async def test_mv(): assert res7.status == 404 - async def test_delete(): txt = "testing text\n" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) @@ -562,12 +541,11 @@ async def test_delete(): assert res5.status == 404 - async def test_list(): txt = "testing text" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) fs.make_dir(os.path.join(username, "test", "test_sub_dir")) @@ -591,7 +569,23 @@ async def test_list(): assert json[0]["isFolder"] is True assert json[0]["name"] == "test" assert json[0]["path"] == "testuser/test" - assert json[0]["mtime"] <= time.time() * 1000 + # This can fail due to the calculation of mtime, which may be have the + # precision of time.time(). That is, in my observations it may be in + # seconds. If the mtime is rounded up, the test will quite likely fail as + # the file mtime will be greater than the current time returned by + # time.time(). + # The solution applied is to round the expected time up. + # This will fail, still, on Windows, where apparently st_mtime has 2 + # second precision. + # Alternatively, the service implementation could use finer precition time + # measurement via st_mtime_ns, although that is also platform-dependent + # (i.e. it is not always nanosecond precise.)) + # E.g. + # FAILED tests/test_app.py::test_list - assert 1693508770000 <= + # (1693508769.9508653 * 1000) + # Thursday, August 31, 2023 7:06:10 PM + # Thursday, August 31, 2023 7:06:09.950 PM + assert json[0]["mtime"] <= round(time.time()) * 1000 assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 @@ -604,7 +598,7 @@ async def test_list(): assert json[0]["isFolder"] is True assert json[0]["name"] == "test" assert json[0]["path"] == "testuser/test" - assert json[0]["mtime"] <= time.time() * 1000 + assert json[0]["mtime"] <= round(time.time()) * 1000 assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 @@ -653,15 +647,21 @@ async def test_list(): assert len(file_names) == 4 -@asyncgiven(txt=st.text()) - +# Not sure that we need hypothesis here; I think we should poke at the boundaries +# of what the download should handle. +@given(txt=st.text()) async def test_download(txt): + # async def test_download(): username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: + # TODO: here and elsewhere, we should probably use a fake fs. + # Creates the test file (test_file_1) with text provided by + # hypothesis. fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) + # Then we download it and ensure the contents are as expected. 
res = await cli.get( os.path.join("download", "test", "test_file_1"), headers={"Authorization": ""}, @@ -671,11 +671,10 @@ async def test_download(txt): assert result_text == txt.encode() - async def test_download_errors(): username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) res1 = await cli.get("dwnload", headers={"Authorization": ""}) @@ -686,12 +685,11 @@ async def test_download_errors(): assert res2.status == 400 - async def test_similar(): txt = "testing text" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1.fq"), txt) fs.make_dir(os.path.join(username, "test", "test_sub_dir")) @@ -728,12 +726,11 @@ async def test_similar(): assert res3.status == 400 - async def test_existence(): txt = "testing text" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test_file_1"), txt) fs.make_dir(os.path.join(username, "test", "test_sub_dir")) @@ -791,12 +788,11 @@ async def test_existence(): assert json["isFolder"] is False - async def test_search(): txt = "testing text" username = "testuser" - async with AppClient(config, username) as cli: - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) fs.make_file(os.path.join(username, "test", "test1"), txt) fs.make_dir(os.path.join(username, "test", "test2")) @@ -818,16 +814,11 @@ async def test_search(): assert len(json) == 2 - async def test_upload(): txt = "testing text\n" username = "testuser" - async with AppClient(config, username) as cli: - # testing missing body - res1 = await cli.post("upload", headers={"Authorization": ""}) - assert res1.status == 400 - - with FileUtil() as fs: + with FileUtil() as fs: + async with AppClient(config, username) as cli: fs.make_dir(os.path.join(username, "test")) f = fs.make_file(os.path.join(username, "test", "test_file_1"), txt) @@ -840,6 +831,14 @@ async def test_upload(): assert res2.status == 200 +async def test_upload_missing_body(): + username = "testuser" + async with AppClient(config, username) as cli: + # testing missing body + res1 = await cli.post("upload", headers={"Authorization": ""}) + assert res1.status == 400 + + async def _upload_file_fail_filename(filename: str, err: str): # Note two file uploads in a row causes a test error: # https://github.com/aio-libs/aiohttp/issues/3968 @@ -854,28 +853,24 @@ async def _upload_file_fail_filename(filename: str, err: str): assert res.status == 403 - async def test_upload_fail_leading_space(): await _upload_file_fail_filename( " test_file", "cannot upload file with name beginning with space" ) - async def test_upload_fail_dotfile(): await _upload_file_fail_filename( ".test_file", "cannot upload file with name beginning with '.'" ) - async def test_upload_fail_comma_in_file(): await _upload_file_fail_filename("test,file", "cannot upload file with ',' in name") @settings(deadline=None) -@asyncgiven(contents=st.text()) - +@given(contents=st.text()) async def 
test_directory_decompression(contents): fname = "test" dirname = "dirname" @@ -895,9 +890,10 @@ async def test_directory_decompression(contents): ("bztar", ".tar.bz"), ("tar", ".tar"), ] - async with AppClient(config, username) as cli: - for method, extension in methods: - with FileUtil() as fs: + + for method, extension in methods: + with FileUtil() as fs: + async with AppClient(config, username) as cli: d = fs.make_dir(os.path.join(username, dirname)) f1 = fs.make_file(path.user_path, contents) d2 = fs.make_dir(os.path.join(username, dirname, dirname)) @@ -935,8 +931,8 @@ async def test_directory_decompression(contents): assert os.path.exists(f3) -@asyncgiven(contents=st.text()) - +@settings(deadline=None) +@given(contents=st.text()) async def test_file_decompression(contents): fname = "test" dirname = "dirname" @@ -948,9 +944,9 @@ async def test_file_decompression(contents): return methods = [("gzip", ".gz"), ("bzip2", ".bz2")] - async with AppClient(config, username) as cli: - for method, extension in methods: - with FileUtil() as fs: + for method, extension in methods: + with FileUtil() as fs: + async with AppClient(config, username) as cli: d = fs.make_dir(os.path.join(username, dirname)) f1 = fs.make_file(path.user_path, contents) name = fname + extension @@ -972,7 +968,6 @@ async def test_file_decompression(contents): assert os.path.exists(f1) - async def test_importer_mappings(): """ This tests calling with simple good cases, and some expected bad cases @@ -1043,11 +1038,10 @@ async def test_importer_mappings(): ) - async def test_bulk_specification_success(): # In other tests a username is passed to AppClient but AppClient completely ignores it... - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser/somefolder") # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" tsv = "genomes.tsv" @@ -1134,7 +1128,6 @@ async def test_bulk_specification_success(): assert resp.status == 200 - async def test_bulk_specification_fail_no_files(): async with AppClient(config) as cli: for f in ["", "?files=", "?files= , ,, , "]: @@ -1144,10 +1137,9 @@ async def test_bulk_specification_fail_no_files(): assert resp.status == 400 - async def test_bulk_specification_fail_not_found(): - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir( "testuser/otherfolder" ) # testuser is hardcoded in the auth mock @@ -1172,14 +1164,13 @@ async def test_bulk_specification_fail_not_found(): assert resp.status == 404 - async def test_bulk_specification_fail_parse_fail(): """ Tests a number of different parse fail cases, including incorrect file types. Also tests that all the errors are combined correctly. """ - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir( "testuser/otherfolder" ) # testuser is hardcoded in the auth mock @@ -1261,7 +1252,9 @@ async def test_bulk_specification_fail_parse_fail(): }, { "type": "cannot_parse_file", - "message": "fasta.gz is not a supported file type for import specifications", + "message": ( + "fasta.gz is not a supported file type for import specifications" + ), "file": "testuser/badfile.fasta.gz", "tab": None, }, @@ -1279,7 +1272,9 @@ async def test_bulk_specification_fail_parse_fail(): }, { "type": "cannot_parse_file", - "message": "badfile. 
is not a supported file type for import specifications", + "message": ( + "badfile. is not a supported file type for import specifications" + ), "file": "testuser/badfile.", "tab": None, }, @@ -1288,10 +1283,9 @@ async def test_bulk_specification_fail_parse_fail(): assert resp.status == 400 - async def test_bulk_specification_fail_column_count(): - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser") # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" tsv = "genomes.tsv" @@ -1362,10 +1356,9 @@ async def test_bulk_specification_fail_column_count(): assert resp.status == 400 - async def test_bulk_specification_fail_multiple_specs_per_type(): - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser") # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" tsv = "genomes.tsv" @@ -1454,14 +1447,13 @@ async def test_bulk_specification_fail_multiple_specs_per_type(): assert resp.status == 400 - async def test_bulk_specification_fail_multiple_specs_per_type_excel(): """ Test an excel file with an internal data type collision. This is the only case when all 5 error fields are filled out. """ - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser") # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" excel = "stuff.xlsx" @@ -1533,11 +1525,10 @@ async def test_bulk_specification_fail_multiple_specs_per_type_excel(): } - async def test_write_bulk_specification_success_csv(): # In other tests a username is passed to AppClient but AppClient completely ignores it... - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser") # testuser is hardcoded in the auth mock resp = await cli.post( "write_bulk_specification/", # NOSONAR @@ -1577,11 +1568,10 @@ async def test_write_bulk_specification_success_csv(): ) - async def test_write_bulk_specification_success_tsv(): # In other tests a username is passed to AppClient but AppClient completely ignores it... - async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser") # testuser is hardcoded in the auth mock types = dict(_IMPORT_SPEC_TEST_DATA) types["reads"] = dict(types["reads"]) @@ -1623,11 +1613,10 @@ async def test_write_bulk_specification_success_tsv(): ) - async def test_write_bulk_specification_success_excel(): # In other tests a username is passed to AppClient but AppClient completely ignores it... 
- async with AppClient(config) as cli: - with FileUtil() as fu: + with FileUtil() as fu: + async with AppClient(config) as cli: fu.make_dir("testuser") # testuser is hardcoded in the auth mock resp = await cli.post( "write_bulk_specification/", @@ -1674,7 +1663,6 @@ async def test_write_bulk_specification_success_excel(): ) - async def test_write_bulk_specification_fail_wrong_data_type(): async with AppClient(config) as cli: resp = await cli.post("write_bulk_specification/", data="foo") @@ -1683,7 +1671,6 @@ async def test_write_bulk_specification_fail_wrong_data_type(): assert resp.status == 415 - async def test_write_bulk_specification_fail_no_content_length(): async with AppClient(config) as cli: resp = await cli.post( @@ -1694,7 +1681,6 @@ async def test_write_bulk_specification_fail_no_content_length(): assert resp.status == 411 - async def test_write_bulk_specification_fail_large_input(): async with AppClient(config) as cli: resp = await cli.post("write_bulk_specification/", json="a" * (1024 * 1024 - 2)) @@ -1707,7 +1693,6 @@ async def test_write_bulk_specification_fail_large_input(): assert resp.status == 413 - async def _write_bulk_specification_json_fail(json: Any, err: str): async with AppClient(config) as cli: resp = await cli.post("write_bulk_specification", json=json) @@ -1716,35 +1701,30 @@ async def _write_bulk_specification_json_fail(json: Any, err: str): assert resp.status == 400 - async def test_write_bulk_specification_fail_not_dict(): await _write_bulk_specification_json_fail( ["foo"], "The top level JSON element must be a mapping" ) - async def test_write_bulk_specification_fail_no_output_dir(): await _write_bulk_specification_json_fail( {}, "output_directory is required and must be a string" ) - async def test_write_bulk_specification_fail_wrong_type_for_output_dir(): await _write_bulk_specification_json_fail( {"output_directory": 4}, "output_directory is required and must be a string" ) - async def test_write_bulk_specification_fail_no_file_type(): await _write_bulk_specification_json_fail( {"output_directory": "foo"}, "Invalid output_file_type: None" ) - async def test_write_bulk_specification_fail_wrong_file_type(): await _write_bulk_specification_json_fail( {"output_directory": "foo", "output_file_type": "XSV"}, @@ -1752,7 +1732,6 @@ async def test_write_bulk_specification_fail_wrong_file_type(): ) - async def test_write_bulk_specification_fail_invalid_type_value(): await _write_bulk_specification_json_fail( {"output_directory": "foo", "output_file_type": "CSV", "types": {"a": "fake"}}, @@ -1760,7 +1739,6 @@ async def test_write_bulk_specification_fail_invalid_type_value(): ) - async def test_importer_filetypes(): """ Only checks a few example entries since the list may expand over time From 34a4474022526d756e6be7ed96997e9f95ca70c1 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 21:22:54 +0000 Subject: [PATCH 07/40] fix local config [PTV-1888] --- deployment/conf/local.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deployment/conf/local.cfg b/deployment/conf/local.cfg index b0033d2b..17771ccd 100644 --- a/deployment/conf/local.cfg +++ b/deployment/conf/local.cfg @@ -2,4 +2,5 @@ META_DIR = ./data/metadata/ DATA_DIR = ./data/bulk/ AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token -CONCIERGE_PATH = /kbaseconcierge \ No newline at end of file +CONCIERGE_PATH = /kbaseconcierge +FILE_EXTENSION_MAPPINGS = ./deployment/conf/supported_apps_w_extensions.json \ No newline at end of file From 
ffdb74982ad5952800a79a46e14c72c9a25f75b2 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 21:23:28 +0000 Subject: [PATCH 08/40] add docker plugin to devcontainer [PTV-1888] --- .devcontainer/devcontainer.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 7289d2f5..84e5909c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -20,7 +20,8 @@ "mhutchie.git-graph", "ms-python.black-formatter", "ms-python.flake8", - "mikoz.autoflake-extension" + "mikoz.autoflake-extension", + "ms-azuretools.vscode-docker" ] } } From 10cc772fcc9f68026ced8f54507fbb17fb853b0d Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 21:24:23 +0000 Subject: [PATCH 09/40] put some work into docs [PTV-1888] move api and development docs from README to separate, linked docs; add docs for dev tools and dockerfiles --- README.md | 1180 +--------------- docs/api.md | 1484 ++++++++++++++++++++ docs/development/development-tools.md | 119 ++ docs/development/index.md | 5 + docs/development/maintaining-dockerfile.md | 48 + docs/development/original-doc.md | 64 + 6 files changed, 1753 insertions(+), 1147 deletions(-) create mode 100644 docs/api.md create mode 100644 docs/development/development-tools.md create mode 100644 docs/development/index.md create mode 100644 docs/development/maintaining-dockerfile.md create mode 100644 docs/development/original-doc.md diff --git a/README.md b/README.md index c85dc418..867749da 100644 --- a/README.md +++ b/README.md @@ -1,1169 +1,55 @@ -# staging_service +# KBase Staging Service +## Background -In order to setup local development, you must have docker installed and if you want to run it locally you must have python 3.9.6 or greater installed +The staging service provides an API to a shared filesystem designed primarily to +allow KBase users to upload raw data files for usage by KBase apps within a Narrative. +The API allows for uploading, decompressing and downloading files. In addition it +provides some utility for Globus transfer integration into the filesystem. +The `staging_service` is a core KBase service designed to be run within the KBase +infrastructure. The runtime requires access to other KBase services as well as a +suitable filesystem. Internal libraries implement the interfaces to these services, +which are all open source and documented in their respective repos. In a break from the +traditional KBase service, it is not based on the KBase SDK, but rather `aiohttp`. -# setup +For more about how data import works from the user perspective, please [refer to the KBase +documentation](https://docs.kbase.us/getting-started/narrative/add-data). -make a folder called /data as well as inside that /bulk and inside that a folder for any usernames you wish it to work with +## Installation -data - -bulk - -username - -username +The service is designed to be packagd into a Docker image and run as a container. +For deployment, the image is built by GitHub actions and stored in GitHub Container +Registry. -if you want to run locally you must install requirements.txt for python3 +For local development and evaluation, it may be run locally from within a devcontainer, +or from a locally-built image. 
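As a sketch, a local build and run might look roughly like the following. The image tag is illustrative, and the port, environment variables, and data volume are taken from the development docker compose file elsewhere in this repo (`development/docker-compose-kbase-ui.yml`), so check that file for the authoritative values:

```shell
# Build the image from the repository root using the root Dockerfile
docker build -t staging_service:local .

# Run it, pointing at the deployment config and a local data directory
# (values mirror the development compose file; adjust as needed)
docker run --rm -p 3000:3000 \
    -e KB_DEPLOYMENT_CONFIG=/kb/deployment/conf/deployment.cfg \
    -e FILE_LIFETIME=90 \
    -v "${PWD}/data:/kb/deployment/lib/src/data" \
    staging_service:local
```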
-# running +> TODO: provide barebones instructions here -to run locally run /deployment/bin/entrypoint.sh +## Usage -to run inside docker run /run_in_docker.sh +> TBD -# tests +## API -* to test use ./run_tests.sh -* requires python 3.9.6 or higher -* requires installation on mac of libmagic `brew install libmagic` +See [the API documentation](./docs/api.md). +## Contributing +This repo is realistically only open to contributions from users within the KBase +organization. The contribution model is Pull Requests make from branches off of main. So +clearly one needs to have the ability to push up branches and create PRs. -# debugging +Forks are not supported at this time. -Included configurations for the Visual Studio Code debugger for python that mirror what is in the entrypoint.sh and testing configuration to run locally in the debugger, set breakpoints and if you open the project in VSCode the debugger should be good to go. The provided configurations can run locally and run tests locally +## Development -# development +For development support, see [the development docs](./docs/development.md) -When releasing a new version: -* Update the release notes -* Update the version in [staging_service/app.py](staging_service/app.py).VERSION +## License -# expected command line utilities -to run locally you will need all of these utils on your system: tar, unzip, zip, gzip, bzip2, md5sum, head, tail, wc +See [the KBase License](./LICENSE.md) -in the docker container all of these should be available - -# API - -all paths should be specified treating the user's home directory as root - -## Test Service - -**URL** : `ci.kbase.us/services/staging_service/test-service` - -**local URL** : `localhost:3000/test-service` - -**Method** : `GET` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -This is just a test. This is only a test. -``` -## Test Auth - -**URL** : `ci.kbase.us/services/staging_service/test-auth` - -**local URL** : `localhost:3000/test-auth` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -I'm authenticated as -``` - -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... 
-``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -## File Lifetime -**URL** : `ci.kbase.us/services/staging_service/file-lifetime` -**local URL** : `localhost:3000/file-lifetime` - -**Method** : `GET` - -### Success Response - -**Code** : `200 OK` - -**Content example** -number of days a file will be held for in staging service before being deleted -this is not actually handled by the server but is expected to be performed by a cron job which shares the env variable read here - -``` -90 -``` - -## List Directory -defaults to not show hidden dotfiles - -**URL** : `ci.kbase.us/services/staging_service/list/{path to directory}` - -**URL** : `ci.kbase.us/services/staging_service/list/{path to directory}?showHidden={True/False}` - -**local URL** : `localhost:3000/list/{path to directory}` - -**local URL** : `localhost:3000/list/{path to directory}?showHidden={True/False}` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -```json -[ - { - "name": "testFolder", - "path": "nixonpjoshua/testFolder", - "mtime": 1510949575000, - "size": 96, - "isFolder": true - }, - { - "name": "testfile", - "path": "nixonpjoshua/testfile", - "mtime": 1510949629000, - "size": 335, - "isFolder": false - } -] -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `404 Not Found` - -**Content** : -``` -path / does not exist -``` - -## Download file - -**URL** : `ci.kbase.us/services/staging_service/download/{path to file}` - -**URL** : `ci.kbase.us/services/staging_service/download/{path to file}` - -**local URL** : `localhost:3000/download/{path to file}` - -**local URL** : `localhost:3000/download/{path to file}` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` -**Content** : `` - -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `400 Bad Request` - -**Content** : -``` -/ is a directory not a file -``` - -**Code** : `404 Not Found` - -**Content** : -``` -path / does not exist -``` - -## Search files and folders -defaults to not show hidden dotfiles - -**URL** : `ci.kbase.us/services/staging_service/search/{search query}` - -**URL** : `ci.kbase.us/services/staging_service/search/{search query}?showHidden={True/False}` - -**local URL** : `localhost:3000/search/{search query}` - -**local URL** : `localhost:3000/search/?showHidden={True/False}` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -```json -[ - { - "name": "testfile", - "path": "nixonpjoshua/testfile", - "mtime": 1510949629000, - "size": 335, - "isFolder": false - }, - { - "name": "testFolder", - "path": "nixonpjoshua/testFolder", - "mtime": 1510949575000, - "size": 96, - "isFolder": true - }, - { - "name": "testinnerFile", - "path": "nixonpjoshua/testFolder/testinnerFile", - "mtime": 1510949575000, - "size": 0, - "isFolder": false - } -] -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... 
-``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -## File and Folder Metadata - -**URL** : `ci.kbase.us/services/staging_service/metadata/{path to file or folder}` - -**local URL** : `localhost:3000/metadata/{path to file or folder}` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -```json -{ - "name": "testFolder", - "path": "nixonpjoshua/testFolder", - "mtime": 1510949575000, - "size": 96, - "isFolder": true -} -``` - -```json -{ - "md5": "73cf08ad9d78d3fc826f0f265139de33", - "lineCount": "13", - "head": "there is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom", - "tail": "there is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom", - "name": "testFile", - "path": "nixonpjoshua/testFile", - "mtime": 1510949629000, - "size": 335, - "isFolder": false -} -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `404 Not Found` - -**Content** : -``` -path / does not exist -``` - -## Upload File - -**URL** : `ci.kbase.us/services/staging_service/upload` - -**local URL** : `localhost:3000/upload` - -**Method** : `POST` - -**Headers** : `Authorization: ` - -**Body constraints** - -first element in request body should be - -destPath: {path file should end up in} - -second element in request body should be multipart file data - -uploads: {multipart file} - -Files starting with whitespace or a '.' are not allowed - -### Success Response - -**Code** : `200 OK` - -**Content example** - -```json -[ - { - "name": "fasciculatum_supercontig.fasta", - "path": "nixonpjoshua/fasciculatum_supercontig.fasta", - "mtime": 1510950061000, - "size": 31536508, - "isFolder": false - } -] -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -## Define/Create UPA for file which has been imported - -**URL** : `ci.kbase.us/services/staging_service/define-upa/{path to imported file}` - -**local URL** : `localhost:3000/define-upa/{path to imported file}` - -**Method** : `POST` - -**Headers** : `Authorization: ` - -**Body constraints** - -first element in request body should be - -UPA: {the actual UPA of imported file} - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -successfully update UPA for file -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... 
-``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `400 Bad Request` - -**Content** -``` -must provide UPA field in body -``` - - -## Delete file or folder (will delete things contained in folder) - -**URL** : `ci.kbase.us/services/staging_service/delete/{path to file or folder}` - -**local URL** : `localhost:3000/delete/{path to file or folder}` - -**Method** : `DELETE` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -successfully deleted UPA -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `404 Not Found` - -**Content** -``` -could not delete -``` - -**Code** : `403 Forbidden` - -**Content** -``` -cannot delete home directory -``` -``` -cannot delete protected file -``` - -## Move/rename a file or folder - -**URL** : `ci.kbase.us/services/staging_service/mv/{path to file or folder}` - -**local URL** : `localhost:3000/mv/{path to file or folder}` - -**Method** : `PATCH` - -**Headers** : `Authorization: ` - -**Body constraints** - -first element in request body should be - -newPath : {the new location/name for file or folder} - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -successfully moved to -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `400 Bad Request` - -**Content** -``` -must provide newPath field in body -``` - -**Code** : `403 Forbidden` - -**Content** -``` -cannot rename home or move directory -``` -``` -cannot rename or move protected file -``` -**Code**: `409 Conflict` - -**Content** -``` - allready exists -``` - -## Decompress various archive formats -supported archive formats are: -.zip, .ZIP, .tar.gz, .tgz, .tar.bz, .tar.bz2, .tar, .gz, .bz2, .bzip2 -**URL** : `ci.kbase.us/services/staging_service/decompress/{path to archive` - -**local URL** : `localhost:3000/decompress/{path to archive}` - -**Method** : `PATCH` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -successfully decompressed -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` -**Code** : `400 Bad Request` - -**Content** -``` -Must supply token -``` - -**Code** : `400 Bad Request` - -**Content** -``` -cannot decompress a file -``` - - -## Add Globus ACL - -After authenticating at this endpoint, AUTH is queried to get your filepath and globus id file for -linking to globus. - -**URL** : `ci.kbase.us/services/staging_service/add-acl` - -**local URL** : `localhost:3000/add-acl` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -{ - "success": true, - "principal": "KBase-Example-59436z4-z0b6-z49f-zc5c-zbd455f97c39", - "path": "/username/", - "permissions": "rw" -} -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... 
-``` - -**Condition** : If issue with Globus API or ACL Already Exists - -**Code** : `500 Internal Server Error` - -**Content** -``` -{ - 'success': False, - 'error_type': 'TransferAPIError', - 'error': "Can't create ACL rule; it already exists", - 'error_code': 'Exists', 'shared_directory_basename': '/username/' -} -``` - -## Remove Globus ACL - -After authenticating at this endpoint, AUTH is queried to get your filepath and globus id file for -linking to globus. - -**URL** : `ci.kbase.us/services/staging_service/remove-acl` - -**local URL** : `localhost:3000/remove-acl` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -{ - "message": "{\n \"DATA_TYPE\": \"result\",\n \"code\": \"Deleted\", - "message\": \"Access rule 'KBASE-examplex766ada0-x8aa-x1e8-xc7b-xa1d4c5c824a' deleted successfully\", - "request_id\": \"x2KFzfop05\",\n \"resource\": \"/endpoint/KBaseExample2a-5e5b-11e6-8309-22000b97daec/access/KBaseExample-ada0-d8aa-11e8-8c7b-0a1d4c5c824a\"}", - "Success": true -} -``` -### Error Response - -**Condition** : if authentication is incorrect - -**Code** : `401 Unauthorized` - -**Content** : -``` -Error Connecting to auth service ... -``` - -**Condition** : If issue with Globus API or ACL Already Exists - -**Code** : `500 Internal Server Error` - -**Content** -``` -{ - 'success': False, - 'error_type': 'TransferAPIError', - 'error': "Can't create ACL rule; it already exists", - 'error_code': 'Exists', 'shared_directory_basename': '/username/' -} -``` - -## Parse bulk specifications - -This endpoint parses one or more bulk specification files in the staging area into a data -structure (close to) ready for insertion into the Narrative bulk import or analysis cell. - -It can parse `.tsv`, `.csv`, and Excel (`.xls` and `.xlsx`) files. Templates for the currently -supported data types are available in the [templates](./import_specifications/templates) -directory of this repo. See the [README.md](./import_specifications/templates/README.md) file -for instructions on template usage. - -See the [import specification ADR document](./docs/import_specifications.ADR.md) for design -details. - -**URL** : `ci.kbase.us/services/staging_service/bulk_specification` - -**local URL** : `localhost:3000/bulk_specification` - -**Method** : `GET` - -**Headers** : `Authorization: ` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -GET bulk_specification/?files=file1.[,file2.,...] -``` -`` is one of `csv`, `tsv`, `xls`, or `xlsx`. - -Reponse: -``` -{ - "types": { - : [ - {: , : , ...}, - {: , : , ...}, - ... - ], - : [ - {: , : , ...}, - ... - ], - ... - }, - "files": { - : {"file": "/file1.", "tab": "tabname"}, - : {"file": "/file2.", "tab": null}, - ... - } -} -``` - -* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) - file and the Narrative staging area configuration file - it is a shared namespace between the - staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an - app. It is determined by the first header line from the templates. -* `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. - These are determined by the second header line from the templates and will differ - by the data type. -* `` is the user-provided value for the input for a given `spec.json` ID - and import or analysis instance, where an import/analysis instance is effectively a row - in the data file. 
Each data file row is provided in order for each type. Each row is - provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the templates are - user-provided data, and each line corresponds to a single import or analysis. - -### Error Response - -Error reponses are of the general form: -``` -{ - "errors": [ - {"type": , - ... other fields depending on the error code ... - }, - ... - ] -} -``` - -Existing error codes are currently: - -* `cannot_find_file` if an input file cannot be found -* `cannot_parse_file` if an input file cannot be parsed -* `incorrect_column_count` if the column count is not as expected - * For Excel files, this may mean there is a non-empty cell outside the bounds of the data area -* `multiple_specifications_for_data_type` if more than one tab or file per data type is submitted -* `no_files_provided` if no files were provided -* `unexpected_error` if some other error occurs - -The HTTP code returned will be, in order of precedence: -* 400 if any error other than `cannot_find_file` or `unexpected_error` occurs -* 404 if at least one error is `cannot_find_file` but there are no 400-type errors -* 500 if all errors are `unexpected_error` - -The per error type data structures are: - -#### `cannot_find_file` - -``` -{ - "type": "cannot_find_file", - "file": -} -``` - -#### `cannot_parse_file` - -``` -{ - "type": "cannot_parse_file", - "file": , - "tab": , - "message": -} -``` - -#### `incorrect_column_count` - -``` -{ - "type": "incorrect_column_count", - "file": , - "tab": , - "message": -} -``` - -#### `multiple_specifications_for_data_type` - -``` -{ - "type": "multiple_specifications_for_data_type", - "file_1": , - "tab_1": , - "file_2": , - "tab_2": , - "message": -} -``` - -#### `no_files_provided` - -``` -{ - "type": "no_files_provided" -} -``` - -#### `unexpected_error` - -``` -{ - "type": "unexpected_error", - "file": - "message": -} -``` - -## Write bulk specifications - -This endpoint is the reverse of the parse bulk specifications endpoint - it takes a similar -data structure to that which the parse endpoint returns and writes bulk specification templates. - -**URL** : `ci.kbase.us/services/staging_service/write_bulk_specification` - -**local URL** : `localhost:3000/write_bulk_specification` - -**Method** : `POST` - -**Headers** : -* `Authorization: ` -* `Content-Type: Application/JSON` - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -POST write_bulk_specification/ -{ - "output_directory": , - "output_file_type": , - "types": { - : { - "order_and_display: [ - [, ], - [, ], - ... - ], - "data": [ - {: , : , ...}, - {: , : , ...} - ... - ] - }, - : { - "order_and_display: [ - [, ], - ... - ], - "data": [ - {: , : , ...}, - ... - ] - }, - ... - } -} -``` -* `output_directory` specifies where the output files should be written in the user's staging area. -* `output_file_type` specifies the format of the output files. -* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) - file and the Narrative staging area configuration file - it is a shared namespace between the - staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an - app. It is included in the first header line in the templates. -* `order_and_display` determines the ordering of the columns in the written templates, as well - as mapping the spec.json ID of the parameter to the human readable name of the parameter in - the display.yml file. 
-* `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. - These are written to the second header line from the import templates and will differ - by the data type. -* `data` contains any data to be written to the file as example data, and is analagous to the data - structure returned from the parse endpoint. To specify that no data should be written to the - template provide an empty list. -* `` is the value for the input for a given `spec.json` ID - and import or analysis instance, where an import/analysis instance is effectively a row - in the data file. Each data file row is provided in order for each type. Each row is - provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the templates are - user-provided data, and each line corresponds to a single import or analysis. - -Reponse: -``` -{ - "output_file_type": , - "files": { - : , - ... - : , - } -} -``` - -* `output_file_type` has the same definition as above. -* `files` contains a mapping of each provided data type to the output template file for that type. - In the case of Excel, all the file paths will be the same since the data types are all written - to different tabs in the same file. - -### Error Response - -Method specific errors have the form: - -``` -{"error": } -``` -The error code in this case will be a 4XX error. - -The AioHTTP server may also return built in errors that are not in JSON format - an example of -this is overly large (> 1MB) request bodies. - - -## Get Importer Mappings - -This endpoint returns: -1) a mapping between a list of files and predicted importer apps, and -2) a file information list that includes the input file names split between the file prefix and - the file suffix, if any, that was used to determine the file -> importer mapping, and a list - of file types based on the file suffix. If a file has a suffix that does not match - any mapping (e.g. `.sys`), the suffix will be `null`, the prefix the entire file name, and - the file type list empty. - -For example, - * if we pass in nothing we get a response with no mappings - * if we pass in a list of files, such as ["file1.fasta", "file2.fq", "None"], we would get back a - response that maps to Fasta Importers and FastQ Importers, with a weight of 0 to 1 - which represents the probability that this is the correct importer for you. 
- * for files for which there is no predicted app, the return is a null value - * this endpoint is used to power the dropdowns for the staging service window in the Narrative - -**URL** : `ci.kbase.us/services/staging_service/importer_mappings` - -**local URL** : `localhost:3000/importer_mappings` - -**Method** : `POST` - -**Headers** : Not Required - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -data = {"file_list": ["file1.txt", "file2.zip", "file3.gff3.gz"]} - async with AppClient(config, username) as cli: - resp = await cli.post( - "importer_mappings/", data=data - ) -``` -Response: -``` -{ - "mappings": [ - null, - [{ - "id": "decompress", - "title": "decompress/unpack", - "app_weight": 1, - }], - [{ - "app_weight": 1, - "id": "gff_genome", - "title": "GFF/FASTA Genome", - }, - { - "app_weight": 1, - "id": "gff_metagenome", - "title": "GFF/FASTA MetaGenome", - }] - ], - "fileinfo": [ - {"prefix": "file1.txt", "suffix": null, "file_ext_type": []}, - {"prefix": "file2", "suffix": "zip", "file_ext_type": ["CompressedFileFormatArchive"]}, - {"prefix": "file3", "suffix": "gff3.gz", "file_ext_type": ["GFF"]} - ] -} -``` -### Error Response -**Code** : `400 Bad Request` - -**Content** -``` -must provide file_list field -``` - -## Get importer filetypes - -This endpoint returns information about the file types associated with data types and the file -extensions for those file types. It is primarily of use for creating UI elements describing -which file extensions may be selected when performing bulk file selections. - -**URL** : `ci.kbase.us/services/staging_service/importer_filetypes` - -**local URL** : `localhost:3000/importer_filetypes` - -**Method** : `GET` - -**Headers** : Not Required - -### Success Response - -**Code** : `200 OK` - -**Content example** - -``` -GET importer_filetypes/ -``` -Response: -``` -{ - "datatype_to_filetype": { - : [, ... ], - ... - : [, ... ], - }, - "filetype_to_extensions": { - : [, ..., ], - ... - : [, ..., ], - } -} -``` - -* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) - file and the Narrative staging area configuration file - it is a shared namespace between the - staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an - import app. It is included in the first header line in the templates. -* `` is a file type like `FASTA` or `GENBANK`. The supported file types are listed - below. -* `` is a file extension like `*.fa` or `*.gbk`. - -# Autodetect App and File Type IDs - -## App type IDs - -These are the currently supported upload app type IDs: - -``` -fastq_reads_interleaved -fastq_reads_noninterleaved -sra_reads -genbank_genome -gff_genome -gff_metagenome -expression_matrix -media -fba_model -assembly -phenotype_set -sample_set -metabolic_annotation -metabolic_annotation_bulk -escher_map -decompress -``` - -Note that decompress is only returned when no other file type can be detected from the file -extension. - -## File type IDs - -These are the currently supported file type IDs. These are primarily useful for apps that take -two different file types, like GFF/FASTA genomes. 
- -``` -FASTA -FASTQ -SRA -GFF -GENBANK -SBML -JSON -TSV -CSV -EXCEL -CompressedFileFormatArchive -``` diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 00000000..ea4298f1 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,1484 @@ + +## API + +- all paths should be specified treating the user's home directory as root + +- The url base, in an actualy deployment, will be: + - https://ci.kbase.us for CI + - https://next.kbase.us for Next + - https://appdev.kbase.us for Appdev + - https://kbase.us for Production + +- The base path in all environments is `/services/staging_service` + +- All api paths will be suffixed to the url base and base path. For example: + + `https://ci.kbase.us/services/staging_service/file-lifetime` + + will invoke the + `file-lifetime` endpoint, which returns the number of days a file will be retained. + +- For local usage (i.e. spinning the service up locally) + - the base url is `http://localhost:3000` + - the base path is not required + - e.g. `http://localhost:3000/file-lifetime` + +- For endpoints that require authorization, the `Authorization` header must be supplied, + with the value a KBase auth token. + +## Test Service + +`GET /test-service` + +Returns a fixed text response. Used to determine if the service is running? + +> TODO: we probably don't need this endpoint + + +### Success Response + +`200 OK` + +#### Example + +``` +This is just a test. This is only a test. +``` + +## Test Auth + +`GET /test-auth` + +Returns a text response indicating that the + +### Headers + +- `Authorization: ` + +### Success Response + +`200 OK` + +`Content-Type: text/plain` + +**Example** + +```text +I'm authenticated as +``` + +### Error Responses + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +##### Content + +`text/plain` + +```text +Must supply token +``` + +## File Lifetime + +`GET /file-lifetime` + +Number of days a file will be held for in staging service before being deleted. + +This is not actually handled by the server but is expected to be performed by a cron job which shares the env variable read here. + +### Success Response + +`200 OK` + +#### Content + +`text/plain` + +#### Example + +```text +90 +``` + +## List Directory + +`GET /list/{path to directory}?[showHidden={True/False}]` + +Returns a JSON Array containing entries for each file and folder within the provided directory. + +Defaults to not show hidden dotfiles. + +### Headers +`Authorization: ` + +### Success Response + +`200 OK` + +### Example + +```json +[ + { + "name": "testFolder", + "path": "nixonpjoshua/testFolder", + "mtime": 1510949575000, + "size": 96, + "isFolder": true + }, + { + "name": "testfile", + "path": "nixonpjoshua/testfile", + "mtime": 1510949629000, + "size": 335, + "isFolder": false + } +] +``` + +### Error Responses + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +#### `404 Not Found` + +Results if the requested path is not found for the user associated with the +Authorization. + +##### Content + +`text/plain` + +```text +path / does not exist +``` + + + +## Download file + +`GET /download/{path to file}` + +Returns the request file. 
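+
+As an illustration only, the sketch below shows one way a client might call this
+endpoint from Python. It assumes the third-party `requests` package is available
+(it is not necessarily a dependency of this repo), and the token and file path are
+placeholders.
+
+```python
+import requests
+
+TOKEN = "<kbase auth token>"   # placeholder
+PATH = "myfolder/reads.fastq"  # hypothetical staging-area file
+URL = f"https://ci.kbase.us/services/staging_service/download/{PATH}"
+
+# The response body is the raw file content (application/octet-stream),
+# so stream it to disk in binary chunks.
+with requests.get(URL, headers={"Authorization": TOKEN}, stream=True) as resp:
+    resp.raise_for_status()
+    with open("reads.fastq", "wb") as out:
+        for chunk in resp.iter_content(chunk_size=65536):
+            out.write(chunk)
+```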
+ +### Headers + +- `Authorization: ` + +### Success Response + +#### `200 OK` + +##### Content + +`application/octet-stream` + +The contents of the file are returned as the entire body of the response + +### Error Responses + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +#### `400 Bad Request` + +Returned if the requested file is a directory, not a file. + +##### Content + +`text/plain` + +```text +/ is a directory not a file +``` + +#### `404 Not Found` + +The file does not exist + +##### Content + +`text/plain` + +``` +path / does not exist +``` + +## Search files and folders + +`GET /search/{search query}?[showHidden={True/False}]` + +Returns a list of files for the user identified by the Authorization, and matching the +given query text + +defaults to not show hidden dotfiles + +> TODO: explain how the search works! + +### Headers + +- `Authorization: ` + +### Success Response + +#### `200 OK` + +##### Content + +`application/json` + +```json +[ + { + "name": "testfile", + "path": "nixonpjoshua/testfile", + "mtime": 1510949629000, + "size": 335, + "isFolder": false + }, + { + "name": "testFolder", + "path": "nixonpjoshua/testFolder", + "mtime": 1510949575000, + "size": 96, + "isFolder": true + }, + { + "name": "testinnerFile", + "path": "nixonpjoshua/testFolder/testinnerFile", + "mtime": 1510949575000, + "size": 0, + "isFolder": false + } +] +``` + +### Error Responses + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +## File and Folder Metadata + +`GET /metadata/{path to file or folder}` + +### Headers +- `Authorization: ` + +### Success Response + +#### `200 OK` + +If the file or directory is found, a Metadata object is returned + +##### Content + +`application/json` + +##### Examples + +```json +{ + "name": "testFolder", + "path": "nixonpjoshua/testFolder", + "mtime": 1510949575000, + "size": 96, + "isFolder": true +} +``` + +```json +{ + "md5": "73cf08ad9d78d3fc826f0f265139de33", + "lineCount": "13", + "head": "there is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom", + "tail": "there is stuff in this file\nthere is stuff in this file\nthere is stuff in this file\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom\nstuff at the bottom", + "name": "testFile", + "path": "nixonpjoshua/testFile", + "mtime": 1510949629000, + "size": 335, + "isFolder": false +} +``` + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +#### `404 Not Found` + +##### Content + +`text/plain` + +```text +path / does not exist +``` + +## Upload File + +`POST /upload` + +This most important endpoint is designed to receive one or more files in a `POST` +request sending a `multipart/form-data` body. + +The body format is described in more detail below. + +The response is a JSON object containing file metadata. + +> This might seem a bit of an odd choice, and does complicate the API. 
It was chosen, I +> belive because the Narrative front-end component handling the submission of file +> uploads supports `multipart/form-data`, or it may be because historically the first +> front-end implementation was a form itself, as HTML forms will send as +> `mutlipart/form-data` if a file control is used to select a file. +> +> The current front-end implementation does not require this, although it does support +> it, so I would suggest that in the future we refactor to be a simple binary body, with +> the filename specified in the url search (query param). + +### Headers + +- `Authorization: ` +- `Content-Type: multipart/form-data` + +### Body + +The multpart form-data format supports sending of multiple form fields, each with their +own metadata as well as body. + +The service requires that two fields be present in the specified order: + +- `destPath` - path file should end up in +- `uploads` - the file itself. + + +Filenames starting with whitespace or a '.' are not allowed + +The `destPath` field contains the path at which the file should be created. It will +probably have been set based on the path of the file chosen for upload in the Narrative +interface (although the service API is agnostic about the how and why.) + +The `uploads` field will contain a binary representation of the file. The file binary +content will be literaly copied into the destination file, with no encoding or validation. + +#### Example + +``` +------WebKitFormBoundaryA0xgUu1fi1whAcSB +Content-Disposition: form-data; name="destPath" + +/ +------WebKitFormBoundaryA0xgUu1fi1whAcSB +Content-Disposition: form-data; name="uploads"; filename="foo.csv" +Content-Type: text/markdown + +[binary content excluded] +------WebKitFormBoundaryA0xgUu1fi1whAcSB-- +``` + +In this example, the `destPath` is `/`, indicating that the file will be created in the +root directory of the user's staging area. + +The `uploads` field would contain the file's binary content. Note that the field +metadata includes the "filename" property indicating that the target file +name is `"foo.csv"`. + + +### Success Response + +`200 OK` + +A successful response will return a JSON object containing a metadata description of the file. + +#### Content + +`application/json` + +```json +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "array", + "items": [ + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "path": { + "type": "string" + }, + "mtime": { + "type": "integer" + }, + "size": { + "type": "integer" + }, + "isFolder": { + "type": "boolean" + } + }, + "required": [ + "name", + "path", + "mtime", + "size", + "isFolder" + ] + } + ] +} +``` + +#### Example + +```json +[ + { + "name": "fasciculatum_supercontig.fasta", + "path": "nixonpjoshua/fasciculatum_supercontig.fasta", + "mtime": 1510950061000, + "size": 31536508, + "isFolder": false + } +] +``` + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + + + +## Define/Create UPA for file which has been imported + +`POST /define-upa/{path to imported file}` + +### Headers + +- `Authorization: ` +- `Content-Type: application/json` + +### Body + +The POST request body contains an object whose sole (??) property contains the `upa` to +associate with the provided file. 
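+
+To make the request shape concrete, here is a minimal, non-normative sketch of a
+client call. The staging path is invented for the example, the UPA is the one from
+the example body below, and the third-party `requests` package is assumed.
+
+```python
+import requests
+
+TOKEN = "<kbase auth token>"  # placeholder
+# Hypothetical file already present in the user's staging area.
+url = "https://ci.kbase.us/services/staging_service/define-upa/myfolder/reads.fastq"
+
+# The JSON body carries the single required "upa" property; the json= argument
+# also sets the Content-Type: application/json header.
+resp = requests.post(url, headers={"Authorization": TOKEN}, json={"upa": "123.4.5"})
+resp.raise_for_status()
+print(resp.text)  # plain-text confirmation message from the service
+```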
+ +#### Schema + +```json +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "upa": { + "type": "string", + "description": "the actual UPA of imported file" + } + }, + "required": [ + "upa" + ] +} +``` + +#### Example + +```json +{ + "upa": "123.4.5" +} +``` + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: text/plain` + + +#### Body + +```text +successfully update UPA for file +``` + +### Error Responses + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +#### `400 Bad Request` + +UPA missing + +##### Content + +`text/plain` + +``` +must provide UPA field in body +``` + + + +## Delete file or folder (will delete things contained in folder) + +`DELETE /delete/{path to file or folder}` + + +### Headers + +- `Authorization: ` + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: text/plain` + +#### Body + +```text +successfully deleted UPA +``` + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + + +#### `403 Forbidden` + +##### Headers + +- `Content-Type: text/plain` + +##### Content + +```text +cannot delete home directory +``` + +```text +cannot delete protected file +``` + +or other file access errors. + +#### `404 Not Found` + +##### Headers + +- `Content-Type: text/plain` + +##### Body + +```text +could not delete +``` + + + + +## Move/rename a file or folder + +`PATCH /mv/{path to file or folder}` + +### Headers + +- `Authorization: ` +- `Content-Type: application/json` + +### Body + +#### Schema + +```json +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "newPath": { + "type": "string", + "description": "the new location/name for file or folder" + } + }, + "required": [ + "newPath" + ] +} +``` + +#### Example + +```json +{ + "newPath": "/foo/bar" +} +``` + + +### Success Response + +`200 OK` + +#### Headers +- `Content-Type: text/plain` + +#### Body + +``` +successfully moved to +``` + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + + +#### `400 Bad Request` + +If the newPath field is missing in the content body + +##### Headers + +- `Content-Type: text/plain` + +##### Body + +``` +must provide newPath field in body +``` + +#### `403 Forbidden` + +##### Headers + +- `Content-Type: text/plain` + +##### Body + +``` +cannot rename home or move directory +``` + +``` +cannot rename or move protected file +``` + +#### `409 Conflict` + +##### Headers + +- `Content-Type: text/plain` + +##### Body + +``` + already exists +``` + + + + +## Decompress various archive formats + +`PATCH /decompress/{path to archive}` + +supported archive formats are: +.zip, .ZIP, .tar.gz, .tgz, .tar.bz, .tar.bz2, .tar, .gz, .bz2, .bzip2 + + +### Headers + +- `Authorization: ` + +### Success Response + +`200 OK` + +##### Headers + +- `Content-Type: text/plain` + +##### Body + +``` +successfully decompressed +``` + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + + +#### `400 Bad Request` + +##### Headers + +- `Content-Type: text/plain` + +##### Body + + +```text +cannot decompress a file +``` + + + +## Add Globus ACL + +`GET /add-acl` + +After authenticating at this endpoint, AUTH is queried to get your filepath and globus id file for +linking to globus. 
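+
+A hedged usage sketch follows (placeholder token; the third-party `requests`
+package is assumed, and any HTTP client would work equally well):
+
+```python
+import requests
+
+TOKEN = "<kbase auth token>"  # placeholder
+URL = "https://ci.kbase.us/services/staging_service/add-acl"
+
+# Simple authorized GET; the service resolves the Globus identity linked to
+# the KBase account and adds an ACL for the user's staging directory.
+resp = requests.get(URL, headers={"Authorization": TOKEN})
+resp.raise_for_status()
+acl = resp.json()
+print(acl["principal"], acl["path"], acl["permissions"])
+```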
+ +### Headers + +- `Authorization: ` + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: application/json` + +#### Body + +##### Schema + +```json +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "principal": { + "type": "string" + }, + "path": { + "type": "string" + }, + "permissions": { + "type": "string" + } + }, + "required": [ + "success", + "principal", + "path", + "permissions" + ] +} +``` + +##### Example + +```json +{ + "success": true, + "principal": "KBase-Example-59436z4-z0b6-z49f-zc5c-zbd455f97c39", + "path": "/username/", + "permissions": "rw" +} +``` + + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + + +#### `500 Internal Server Error` + +If issue with Globus API or ACL Already Exists + +##### Headers + +- `Content-Type: application/json` + +##### Body + +###### Schema + +```json +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "error_type": { + "type": "string" + }, + "error": { + "type": "string" + }, + "error_code": { + "type": "string" + }, + "shared_directory_basename": { + "type": "string" + } + }, + "required": [ + "success", + "error_type", + "error", + "error_code", + "shared_directory_basename" + ] +} +``` + +###### Content (example) + +```json +{ + "success": false, + "error_type": "TransferAPIError", + "error": "Can't create ACL rule; it already exists", + "error_code": "Exists", + "shared_directory_basename": "/username/" +} +``` + + + + +## Remove Globus ACL + +`GET /remove-acl` + +After authenticating at this endpoint, AUTH is queried to get your filepath and globus id file for +linking to globus. + +### Headers + +- `Authorization: ` + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: application/json` + +#### Body + +##### Schema + +##### Content (example) + +```json +{ + "message": "... message elided...", + "Success": true +} +``` + +Note that the "message" is that returned by the globus api, and out of scope for +documentation here. + +> TODO: we should provide our own message, or return the globus response data, but not +> return the globus response data (a json object) into a string and call it a message! + +### Error Response + +The [common responses for an authorized request](#Common-Authorization-Error-Responses) + +#### `500 Internal Server Error` + +An issue with Globus API or ACL Already Exists + +##### Headers + +- `Content-Type: application/json` + +##### Body + +####### Schema + +```json +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "error_type": { + "type": "string" + }, + "error": { + "type": "string" + }, + "error_code": { + "type": "string" + }, + "shared_directory_basename": { + "type": "string" + } + }, + "required": [ + "success", + "error_type", + "error", + "error_code", + "shared_directory_basename" + ] +} +``` + +####### Content (example) + +**Content** + +```json +{ + "success": false, + "error_type": "TansferAPIError", + "error": "Can't create ACL rule; it already exists", + "error_code": "Exists", + "shared_directory_basename": "/username/" +} +``` + + +## Parse bulk specifications + +`GET /bulk_specification/?files=file1.[,file2.,...]` + +where `` is one of `csv`, `tsv`, `xls`, or `xlsx`. 
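+
+For example, a client might request parsing of two specification files as in the
+sketch below. The file names are invented for the example and the third-party
+`requests` package is assumed.
+
+```python
+import requests
+
+TOKEN = "<kbase auth token>"  # placeholder
+# "files" is a comma-separated list of staging-area specification files
+# (csv, tsv, xls, or xlsx); these two names are hypothetical.
+URL = ("https://ci.kbase.us/services/staging_service/"
+       "bulk_specification/?files=genomes.csv,samples.tsv")
+
+resp = requests.get(URL, headers={"Authorization": TOKEN})
+resp.raise_for_status()
+parsed = resp.json()
+# "types" maps each data type ID to its parsed rows; "files" records the
+# source file (and Excel tab, if any) for each data type.
+print(sorted(parsed["types"]), parsed["files"])
+```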
+ +This endpoint parses one or more bulk specification files in the staging area into a data +structure (close to) ready for insertion into the Narrative bulk import or analysis cell. + +It can parse `.tsv`, `.csv`, and Excel (`.xls` and `.xlsx`) files. Templates for the currently +supported data types are available in the [templates](./import_specifications/templates) +directory of this repo. See the [README.md](./import_specifications/templates/README.md) file +for instructions on template usage. + +See the [import specification ADR document](./docs/import_specifications.ADR.md) for design +details. + + +### Headers + +- `Authorization: ` + +### Success Response + +`200 OK` + +#### Headers + +#### Body + +##### Schema + +> TODO + +##### Content (example) + +``` +{ + "types": { + : [ + {: , : , ...}, + {: , : , ...}, + ... + ], + : [ + {: , : , ...}, + ... + ], + ... + }, + "files": { + : {"file": "/file1.", "tab": "tabname"}, + : {"file": "/file2.", "tab": null}, + ... + } +} +``` + +* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) + file and the Narrative staging area configuration file - it is a shared namespace between the + staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an + app. It is determined by the first header line from the templates. +* `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. + These are determined by the second header line from the templates and will differ + by the data type. +* `` is the user-provided value for the input for a given `spec.json` ID + and import or analysis instance, where an import/analysis instance is effectively a row + in the data file. Each data file row is provided in order for each type. Each row is + provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the templates are + user-provided data, and each line corresponds to a single import or analysis. + +### Error Response + +Error reponses are of the general form: + +``` +{ + "errors": [ + {"type": , + ... other fields depending on the error code ... + }, + ... 
+ ] +} +``` + +Existing error codes are currently: + +* `cannot_find_file` if an input file cannot be found +* `cannot_parse_file` if an input file cannot be parsed +* `incorrect_column_count` if the column count is not as expected + * For Excel files, this may mean there is a non-empty cell outside the bounds of the data area +* `multiple_specifications_for_data_type` if more than one tab or file per data type is submitted +* `no_files_provided` if no files were provided +* `unexpected_error` if some other error occurs + +The HTTP code returned will be, in order of precedence: + +* 400 if any error other than `cannot_find_file` or `unexpected_error` occurs +* 404 if at least one error is `cannot_find_file` but there are no 400-type errors +* 500 if all errors are `unexpected_error` + +The per error type data structures are: + +#### `cannot_find_file` + +``` +{ + "type": "cannot_find_file", + "file": +} +``` + +#### `cannot_parse_file` + +``` +{ + "type": "cannot_parse_file", + "file": , + "tab": , + "message": +} +``` + +#### `incorrect_column_count` + +``` +{ + "type": "incorrect_column_count", + "file": , + "tab": , + "message": +} +``` + +#### `multiple_specifications_for_data_type` + +``` +{ + "type": "multiple_specifications_for_data_type", + "file_1": , + "tab_1": , + "file_2": , + "tab_2": , + "message": +} +``` + +#### `no_files_provided` + +``` +{ + "type": "no_files_provided" +} +``` + +#### `unexpected_error` + +``` +{ + "type": "unexpected_error", + "file": + "message": +} +``` + + + + +## Write bulk specifications + +`POST /write_bulk_specification` + +This endpoint is the reverse of the parse bulk specifications endpoint - it takes a similar +data structure to that which the parse endpoint returns and writes bulk specification templates. + +### Headers + +- `Authorization: ` +- `Content-Type: application/json` + + +#### Body + +##### Schema + +> TODO + +##### Content (example) + +``` +{ + "output_directory": , + "output_file_type": , + "types": { + : { + "order_and_display: [ + [, ], + [, ], + ... + ], + "data": [ + {: , : , ...}, + {: , : , ...} + ... + ] + }, + : { + "order_and_display: [ + [, ], + ... + ], + "data": [ + {: , : , ...}, + ... + ] + }, + ... + } +} +``` + +* `output_directory` specifies where the output files should be written in the user's staging area. +* `output_file_type` specifies the format of the output files. +* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) + file and the Narrative staging area configuration file - it is a shared namespace between the + staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an + app. It is included in the first header line in the templates. +* `order_and_display` determines the ordering of the columns in the written templates, as well + as mapping the spec.json ID of the parameter to the human readable name of the parameter in + the display.yml file. +* `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. + These are written to the second header line from the import templates and will differ + by the data type. +* `data` contains any data to be written to the file as example data, and is analagous to the data + structure returned from the parse endpoint. To specify that no data should be written to the + template provide an empty list. +* `` is the value for the input for a given `spec.json` ID + and import or analysis instance, where an import/analysis instance is effectively a row + in the data file. 
Each data file row is provided in order for each type. Each row is + provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the templates are + user-provided data, and each line corresponds to a single import or analysis. + + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: application/json` + +#### Body + +##### Schema + +> TODO + +##### Content (example) + +```json +{ + "output_file_type": , + "files": { + : , + ... + : , + } +} +``` + +* `output_file_type` has the same definition as above. +* `files` contains a mapping of each provided data type to the output template file for that type. + In the case of Excel, all the file paths will be the same since the data types are all written + to different tabs in the same file. + +### Error Response + +Method specific errors have the form: + +```json +{"error": ""} +``` + +The error code in this case will be a 4XX error. + +The AioHTTP server may also return built in errors that are not in JSON format - an example of +this is overly large (> 1MB) request bodies. + + + +## Get Importer Mappings + +`POST /importer_mappings` + +This endpoint returns: + +1) a mapping between a list of files and predicted importer apps, and +2) a file information list that includes the input file names split between the file prefix and + the file suffix, if any, that was used to determine the file -> importer mapping, and a list + of file types based on the file suffix. If a file has a suffix that does not match + any mapping (e.g. `.sys`), the suffix will be `null`, the prefix the entire file name, and + the file type list empty. + +For example, + +* if we pass in nothing we get a response with no mappings +* if we pass in a list of files, such as ["file1.fasta", "file2.fq", "None"], we would get back a + response that maps to Fasta Importers and FastQ Importers, with a weight of 0 to 1 + which represents the probability that this is the correct importer for you. 
+* for files for which there is no predicted app, the return is a null value +* this endpoint is used to power the dropdowns for the staging service window in the Narrative + + +### Headers + +none + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: application/json` + +#### Body + +##### Schema + +> TODO + +##### Content (example) + +```python +data = {"file_list": ["file1.txt", "file2.zip", "file3.gff3.gz"]} + async with AppClient(config, username) as cli: + resp = await cli.post( + "importer_mappings/", data=data + ) +``` + +### Success Response + +`200 OK` + +#### Headers + +- `Content-Type: application/json` + +#### Body + +##### Schema + +> TODO + +##### Content (example) + +```json +{ + "mappings": [ + null, + [{ + "id": "decompress", + "title": "decompress/unpack", + "app_weight": 1, + }], + [{ + "app_weight": 1, + "id": "gff_genome", + "title": "GFF/FASTA Genome", + }, + { + "app_weight": 1, + "id": "gff_metagenome", + "title": "GFF/FASTA MetaGenome", + }] + ], + "fileinfo": [ + {"prefix": "file1.txt", "suffix": null, "file_ext_type": []}, + {"prefix": "file2", "suffix": "zip", "file_ext_type": ["CompressedFileFormatArchive"]}, + {"prefix": "file3", "suffix": "gff3.gz", "file_ext_type": ["GFF"]} + ] +} +``` + +### Error Responses + +> TODO: hopefully the typical 401/400 auth errors are returned + +`400 Bad Request` + +#### Headers + +- `Content-Type: text/plain` + +#### Body + +``` +must provide file_list field +``` + + + +## Get importer filetypes + +`GET /importer_filetypes` + +This endpoint returns information about the file types associated with data types and the file +extensions for those file types. It is primarily of use for creating UI elements describing +which file extensions may be selected when performing bulk file selections. + +### Headers + +none + +### Success Response + +`200 OK` + +#### Headers + +#### Body + +##### Schema + +> TODO + +##### Content (example) + +``` +{ + "datatype_to_filetype": { + : [, ... ], + ... + : [, ... ], + }, + "filetype_to_extensions": { + : [, ..., ], + ... + : [, ..., ], + } +} +``` + +* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) + file and the Narrative staging area configuration file - it is a shared namespace between the + staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an + import app. It is included in the first header line in the templates. +* `` is a file type like `FASTA` or `GENBANK`. The supported file types are listed + below. +* `` is a file extension like `*.fa` or `*.gbk`. + +## Autodetect App and File Type IDs + +### App type IDs + +These are the currently supported upload app type IDs: + +``` +fastq_reads_interleaved +fastq_reads_noninterleaved +sra_reads +genbank_genome +gff_genome +gff_metagenome +expression_matrix +media +fba_model +assembly +phenotype_set +sample_set +metabolic_annotation +metabolic_annotation_bulk +escher_map +decompress +``` + +Note that decompress is only returned when no other file type can be detected from the file +extension. + +### File type IDs + +These are the currently supported file type IDs. These are primarily useful for apps that take +two different file types, like GFF/FASTA genomes. + +``` +FASTA +FASTQ +SRA +GFF +GENBANK +SBML +JSON +TSV +CSV +EXCEL +CompressedFileFormatArchive +``` + +## Common Authorization Error Responses + +### `401 Unauthorized` + +Authentication is incorrect + +#### Content + +`text/plain` + +```text +Error Connecting to auth service ... 
+``` + +### `400 Bad Request` + +Results if the `Authorization` header field is absent or empty + +> TODO: this should be a 403 + +#### Content + +`text/plain` + +```text +Must supply token +``` diff --git a/docs/development/development-tools.md b/docs/development/development-tools.md new file mode 100644 index 00000000..fe28df1c --- /dev/null +++ b/docs/development/development-tools.md @@ -0,0 +1,119 @@ +# Development Tools + +This document provides an overview of two different but related development approaches - +devcontainers (via Visual Studio Code), and command line tools via a Docker container. + +## Visual Studio Code devcontainer + +One of your best friends for development may be Visual Studio Code (VSC) and a devcontainer. +This repo contains a VSC devcontainer all ready for use. + +> At the time of writing, devcontainer support for PyCharm is not mature enough to use; +> too many features are missing. However, in the near future this should also be a very +> productive development environment. + +A devcontainer provides a stable, reproducible development platform. For this repo, it +is based on the Python 3.11 debian image, the same one used for deployment. In fact, the +devcontainer should reflect the same OS and Python environment as the deployment and the +development tools images. This feature is important for reproducibility, and providing +the least surprise! + +There is also support for locally hosted development via VSC, but this developer (EAP) +does not do this style of Python development. + + +### Getting Started + +1. Ensure you have docker running. + +2. Open the project in VSC + +There are a few ways to open a project in VSC: +- from the repo directory `code .` +- from VSC menu: + - File > New Window + - From the new window: + - Click on Explorer in the left-nav, then click on Open Folder + - or + - Selecte File > Open Folder + - Select the folder containing the repo (project) + - Click Open + +3. Start the devcontainer. + +- press the keys `Shift` - `Command` - `P` (macOS) to open the command palette +- start typing "Dev Container: Reopen Folder in Container" +- when you see it appear, click on it +- the image will build, the container will start + +4. Open a terminal + +If a terminal is not already open, open the built-in terminal with `Control` - `~` (that is a tilde) + +5. Depending on the docker installation, you may need to grant access. If you don't see + git things enabled, click on the Git tool, and follow the instructions. + +Now you should treat this just like a local environment. The Python dependencies will +already be installed. If you have changes to them, however, you may simply make the +change to the requirements file and then reinstall them from the VSC devcontainer +terminal. + +### Running Tools + +All of the tools available for running from the host via docker are also directly +available within the devcontainer. I still often run them from a host termianl anyway, as +I can control the position and display of the native terminal a bit better, and it is +also decoupled from any issues VSC may have. YMMV. + +Here are the common ones: + +- `mypy staging_service` +- `black staging_service` +- `isort staging_service` +- `./run_tests.sh ` + +### Running the server + +It is very efficacious to develop the server as it is running, and to exercise new or +changed endpoints directly. This can be through a local host and port, or via kbase-ui +integration and an official kbase environment host. 
+ +In either case, the startup of the service is the same: + +```shell +FILE_LIFETIME="90" KB_DEPLOYMENT_CONFIG="${PWD}/deployment/conf/local.cfg" PYTHONPATH="${PWD}/staging_service" python -m staging_service +``` + +The `FILE_LIFETIME` environment variable is required, and sets the file retention policy. + +We ensure that the configuration is available via `KB_DEPLOYMENT_CONFIG`. In our case we +are using a configuration file already prepared for local usage. If you inspect it, +you'll find that all directory references are relative to the current directory. We are +referecing the `local.cfg` local configuration where it is stored in the codebase. + +We also ensure that the `staging_service` is included in the `PYTHONPATH`. + +Finally, we invoke the service simply by starting the bare aiohttp server invocation +located in `staging_service/__main__.py`. + +As a quick sanity test once the service is running, try this from a host terminal: + + +```shell +curl http://localhost:3000/test-service +``` + +> TODO: The service should have a `/status` endpoint, not `/test-service`. + +## Host Tools + +The code checking tools can all be run from the host via the `run` script. This script +actually runs the tools in a Docker container which is very similar to the devcontainer +and service Docker configurations. + +To run the tools: + +- `./development/scripts/run mypy staging_service` +- `./development/scripts/run black staging_service` +- `./development/scripts/run isort staging_service` +- `./development/scripts/run ./run_tests.sh ` diff --git a/docs/development/index.md b/docs/development/index.md new file mode 100644 index 00000000..45ef30be --- /dev/null +++ b/docs/development/index.md @@ -0,0 +1,5 @@ +# Development + +- [Original docs from readme](./original-doc.md) +- [Development Tools](./development-tools.md) +- [Maintaining the Dockerfiles](./maintaining-dockerfile.md) \ No newline at end of file diff --git a/docs/development/maintaining-dockerfile.md b/docs/development/maintaining-dockerfile.md new file mode 100644 index 00000000..074b123c --- /dev/null +++ b/docs/development/maintaining-dockerfile.md @@ -0,0 +1,48 @@ +# Maintaining Dockerfile + +## Base Image + +The Base Image should be an official Python distribution. We should prefer the most +recent release, when possible. + +## OS version + +The OS should be debian, as debian/ubunto are widely used at KBase. The version should +be the current stable. The LTS version, which is often several versions behind stable, +does not seem to be supported by recent Python distributions. That is, to get the most +recent Python versions requires we can't use LTS, and conversely if we wished to use the +LTS debian we wouldn't be able to use the most recent Python releases. + +It is more important to have a current Python, than a current OS. The OS really isn't +much of a concern to the service, as long as the required OS-level dependencies are +available. Python itself takes care of the interface to the OS, and that is all we are +concerned with. + +## OS dependencies + +The OS dependencies are indicated in the Dockerfile as exact versions. This ensures that +images are consistent, even as the base image evolves over time. + +## Different Dockerfiles + +At present, there are three Dockerfiles which must be kept in synchrony. They differ +enough that the inconvenience of keeping them consistent seems worth the effort. 
+ +- `./Dockerfile` + - used for the production image + - needs to copy all service files + - does not include development Python dependencies + - has the production entrypoint +- `./.devcontainer/Dockerfile` + - used for the devcontainer workflow + - does not copy service files + - contains development Python dependencies + - no entrypoint as that is provided by the docker-compose.yml +- `./development/tools/Dockerfile` + - used for the host cli tools + - does not copy service files + - contains development Python dependencies + - has special entrypoint which execs whatever command is passed from the command + line + + diff --git a/docs/development/original-doc.md b/docs/development/original-doc.md new file mode 100644 index 00000000..234bff1f --- /dev/null +++ b/docs/development/original-doc.md @@ -0,0 +1,64 @@ +# staging_service + +In order to setup local development, you must have docker installed and if you want to run it locally you must have python 3.11.5 or greater installed + +## setup + +make a folder called /data as well as inside that /bulk and inside that a folder for any usernames you wish it to work with + +data + -bulk + -username + -username + +if you want to run locally you must install requirements.txt for python3 + +## running + +to run locally run `/deployment/bin/entrypoint.sh` + +to run inside docker run `/run_in_docker.sh` + +to run in coordination with the kbase-ui development proxy, enabling it to serve locally as a back end for Narrative, kbase-ui and other services: + +```shell +make run-dev +``` + +## tests + +### Run on host +* to test use `./run_tests.sh` +* requires python 3.11.4 or higher +* requires installation on mac of libmagic `brew install libmagic` or `sudo port install libmagic` + +### Run in container + +You can also run tests in a container which uses the same base image and uses the same dependencies. (This container can also run other python tasks.) + +```shell +./development/scripts/run run_tests.sh +``` + +To run tests in individual test file you may supply the path to it. By default, the tests run against `tests/`. + +```shell +./development/scripts/run run_tests.sh tests/test_app.py +``` + +## debugging + +Included configurations for the Visual Studio Code debugger for python that mirror what is in the entrypoint.sh and testing configuration to run locally in the debugger, set breakpoints and if you open the project in VSCode the debugger should be good to go. 
The provided configurations can run locally and run tests locally + +## development + +When releasing a new version: + +* Update the release notes +* Update the version in [staging_service/app.py](staging_service/app.py).VERSION + +## expected command line utilities + +to run locally you will need all of these utils on your system: tar, unzip, zip, gzip, bzip2, md5sum, head, tail, wc + +in the docker container all of these should be available From f23e9255153616272d3beec102ac9ea034ab9c4b Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 16:26:14 -0700 Subject: [PATCH 10/40] update release notes, update GHA test to use python 3.11.5 --- .github/workflows/run_tests.yml | 4 ++-- RELEASE_NOTES.md | 7 +++++-- docs/development/original-doc.md | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 262a872e..1726e753 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -16,11 +16,11 @@ jobs: - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: 3.11.4 + python-version: 3.11.5 - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -r requirements.txt -r requirements_dev.txt - name: Test with pytest run: | bash run_tests.sh diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 8114a293..a74bb4dd 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,8 +2,11 @@ ## Unreleased -- update to Python 3.11.4 -- run black on all service and test files, fix most linting complaints (pylint, +- update to Python 3.11.5 +- images base is python 3.11.5 / debian bullseye +- pin all image dependencies to those currently supported by the debian bullseye +- update all Python dependencies to latest versions +- run black on all service and test files, fix most linting complaints (pylint, sonarlint) ## Version 1.3.6 diff --git a/docs/development/original-doc.md b/docs/development/original-doc.md index 234bff1f..0797581c 100644 --- a/docs/development/original-doc.md +++ b/docs/development/original-doc.md @@ -29,7 +29,7 @@ make run-dev ### Run on host * to test use `./run_tests.sh` -* requires python 3.11.4 or higher +* requires python 3.11.5 or higher * requires installation on mac of libmagic `brew install libmagic` or `sudo port install libmagic` ### Run in container From aba6c7e9cc2625ec87f5129632242640c3a34371 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 16:30:37 -0700 Subject: [PATCH 11/40] fix sonar warning [PTV-1888] --- development/scripts/generate-random-strings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/development/scripts/generate-random-strings.py b/development/scripts/generate-random-strings.py index ce754add..35e187fd 100644 --- a/development/scripts/generate-random-strings.py +++ b/development/scripts/generate-random-strings.py @@ -10,7 +10,9 @@ def make_random_string(string_length: int) -> str: Generate a string of random ascii letters of the given length """ possible_letters = string.ascii_letters - return "".join(random.choice(possible_letters) for i in range(string_length)) + # ignore the SONAR warning below; this is just for generating test data, security is + # not an issue. 
+ return "".join(random.choice(possible_letters) for i in range(string_length)) # NOSONAR if __name__ == "__main__": From e5c191544c6aa5a3eeaf315c3ca546b814377601 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 16:32:37 -0700 Subject: [PATCH 12/40] code quality gatekeeper changes [PTV-1888] --- development/scripts/generate-random-strings.py | 2 +- tests/test_app.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/development/scripts/generate-random-strings.py b/development/scripts/generate-random-strings.py index 35e187fd..cbf40479 100644 --- a/development/scripts/generate-random-strings.py +++ b/development/scripts/generate-random-strings.py @@ -12,7 +12,7 @@ def make_random_string(string_length: int) -> str: possible_letters = string.ascii_letters # ignore the SONAR warning below; this is just for generating test data, security is # not an issue. - return "".join(random.choice(possible_letters) for i in range(string_length)) # NOSONAR + return "".join(random.choice(possible_letters) for _ in range(string_length)) # NOSONAR if __name__ == "__main__": diff --git a/tests/test_app.py b/tests/test_app.py index aeaefc54..dede25df 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -655,7 +655,6 @@ async def test_download(txt): username = "testuser" with FileUtil() as fs: async with AppClient(config, username) as cli: - # TODO: here and elsewhere, we should probably use a fake fs. # Creates the test file (test_file_1) with text provided by # hypothesis. fs.make_dir(os.path.join(username, "test")) From 62daccb4b6aa22f9c9b000ff20ddf82e1fbce290 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 17:17:14 -0700 Subject: [PATCH 13/40] run tests in our container [PTV-1888] --- .github/workflows/run_tests.yml | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 1726e753..9868b523 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,26 +1,10 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - name: Run Staging Service Tests - on: [pull_request] - jobs: build: - runs-on: ubuntu-latest - steps: - uses: actions/checkout@v2 - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11.5 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt -r requirements_dev.txt - - name: Test with pytest - run: | - bash run_tests.sh + - name: Run the tests + run: ./development/scripts/run test \ No newline at end of file From b68c168361b292c62b9f5fbf24d1c28cac99254e Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 17:29:37 -0700 Subject: [PATCH 14/40] run tests in our container [PTV-1888] --- .github/workflows/run_tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 9868b523..faa3dac8 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -5,6 +5,8 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - name: Checkout the repo + uses: actions/checkout@v3 + - name: Run the tests run: ./development/scripts/run test \ No newline at end of file From 0f90a226e24f6c75b754d384bd634066c19b3de2 Mon Sep 17 
00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 17:34:24 -0700 Subject: [PATCH 15/40] improve script invocation [PTV-1888] --- .github/workflows/run_tests.yml | 2 +- development/scripts/run | 3 ++- run_tests.sh | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index faa3dac8..4a228886 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -9,4 +9,4 @@ jobs: uses: actions/checkout@v3 - name: Run the tests - run: ./development/scripts/run test \ No newline at end of file + run: ./development/scripts/run run_tests.sh \ No newline at end of file diff --git a/development/scripts/run b/development/scripts/run index 219486f2..c5becdfd 100755 --- a/development/scripts/run +++ b/development/scripts/run @@ -1,4 +1,5 @@ -#!/bin/sh +#!/usr/bin/env bash + PROJECT_DIRECTORY=${DIR:-$PWD} echo "Project Directory: ${PROJECT_DIRECTORY}" docker compose \ diff --git a/run_tests.sh b/run_tests.sh index 52ea58d5..c16ab8d5 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash DIR="$( cd "$( dirname "$0" )" && pwd )" export KB_DEPLOYMENT_CONFIG="$DIR/deployment/conf/testing.cfg" export FILE_LIFETIME="90" @@ -10,5 +10,5 @@ echo "** Running tests in ${TESTS}" echo "**" echo "****************************" echo -python3 -m pytest -s -vv --cov=staging_service --cov-report term --cov-report html "${TESTS}" +python -m pytest -s -vv --cov=staging_service --cov-report term --cov-report html "${TESTS}" From b79f478d623729bdbbf7577f524baa18f7223e40 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 17:41:53 -0700 Subject: [PATCH 16/40] somehow wasn't bookworm --- Dockerfile | 2 +- RELEASE_NOTES.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7773cab7..84aee833 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.11.5-slim-bullseye +FROM python:3.11.5-slim-bookworm # ----------------------------------------- RUN mkdir -p /kb/deployment/lib RUN apt-get update && \ diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index a74bb4dd..1921e4aa 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -3,7 +3,7 @@ ## Unreleased - update to Python 3.11.5 -- images base is python 3.11.5 / debian bullseye +- images base is python 3.11.5 / debian bookworm - pin all image dependencies to those currently supported by the debian bullseye - update all Python dependencies to latest versions - run black on all service and test files, fix most linting complaints (pylint, From 153f85250152d2503120e858a4fb905166b19470 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 17:54:48 -0700 Subject: [PATCH 17/40] don't run as regular user. [PTV-1888] --- development/tools/runner/Dockerfile | 8 ++++---- run_tests.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/development/tools/runner/Dockerfile b/development/tools/runner/Dockerfile index 30035426..512bbe5f 100644 --- a/development/tools/runner/Dockerfile +++ b/development/tools/runner/Dockerfile @@ -23,11 +23,11 @@ RUN python -m pip install "pip==23.2.1" && pip install -r /app/requirements.txt ENV PATH="/app:${PATH}" # Run as a regular user, not root. 
-RUN addgroup --system kbapp && \ - adduser --system --ingroup kbapp kbapp && \ - chown -R kbapp:kbapp /app +# RUN addgroup --system kbapp && \ +# adduser --system --ingroup kbapp kbapp && \ +# chown -R kbapp:kbapp /app -USER kbapp +# USER kbapp ENTRYPOINT [ "/app/development/tools/runner/entrypoint.sh" ] diff --git a/run_tests.sh b/run_tests.sh index c16ab8d5..47b8b6e4 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash DIR="$( cd "$( dirname "$0" )" && pwd )" -export KB_DEPLOYMENT_CONFIG="$DIR/deployment/conf/testing.cfg" +export KB_DEPLOYMENT_CONFIG="${DIR}/deployment/conf/testing.cfg" export FILE_LIFETIME="90" export TESTS="${1:-tests}" echo From b19cd9cff00149a21622f6f8939ab53cc11e9e3b Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 18:06:10 -0700 Subject: [PATCH 18/40] be more tolerant of different st_mtime implementations [PTV-1888] --- tests/test_app.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index dede25df..c54a19e7 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -584,8 +584,14 @@ async def test_list(): # FAILED tests/test_app.py::test_list - assert 1693508770000 <= # (1693508769.9508653 * 1000) # Thursday, August 31, 2023 7:06:10 PM - # Thursday, August 31, 2023 7:06:09.950 PM - assert json[0]["mtime"] <= round(time.time()) * 1000 + # Thursday, August 31, 2023 7:06:09.950 PM + # Hmm, so although that was observed locally, it is not working that way + # on GHA-hosted containers, so let's assume that is due to the base Linux + # system (Ubuntu-latest), and make this more resilient. Let's round both, + # then + diff = json[0]["mtime"] - time.time() * 1000 + assert abs(diff) < 3 + # assert json[0]["mtime"] <= round(time.time()) * 1000 assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 @@ -598,7 +604,9 @@ async def test_list(): assert json[0]["isFolder"] is True assert json[0]["name"] == "test" assert json[0]["path"] == "testuser/test" - assert json[0]["mtime"] <= round(time.time()) * 1000 + diff = json[0]["mtime"] - time.time() * 1000 + assert abs(diff) < 3 + # assert json[0]["mtime"] <= round(time.time()) * 1000 assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 From 806dca87ece160e6027a42419fe7e896ce184bf2 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 1 Sep 2023 18:15:14 -0700 Subject: [PATCH 19/40] erik, you are too tired [PTV-1888] --- tests/test_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index c54a19e7..2678f844 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -589,7 +589,7 @@ async def test_list(): # on GHA-hosted containers, so let's assume that is due to the base Linux # system (Ubuntu-latest), and make this more resilient. 
Let's round both, # then - diff = json[0]["mtime"] - time.time() * 1000 + diff = (json[0]["mtime"] - time.time() * 1000)/1000 assert abs(diff) < 3 # assert json[0]["mtime"] <= round(time.time()) * 1000 assert len(file_folder_count) == 4 # 2 folders and 2 files @@ -604,7 +604,7 @@ async def test_list(): assert json[0]["isFolder"] is True assert json[0]["name"] == "test" assert json[0]["path"] == "testuser/test" - diff = json[0]["mtime"] - time.time() * 1000 + diff = (json[0]["mtime"] - time.time() * 1000)/1000 assert abs(diff) < 3 # assert json[0]["mtime"] <= round(time.time()) * 1000 assert len(file_folder_count) == 4 # 2 folders and 2 files From f0432289240b295531e219c4f70e23d413375fb5 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 07:59:08 -0700 Subject: [PATCH 20/40] clarify test condition comment [PTV-1888] --- tests/test_app.py | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 2678f844..e771ab7d 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -31,6 +31,8 @@ decoder = JSONDecoder() +MTIME_PRECISION = 3 + config = configparser.ConfigParser() config.read(os.environ["KB_DEPLOYMENT_CONFIG"]) @@ -569,29 +571,42 @@ async def test_list(): assert json[0]["isFolder"] is True assert json[0]["name"] == "test" assert json[0]["path"] == "testuser/test" - # This can fail due to the calculation of mtime, which may be have the - # precision of time.time(). That is, in my observations it may be in - # seconds. If the mtime is rounded up, the test will quite likely fail as - # the file mtime will be greater than the current time returned by - # time.time(). - # The solution applied is to round the expected time up. - # This will fail, still, on Windows, where apparently st_mtime has 2 - # second precision. - # Alternatively, the service implementation could use finer precition time - # measurement via st_mtime_ns, although that is also platform-dependent - # (i.e. it is not always nanosecond precise.)) - # E.g. + + # This test is meant to prove that the time of the file just created is less + # than the current time measured immediately afterward. This carries some + # truth, as the actions in a single-threaded application should be in + # chronological sequence. + # However, a naive implementation can fail due to the manner in which the + # file modification time, or "mtime" is measured. In some systems it may + # have precision of "second", even if the underlying system can measure time + # at higher precision. In this case, the time may also be "rounded up" to + # the nearest second. If the comparision time has a highter precision, e.g. + # ms or ns, the comparison time may actually appear to be BEFORE the file's + # mtime which was actually measured BEFORE. + + # One solution could be to round the comparison time, but that would implie + # that we KNOW that the system will round up to the second. But we don't + # actually know that, although we could measure that through sampling, I + # suppose. + + # In any case, the method we choose to use is to change the test condition a + # bit. We just want to assert that the file's mtime, as provided by the api, + # is within the ballpark of the current time. We define this ballpark as 3 + # seconds, since in at least some cases, Windows mtime precision is 2 seconds. 
+ + # Here is an actual example of a terst that failed with the naive + # implementation: + # FAILED tests/test_app.py::test_list - assert 1693508770000 <= # (1693508769.9508653 * 1000) # Thursday, August 31, 2023 7:06:10 PM - # Thursday, August 31, 2023 7:06:09.950 PM - # Hmm, so although that was observed locally, it is not working that way - # on GHA-hosted containers, so let's assume that is due to the base Linux - # system (Ubuntu-latest), and make this more resilient. Let's round both, - # then - diff = (json[0]["mtime"] - time.time() * 1000)/1000 - assert abs(diff) < 3 - # assert json[0]["mtime"] <= round(time.time()) * 1000 + # Thursday, August 31, 2023 7:06:09.950 PM + + # The "mtime" in the structure is in ms. + + diff = json[0]["mtime"]/1000 - time.time() + assert abs(diff) < MTIME_PRECISION + assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 @@ -604,9 +619,10 @@ async def test_list(): assert json[0]["isFolder"] is True assert json[0]["name"] == "test" assert json[0]["path"] == "testuser/test" - diff = (json[0]["mtime"] - time.time() * 1000)/1000 - assert abs(diff) < 3 - # assert json[0]["mtime"] <= round(time.time()) * 1000 + + diff = json[0]["mtime"]/1000 - time.time() + assert abs(diff) < MTIME_PRECISION + assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 From c5ee5a0886f61954edb14b5aaf2ad03464d736a7 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 16:20:36 +0000 Subject: [PATCH 21/40] add markdown lint, fix linting errors in all markdown files [PTV-1888] --- .devcontainer/devcontainer.json | 3 +- .markdownlint.yaml | 8 + README.md | 20 +- RELEASE_NOTES.md | 88 +++-- docs/api.md | 351 ++++++++----------- docs/development/development-tools.md | 24 +- docs/development/maintaining-dockerfile.md | 30 +- docs/development/original-doc.md | 35 +- docs/import_specifications.ADR.md | 149 ++++---- scripts/run-dev-server.sh | 3 + run_in_docker.sh => scripts/run_in_docker.sh | 0 run_tests.sh => scripts/run_tests.sh | 0 12 files changed, 355 insertions(+), 356 deletions(-) create mode 100644 .markdownlint.yaml create mode 100755 scripts/run-dev-server.sh rename run_in_docker.sh => scripts/run_in_docker.sh (100%) rename run_tests.sh => scripts/run_tests.sh (100%) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 84e5909c..9c70bcb6 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -21,7 +21,8 @@ "ms-python.black-formatter", "ms-python.flake8", "mikoz.autoflake-extension", - "ms-azuretools.vscode-docker" + "ms-azuretools.vscode-docker", + "DavidAnson.vscode-markdownlint" ] } } diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 00000000..1db5789b --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,8 @@ +# We don't want the annoying warnings when there are duplicated non-sibling headers as +# many types of docs have a pattern for headings that is repeated throughout the document. +MD024: + siblings_only: true + +MD013: + line_length: 88 + code_blocks: false \ No newline at end of file diff --git a/README.md b/README.md index 867749da..e2a7eeb2 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ documentation](https://docs.kbase.us/getting-started/narrative/add-data). ## Installation -The service is designed to be packagd into a Docker image and run as a container. +The service is designed to be packagd into a Docker image and run as a container. 
For deployment, the image is built by GitHub actions and stored in GitHub Container Registry. @@ -30,7 +30,21 @@ or from a locally-built image. ## Usage -> TBD +There are three basic usage scenarios - development, local deployment, and production +deployment. + +Development has [its own documentation](./docs/development/inde.md). + +Production deployment is out of scope (though it can be larged deduced from local +deployment). + +Local deployement is as easy as running + +```shell +./scripts/run-dev-server.sh +``` + +from within the devcontainer. ## API @@ -48,8 +62,6 @@ Forks are not supported at this time. For development support, see [the development docs](./docs/development.md) - ## License See [the KBase License](./LICENSE.md) - diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 1921e4aa..4caf5ab5 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -3,85 +3,101 @@ ## Unreleased - update to Python 3.11.5 -- images base is python 3.11.5 / debian bookworm +- image base is python 3.11.5 / debian bookworm - pin all image dependencies to those currently supported by the debian bullseye - update all Python dependencies to latest versions - run black on all service and test files, fix most linting complaints (pylint, sonarlint) +- add markdownlint plugin to devcontainer, config file to repo, and fix all linting + errors in markdown files. ## Version 1.3.6 -- Fixed a bug that would cause NaN and Inf values in xSV to be returned as JSON barewords, which could cause some JSON parsers to fail. They are now returned as strings. -- Changed the Excel parser to not consider NaN and Inf as missing values to maintain consistency with the xSV parsers +- Fixed a bug that would cause NaN and Inf values in xSV to be returned as JSON + barewords, which could cause some JSON parsers to fail. They are now returned as + strings. +- Changed the Excel parser to not consider NaN and Inf as missing values to maintain + consistency with the xSV parsers ## Version 1.3.5 -- Fixed a bug that under some circumstances could cause incomplete file metadata to be returned. +- Fixed a bug that under some circumstances could cause incomplete file metadata to be returned. ## Version 1.3.4 -- Alter the behavior of the bulk specification file writers to return an error if the input `types` parameter is empty. -- Fixed a bug in the csv/tsv bulk specification parser that would case a failure if the first header of a file had trailing separators. This occurs if a csv/tsv file is opened and saved by Excel. +- Alter the behavior of the bulk specification file writers to return an error if the + input `types` parameter is empty. +- Fixed a bug in the csv/tsv bulk specification parser that would case a failure if the + first header of a file had trailing separators. This occurs if a csv/tsv file is + opened and saved by Excel. ## Version 1.3.3 -- Fixed a bug in the csv/tsv bulk specification parser that would include an empty entry for each empty line in the file. +- Fixed a bug in the csv/tsv bulk specification parser that would include an empty entry + for each empty line in the file. ## Version 1.3.2 -- Add `write_bulk_specification` endpoint for writing bulk specifications -- Add `import_filetypes` endpoint for getting datatype -> filetype -> extension mappings -- Fixed a bug in the csv/tsv bulk specification parser that would throw an error on any empty lines in the file, even at the end. The parser now ignores empty lines the same way the Excel parser does. 
-- Fixed a bug in the `bulk_specification` endpoint where a missing header item in a `*.?sv` file would cause it to be replaced with a strange name. -- Fixed a bug in the `bulk_specification` endpoint where a missing header item in an Excel file would cause a duplicate header error rather than a missing header error. -- As part of the two fixes above, some error message text has changed due to the rewrite of the parsers. +- Add `write_bulk_specification` endpoint for writing bulk specifications +- Add `import_filetypes` endpoint for getting datatype -> filetype -> extension mappings +- Fixed a bug in the csv/tsv bulk specification parser that would throw an error on any + empty lines in the file, even at the end. The parser now ignores empty lines the same + way the Excel parser does. +- Fixed a bug in the `bulk_specification` endpoint where a missing header item in a + `*.?sv` file would cause it to be replaced with a strange name. +- Fixed a bug in the `bulk_specification` endpoint where a missing header item in an + Excel file would cause a duplicate header error rather than a missing header error. +- As part of the two fixes above, some error message text has changed due to the rewrite + of the parsers. ## Version 1.3.1 -- added the `files` key to the returned data from the `bulk_specification` endpoint. +- added the `files` key to the returned data from the `bulk_specification` endpoint. ## Version 1.3.0 -- Update to Python 3.9 -- Add `bulk_specification` endpoint for parsing import specifications +- Update to Python 3.9 +- Add `bulk_specification` endpoint for parsing import specifications ## Version 1.2.0 -- BACKWARDS INCOMPATIBILITY: remove the unused `apps` key from the importer mappings endpoint. -- added a `fileinfo` field to the return of the importer mappings endpoint that includes the file prefix, suffix, and file type(s), if any. -- reverted change to expose dotfiles in the api by default -- attempting to upload a dotfile will now cause an error +- BACKWARDS INCOMPATIBILITY: remove the unused `apps` key from the importer mappings endpoint. +- added a `fileinfo` field to the return of the importer mappings endpoint that includes + the file prefix, suffix, and file type(s), if any. +- reverted change to expose dotfiles in the api by default +- attempting to upload a dotfile will now cause an error ## Version 1.1.9 -- Added support for Genbank `*.gb` and `*.gbff` extensions -- Added support for gzipped Reads, Assemblies, Genbank Files, and GFF files. +- Added support for Genbank `*.gb` and `*.gbff` extensions +- Added support for gzipped Reads, Assemblies, Genbank Files, and GFF files. 
## Version 1.1.8 -- Added new endpoint `importer-mappings/` for getting a mapping of importers for file names -- Updated endpoint to use GET and query_string -- Ran black -- BUGFIX: Change head/tail functionality to return 1024 chars to avoid bad cases with really large one line files -- Update FASTQ/SRA to use their own APP Uis +- Added new endpoint `importer-mappings/` for getting a mapping of importers for file names +- Updated endpoint to use GET and query_string +- Ran black +- BUGFIX: Change head/tail functionality to return 1024 chars to avoid bad cases with + really large one line files +- Update FASTQ/SRA to use their own APP Uis ## Version 1.1.7 -- Add a file name check to void user uploading files with name starting with space -- Update list endpoint so that it only hide .globus_id file by default +- Add a file name check to void user uploading files with name starting with space +- Update list endpoint so that it only hide .globus_id file by default ## Version 1.1.3 -- Add a add-acl-concierge endpoint -- Added configs for that endpoint -- Added options to dockerfile/docker-compose +- Add a add-acl-concierge endpoint +- Added configs for that endpoint +- Added options to dockerfile/docker-compose ## Version 1.1.2 -- Added capability to check 'kbase_session_backup' cookie -- Added a `add-acl` and `remove-acl` endpoint for globus endpoint access -- Change logging to STDOUT +- Added capability to check 'kbase_session_backup' cookie +- Added a `add-acl` and `remove-acl` endpoint for globus endpoint access +- Change logging to STDOUT ### Version 1.1.0 -- Added a `download` endpoint for files +- Added a `download` endpoint for files diff --git a/docs/api.md b/docs/api.md index ea4298f1..a8fb8e1b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,27 +1,26 @@ -## API +# API - all paths should be specified treating the user's home directory as root - The url base, in an actualy deployment, will be: - - https://ci.kbase.us for CI - - https://next.kbase.us for Next - - https://appdev.kbase.us for Appdev - - https://kbase.us for Production + - `https://ci.kbase.us` for CI + - `https://next.kbase.us` for Next + - `https://appdev.kbase.us` for Appdev + - `https://kbase.us` for Production - The base path in all environments is `/services/staging_service` - All api paths will be suffixed to the url base and base path. For example: - `https://ci.kbase.us/services/staging_service/file-lifetime` - + `https://ci.kbase.us/services/staging_service/file-lifetime` will invoke the `file-lifetime` endpoint, which returns the number of days a file will be retained. - For local usage (i.e. spinning the service up locally) - - the base url is `http://localhost:3000` - - the base path is not required - - e.g. `http://localhost:3000/file-lifetime` + - the base url is `http://localhost:3000` + - the base path is not required + - e.g. `http://localhost:3000/file-lifetime` - For endpoints that require authorization, the `Authorization` header must be supplied, with the value a KBase auth token. @@ -34,14 +33,13 @@ Returns a fixed text response. Used to determine if the service is running? > TODO: we probably don't need this endpoint - ### Success Response `200 OK` #### Example -``` +```text This is just a test. This is only a test. ``` @@ -49,7 +47,7 @@ This is just a test. This is only a test. 
`GET /test-auth` -Returns a text response indicating that the +Returns a text response indicating that the ### Headers @@ -61,7 +59,7 @@ Returns a text response indicating that the `Content-Type: text/plain` -**Example** +#### Example ```text I'm authenticated as @@ -69,15 +67,7 @@ I'm authenticated as ### Error Responses -The [common responses for an authorized request](#Common-Authorization-Error-Responses) - -##### Content - -`text/plain` - -```text -Must supply token -``` +The [common responses for an authorized request](#common-authorization-error-responses) ## File Lifetime @@ -85,7 +75,8 @@ Must supply token Number of days a file will be held for in staging service before being deleted. -This is not actually handled by the server but is expected to be performed by a cron job which shares the env variable read here. +This is not actually handled by the server but is expected to be performed by a cron job +which shares the env variable read here. ### Success Response @@ -110,6 +101,7 @@ Returns a JSON Array containing entries for each file and folder within the prov Defaults to not show hidden dotfiles. ### Headers + `Authorization: ` ### Success Response @@ -139,7 +131,7 @@ Defaults to not show hidden dotfiles. ### Error Responses -The [common responses for an authorized request](#Common-Authorization-Error-Responses) +The [common responses for an authorized request](#common-authorization-error-responses) #### `404 Not Found` @@ -154,8 +146,6 @@ Authorization. path / does not exist ``` - - ## Download file `GET /download/{path to file}` @@ -178,7 +168,7 @@ The contents of the file are returned as the entire body of the response ### Error Responses -The [common responses for an authorized request](#Common-Authorization-Error-Responses) +The [common responses for an authorized request](#common-authorization-error-responses) #### `400 Bad Request` @@ -200,7 +190,7 @@ The file does not exist `text/plain` -``` +```text path / does not exist ``` @@ -255,13 +245,14 @@ defaults to not show hidden dotfiles ### Error Responses -The [common responses for an authorized request](#Common-Authorization-Error-Responses) +The [common responses for an authorized request](#common-authorization-error-responses) ## File and Folder Metadata `GET /metadata/{path to file or folder}` ### Headers + - `Authorization: ` ### Success Response @@ -302,7 +293,7 @@ If the file or directory is found, a Metadata object is returned ### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) +The [common responses for an authorized request](#common-authorization-error-responses) #### `404 Not Found` @@ -319,7 +310,7 @@ path / does not exist `POST /upload` This most important endpoint is designed to receive one or more files in a `POST` -request sending a `multipart/form-data` body. +request sending a `multipart/form-data` body. The body format is described in more detail below. @@ -330,7 +321,7 @@ The response is a JSON object containing file metadata. > uploads supports `multipart/form-data`, or it may be because historically the first > front-end implementation was a form itself, as HTML forms will send as > `mutlipart/form-data` if a file control is used to select a file. -> +> > The current front-end implementation does not require this, although it does support > it, so I would suggest that in the future we refactor to be a simple binary body, with > the filename specified in the url search (query param). 
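As a quick client-side illustration of the request format described under Headers and
Body below, an upload might be driven with the `requests` library roughly as follows.
This is only a sketch: the token, base URL, and file name are placeholders, not values
defined by the service.

```python
import requests

TOKEN = "<kbase auth token>"  # placeholder
URL = "https://ci.kbase.us/services/staging_service/upload"

with open("foo.csv", "rb") as handle:
    # The service expects the "destPath" field first and the "uploads" field
    # (carrying the file content) second, so an ordered list of field tuples
    # is used; (None, value) sends a plain, non-file form field.
    fields = [
        ("destPath", (None, "/")),
        ("uploads", ("foo.csv", handle)),
    ]
    resp = requests.post(URL, headers={"Authorization": TOKEN}, files=fields)

resp.raise_for_status()
print(resp.json())  # metadata describing the newly written file
```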
@@ -340,7 +331,7 @@ The response is a JSON object containing file metadata. - `Authorization: ` - `Content-Type: multipart/form-data` -### Body +### Body The multpart form-data format supports sending of multiple form fields, each with their own metadata as well as body. @@ -350,7 +341,6 @@ The service requires that two fields be present in the specified order: - `destPath` - path file should end up in - `uploads` - the file itself. - Filenames starting with whitespace or a '.' are not allowed The `destPath` field contains the path at which the file should be created. It will @@ -362,7 +352,7 @@ content will be literaly copied into the destination file, with no encoding or v #### Example -``` +```text ------WebKitFormBoundaryA0xgUu1fi1whAcSB Content-Disposition: form-data; name="destPath" @@ -380,14 +370,14 @@ root directory of the user's staging area. The `uploads` field would contain the file's binary content. Note that the field metadata includes the "filename" property indicating that the target file -name is `"foo.csv"`. - +name is `"foo.csv"`. ### Success Response `200 OK` -A successful response will return a JSON object containing a metadata description of the file. +A successful response will return a JSON object containing a metadata description of the +file. #### Content @@ -445,9 +435,7 @@ A successful response will return a JSON object containing a metadata descriptio ### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) - - +The [common responses for an authorized request](#common-authorization-error-responses) ## Define/Create UPA for file which has been imported @@ -497,7 +485,6 @@ associate with the provided file. - `Content-Type: text/plain` - #### Body ```text @@ -506,7 +493,7 @@ successfully update UPA for file ### Error Responses -The [common responses for an authorized request](#Common-Authorization-Error-Responses) +The [common responses for an authorized request](#common-authorization-error-responses) #### `400 Bad Request` @@ -516,17 +503,14 @@ UPA missing `text/plain` -``` +```text must provide UPA field in body ``` - - ## Delete file or folder (will delete things contained in folder) `DELETE /delete/{path to file or folder}` - ### Headers - `Authorization: ` @@ -547,8 +531,7 @@ successfully deleted UPA ### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) - +The [common responses for an authorized request](#common-authorization-error-responses) #### `403 Forbidden` @@ -580,9 +563,6 @@ or other file access errors. 
could not delete ``` - - - ## Move/rename a file or folder `PATCH /mv/{path to file or folder}` @@ -620,24 +600,23 @@ could not delete } ``` - ### Success Response `200 OK` #### Headers + - `Content-Type: text/plain` #### Body -``` +```text successfully moved to ``` ### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) - +The [common responses for an authorized request](#common-authorization-error-responses) #### `400 Bad Request` @@ -649,7 +628,7 @@ If the newPath field is missing in the content body ##### Body -``` +```text must provide newPath field in body ``` @@ -661,11 +640,11 @@ must provide newPath field in body ##### Body -``` +```text cannot rename home or move directory ``` -``` +```text cannot rename or move protected file ``` @@ -677,13 +656,10 @@ cannot rename or move protected file ##### Body -``` +```text already exists ``` - - - ## Decompress various archive formats `PATCH /decompress/{path to archive}` @@ -691,7 +667,6 @@ cannot rename or move protected file supported archive formats are: .zip, .ZIP, .tar.gz, .tgz, .tar.bz, .tar.bz2, .tar, .gz, .bz2, .bzip2 - ### Headers - `Authorization: ` @@ -700,20 +675,19 @@ supported archive formats are: `200 OK` -##### Headers +#### Headers - `Content-Type: text/plain` -##### Body +#### Body -``` +```text successfully decompressed ``` ### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) - +The [common responses for an authorized request](#common-authorization-error-responses) #### `400 Bad Request` @@ -723,19 +697,16 @@ The [common responses for an authorized request](#Common-Authorization-Error-Res ##### Body - ```text cannot decompress a file ``` - - ## Add Globus ACL `GET /add-acl` -After authenticating at this endpoint, AUTH is queried to get your filepath and globus id file for -linking to globus. +After authenticating at this endpoint, AUTH is queried to get your filepath and globus +id file for linking to globus. ### Headers @@ -791,11 +762,9 @@ linking to globus. } ``` - ### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) - +The [common responses for an authorized request](#common-authorization-error-responses) #### `500 Internal Server Error` @@ -805,7 +774,7 @@ If issue with Globus API or ACL Already Exists - `Content-Type: application/json` -##### Body +##### Body ###### Schema @@ -852,15 +821,12 @@ If issue with Globus API or ACL Already Exists } ``` - - - ## Remove Globus ACL `GET /remove-acl` -After authenticating at this endpoint, AUTH is queried to get your filepath and globus id file for -linking to globus. +After authenticating at this endpoint, AUTH is queried to get your filepath and globus +id file for linking to globus. ### Headers @@ -890,12 +856,12 @@ linking to globus. Note that the "message" is that returned by the globus api, and out of scope for documentation here. -> TODO: we should provide our own message, or return the globus response data, but not -> return the globus response data (a json object) into a string and call it a message! +> TODO: we should provide our own message, or return the globus rZesponse data, but not +> return the globus response data (a json object) into a string and call it a message! 
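Both ACL endpoints are plain authorized GET requests. A minimal sketch of invoking them
follows; the token and base URL are placeholders for your own environment.

```python
import requests

TOKEN = "<kbase auth token>"  # placeholder
BASE = "https://ci.kbase.us/services/staging_service"

for endpoint in ("add-acl", "remove-acl"):
    resp = requests.get(f"{BASE}/{endpoint}", headers={"Authorization": TOKEN})
    body = resp.json()
    # Both endpoints report a boolean "success" flag plus Globus details.
    print(endpoint, body.get("success"))
```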
### Error Response -The [common responses for an authorized request](#Common-Authorization-Error-Responses) +The [common responses for an authorized request](#common-authorization-error-responses) #### `500 Internal Server Error` @@ -942,8 +908,6 @@ An issue with Globus API or ACL Already Exists ####### Content (example) -**Content** - ```json { "success": false, @@ -954,7 +918,6 @@ An issue with Globus API or ACL Already Exists } ``` - ## Parse bulk specifications `GET /bulk_specification/?files=file1.[,file2.,...]` @@ -964,15 +927,15 @@ where `` is one of `csv`, `tsv`, `xls`, or `xlsx`. This endpoint parses one or more bulk specification files in the staging area into a data structure (close to) ready for insertion into the Narrative bulk import or analysis cell. -It can parse `.tsv`, `.csv`, and Excel (`.xls` and `.xlsx`) files. Templates for the currently -supported data types are available in the [templates](./import_specifications/templates) -directory of this repo. See the [README.md](./import_specifications/templates/README.md) file -for instructions on template usage. +It can parse `.tsv`, `.csv`, and Excel (`.xls` and `.xlsx`) files. Templates for the +currently supported data types are available in the +[templates](./import_specifications/templates) directory of this repo. See the +[README.md](./import_specifications/templates/README.md) file for instructions on +template usage. See the [import specification ADR document](./docs/import_specifications.ADR.md) for design details. - ### Headers - `Authorization: ` @@ -991,7 +954,7 @@ details. ##### Content (example) -``` +```text { "types": { : [ @@ -1013,24 +976,25 @@ details. } ``` -* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) - file and the Narrative staging area configuration file - it is a shared namespace between the - staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an - app. It is determined by the first header line from the templates. -* `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. +- `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) + file and the Narrative staging area configuration file - it is a shared namespace + between the staging service and Narrative to specify bulk applications, and has a + 1:1 mapping to an app. It is determined by the first header line from the templates. +- `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. These are determined by the second header line from the templates and will differ by the data type. -* `` is the user-provided value for the input for a given `spec.json` ID - and import or analysis instance, where an import/analysis instance is effectively a row - in the data file. Each data file row is provided in order for each type. Each row is - provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the templates are - user-provided data, and each line corresponds to a single import or analysis. +- `` is the user-provided value for the input for a given + `spec.json` + ID and import or analysis instance, where an import/analysis instance is effectively a + row in the data file. Each data file row is provided in order for each type. Each row is + provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the + templates are user-provided data, and each line corresponds to a single import or analysis. 
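As an illustrative sketch of consuming this endpoint (the spec file names, token, and
base URL below are placeholders, not fixed values):

```python
import requests

TOKEN = "<kbase auth token>"  # placeholder
BASE = "https://ci.kbase.us/services/staging_service"

resp = requests.get(
    f"{BASE}/bulk_specification/?files=gff_metagenome.csv,sra_reads.tsv",
    headers={"Authorization": TOKEN},
)
resp.raise_for_status()
result = resp.json()

# "types" holds one list of parameter dicts per data type; "files" records
# which spec file (and Excel tab, if any) each data type was parsed from.
for data_type, rows in result["types"].items():
    source = result["files"][data_type]
    print(f"{data_type}: {len(rows)} row(s) from {source['file']} (tab={source['tab']})")
```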
### Error Response Error reponses are of the general form: -``` +```json { "errors": [ {"type": , @@ -1043,25 +1007,26 @@ Error reponses are of the general form: Existing error codes are currently: -* `cannot_find_file` if an input file cannot be found -* `cannot_parse_file` if an input file cannot be parsed -* `incorrect_column_count` if the column count is not as expected - * For Excel files, this may mean there is a non-empty cell outside the bounds of the data area -* `multiple_specifications_for_data_type` if more than one tab or file per data type is submitted -* `no_files_provided` if no files were provided -* `unexpected_error` if some other error occurs +- `cannot_find_file` if an input file cannot be found +- `cannot_parse_file` if an input file cannot be parsed +- `incorrect_column_count` if the column count is not as expected + - For Excel files, this may mean there is a non-empty cell outside the bounds of the + data area +- `multiple_specifications_for_data_type` if more than one tab or file per data type is submitted +- `no_files_provided` if no files were provided +- `unexpected_error` if some other error occurs The HTTP code returned will be, in order of precedence: -* 400 if any error other than `cannot_find_file` or `unexpected_error` occurs -* 404 if at least one error is `cannot_find_file` but there are no 400-type errors -* 500 if all errors are `unexpected_error` +- 400 if any error other than `cannot_find_file` or `unexpected_error` occurs +- 404 if at least one error is `cannot_find_file` but there are no 400-type errors +- 500 if all errors are `unexpected_error` The per error type data structures are: #### `cannot_find_file` -``` +```json { "type": "cannot_find_file", "file": @@ -1070,7 +1035,7 @@ The per error type data structures are: #### `cannot_parse_file` -``` +```json { "type": "cannot_parse_file", "file": , @@ -1081,7 +1046,7 @@ The per error type data structures are: #### `incorrect_column_count` -``` +```json { "type": "incorrect_column_count", "file": , @@ -1092,7 +1057,7 @@ The per error type data structures are: #### `multiple_specifications_for_data_type` -``` +```json { "type": "multiple_specifications_for_data_type", "file_1": , @@ -1105,7 +1070,7 @@ The per error type data structures are: #### `no_files_provided` -``` +```json { "type": "no_files_provided" } @@ -1113,7 +1078,7 @@ The per error type data structures are: #### `unexpected_error` -``` +```json { "type": "unexpected_error", "file": @@ -1121,9 +1086,6 @@ The per error type data structures are: } ``` - - - ## Write bulk specifications `POST /write_bulk_specification` @@ -1133,9 +1095,8 @@ data structure to that which the parse endpoint returns and writes bulk specific ### Headers -- `Authorization: ` -- `Content-Type: application/json` - +- `Authorization: ` +- `Content-Type: application/json` #### Body @@ -1145,7 +1106,7 @@ data structure to that which the parse endpoint returns and writes bulk specific ##### Content (example) -``` +```json { "output_directory": , "output_file_type": , @@ -1177,27 +1138,27 @@ data structure to that which the parse endpoint returns and writes bulk specific } ``` -* `output_directory` specifies where the output files should be written in the user's staging area. -* `output_file_type` specifies the format of the output files. 
-* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) - file and the Narrative staging area configuration file - it is a shared namespace between the - staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an - app. It is included in the first header line in the templates. -* `order_and_display` determines the ordering of the columns in the written templates, as well - as mapping the spec.json ID of the parameter to the human readable name of the parameter in - the display.yml file. -* `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. +- `output_directory` specifies where the output files should be written in the user's + staging area. +- `output_file_type` specifies the format of the output files. +- `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) + file and the Narrative staging area configuration file - it is a shared namespace + between the staging service and Narrative to specify bulk applications, and has a + 1:1 mapping to an app. It is included in the first header line in the templates. +- `order_and_display` determines the ordering of the columns in the written templates, + as well as mapping the spec.json ID of the parameter to the human readable name of the + parameter in the display.yml file. +- `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. These are written to the second header line from the import templates and will differ by the data type. -* `data` contains any data to be written to the file as example data, and is analagous to the data - structure returned from the parse endpoint. To specify that no data should be written to the - template provide an empty list. -* `` is the value for the input for a given `spec.json` ID +- `data` contains any data to be written to the file as example data, and is analagous + to the data structure returned from the parse endpoint. To specify that no data + should be written to the template provide an empty list. +- `` is the value for the input for a given `spec.json` ID and import or analysis instance, where an import/analysis instance is effectively a row in the data file. Each data file row is provided in order for each type. Each row is - provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the templates are - user-provided data, and each line corresponds to a single import or analysis. - + provided in a mapping of `spec.json` ID to the data for the row. Lines > 3 in the + templates are user-provided data, and each line corresponds to a single import or analysis. ### Success Response @@ -1205,7 +1166,7 @@ data structure to that which the parse endpoint returns and writes bulk specific #### Headers -- `Content-Type: application/json` +- `Content-Type: application/json` #### Body @@ -1226,10 +1187,10 @@ data structure to that which the parse endpoint returns and writes bulk specific } ``` -* `output_file_type` has the same definition as above. -* `files` contains a mapping of each provided data type to the output template file for that type. - In the case of Excel, all the file paths will be the same since the data types are all written - to different tabs in the same file. +- `output_file_type` has the same definition as above. +- `files` contains a mapping of each provided data type to the output template file for + that type. 
In the case of Excel, all the file paths will be the same since the data + types are all written to different tabs in the same file. ### Error Response @@ -1241,10 +1202,8 @@ Method specific errors have the form: The error code in this case will be a 4XX error. -The AioHTTP server may also return built in errors that are not in JSON format - an example of -this is overly large (> 1MB) request bodies. - - +The AioHTTP server may also return built in errors that are not in JSON format - an +example of this is overly large (> 1MB) request bodies. ## Get Importer Mappings @@ -1253,21 +1212,20 @@ this is overly large (> 1MB) request bodies. This endpoint returns: 1) a mapping between a list of files and predicted importer apps, and -2) a file information list that includes the input file names split between the file prefix and - the file suffix, if any, that was used to determine the file -> importer mapping, and a list - of file types based on the file suffix. If a file has a suffix that does not match - any mapping (e.g. `.sys`), the suffix will be `null`, the prefix the entire file name, and - the file type list empty. +2) a file information list that includes the input file names split between the file + prefix and the file suffix, if any, that was used to determine the file -> importer + mapping, and a list of file types based on the file suffix. If a file has a suffix + that does not match any mapping (e.g. `.sys`), the suffix will be `null`, the prefix + the entire file name, and the file type list empty. For example, -* if we pass in nothing we get a response with no mappings -* if we pass in a list of files, such as ["file1.fasta", "file2.fq", "None"], we would get back a - response that maps to Fasta Importers and FastQ Importers, with a weight of 0 to 1 - which represents the probability that this is the correct importer for you. -* for files for which there is no predicted app, the return is a null value -* this endpoint is used to power the dropdowns for the staging service window in the Narrative - +- if we pass in nothing we get a response with no mappings +- if we pass in a list of files, such as ["file1.fasta", "file2.fq", "None"], we would + get back a response that maps to Fasta Importers and FastQ Importers, with a weight + of 0 to 1 which represents the probability that this is the correct importer for you. +- for files for which there is no predicted app, the return is a null value +- this endpoint is used to power the dropdowns for the staging service window in the Narrative ### Headers @@ -1297,22 +1255,6 @@ data = {"file_list": ["file1.txt", "file2.zip", "file3.gff3.gz"]} ) ``` -### Success Response - -`200 OK` - -#### Headers - -- `Content-Type: application/json` - -#### Body - -##### Schema - -> TODO - -##### Content (example) - ```json { "mappings": [ @@ -1345,27 +1287,26 @@ data = {"file_list": ["file1.txt", "file2.zip", "file3.gff3.gz"]} > TODO: hopefully the typical 401/400 auth errors are returned -`400 Bad Request` +#### `400 Bad Request` -#### Headers +##### Headers - `Content-Type: text/plain` -#### Body +##### Body -``` +```text must provide file_list field ``` - - ## Get importer filetypes `GET /importer_filetypes` -This endpoint returns information about the file types associated with data types and the file -extensions for those file types. It is primarily of use for creating UI elements describing -which file extensions may be selected when performing bulk file selections. 
+This endpoint returns information about the file types associated with data types and +the file extensions for those file types. It is primarily of use for creating UI +elements describing which file extensions may be selected when performing bulk file +selections. ### Headers @@ -1385,7 +1326,7 @@ none ##### Content (example) -``` +```json { "datatype_to_filetype": { : [, ... ], @@ -1400,13 +1341,13 @@ none } ``` -* `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) - file and the Narrative staging area configuration file - it is a shared namespace between the - staging service and Narrative to specify bulk applications, and has a 1:1 mapping to an - import app. It is included in the first header line in the templates. -* `` is a file type like `FASTA` or `GENBANK`. The supported file types are listed - below. -* `` is a file extension like `*.fa` or `*.gbk`. +- `` is a data type ID from the [Mappings.py](./staging_service/autodetect/Mappings.py) + file and the Narrative staging area configuration file - it is a shared namespace + between the staging service and Narrative to specify bulk applications, and has a + 1:1 mapping to an import app. It is included in the first header line in the templates. +- `` is a file type like `FASTA` or `GENBANK`. The supported file types are + listed below. +- `` is a file extension like `*.fa` or `*.gbk`. ## Autodetect App and File Type IDs @@ -1414,7 +1355,7 @@ none These are the currently supported upload app type IDs: -``` +```text fastq_reads_interleaved fastq_reads_noninterleaved sra_reads @@ -1438,10 +1379,10 @@ extension. ### File type IDs -These are the currently supported file type IDs. These are primarily useful for apps that take -two different file types, like GFF/FASTA genomes. +These are the currently supported file type IDs. These are primarily useful for apps +that take two different file types, like GFF/FASTA genomes. -``` +```text FASTA FASTQ SRA diff --git a/docs/development/development-tools.md b/docs/development/development-tools.md index fe28df1c..9e3ac2f6 100644 --- a/docs/development/development-tools.md +++ b/docs/development/development-tools.md @@ -21,16 +21,16 @@ the least surprise! There is also support for locally hosted development via VSC, but this developer (EAP) does not do this style of Python development. - ### Getting Started 1. Ensure you have docker running. 2. Open the project in VSC -There are a few ways to open a project in VSC: -- from the repo directory `code .` -- from VSC menu: + There are a few ways to open a project in VSC: + + - from the repo directory `code .` + - from VSC menu: - File > New Window - From the new window: - Click on Explorer in the left-nav, then click on Open Folder @@ -41,14 +41,15 @@ There are a few ways to open a project in VSC: 3. Start the devcontainer. -- press the keys `Shift` - `Command` - `P` (macOS) to open the command palette -- start typing "Dev Container: Reopen Folder in Container" -- when you see it appear, click on it -- the image will build, the container will start + - press the keys `Shift` - `Command` - `P` (macOS) to open the command palette + - start typing "Dev Container: Reopen Folder in Container" + - when you see it appear, click on it + - the image will build, the container will start 4. Open a terminal -If a terminal is not already open, open the built-in terminal with `Control` - `~` (that is a tilde) + If a terminal is not already open, open the built-in terminal with `Control` - `~` + (that is a tilde) 5. 
Depending on the docker installation, you may need to grant access. If you don't see git things enabled, click on the Git tool, and follow the instructions. @@ -70,7 +71,7 @@ Here are the common ones: - `mypy staging_service` - `black staging_service` - `isort staging_service` -- `./run_tests.sh ` +- `./run_tests.sh` ### Running the server @@ -98,7 +99,6 @@ located in `staging_service/__main__.py`. As a quick sanity test once the service is running, try this from a host terminal: - ```shell curl http://localhost:3000/test-service ``` @@ -116,4 +116,4 @@ To run the tools: - `./development/scripts/run mypy staging_service` - `./development/scripts/run black staging_service` - `./development/scripts/run isort staging_service` -- `./development/scripts/run ./run_tests.sh ` +- `./development/scripts/run ./run_tests.sh` diff --git a/docs/development/maintaining-dockerfile.md b/docs/development/maintaining-dockerfile.md index 074b123c..10cb13e4 100644 --- a/docs/development/maintaining-dockerfile.md +++ b/docs/development/maintaining-dockerfile.md @@ -21,7 +21,7 @@ concerned with. ## OS dependencies The OS dependencies are indicated in the Dockerfile as exact versions. This ensures that -images are consistent, even as the base image evolves over time. +images are consistent, even as the base image evolves over time. ## Different Dockerfiles @@ -29,20 +29,18 @@ At present, there are three Dockerfiles which must be kept in synchrony. They di enough that the inconvenience of keeping them consistent seems worth the effort. - `./Dockerfile` - - used for the production image - - needs to copy all service files - - does not include development Python dependencies - - has the production entrypoint -- `./.devcontainer/Dockerfile` - - used for the devcontainer workflow - - does not copy service files - - contains development Python dependencies - - no entrypoint as that is provided by the docker-compose.yml + - used for the production image + - needs to copy all service files + - does not include development Python dependencies + - has the production entrypoint +- `./.devcontainer/Dockerfile` + - used for the devcontainer workflow + - does not copy service files + - contains development Python dependencies + - no entrypoint as that is provided by the docker-compose.yml - `./development/tools/Dockerfile` - - used for the host cli tools - - does not copy service files - - contains development Python dependencies - - has special entrypoint which execs whatever command is passed from the command + - used for the host cli tools + - does not copy service files + - contains development Python dependencies + - has special entrypoint which execs whatever command is passed from the command line - - diff --git a/docs/development/original-doc.md b/docs/development/original-doc.md index 0797581c..0af81be1 100644 --- a/docs/development/original-doc.md +++ b/docs/development/original-doc.md @@ -1,10 +1,12 @@ # staging_service -In order to setup local development, you must have docker installed and if you want to run it locally you must have python 3.11.5 or greater installed +In order to setup local development, you must have docker installed and if you want to +run it locally you must have python 3.11.5 or greater installed ## setup -make a folder called /data as well as inside that /bulk and inside that a folder for any usernames you wish it to work with +make a folder called /data as well as inside that /bulk and inside that a folder for any +usernames you wish it to work with data -bulk @@ -19,7 +21,8 @@ to run 
locally run `/deployment/bin/entrypoint.sh` to run inside docker run `/run_in_docker.sh` -to run in coordination with the kbase-ui development proxy, enabling it to serve locally as a back end for Narrative, kbase-ui and other services: +to run in coordination with the kbase-ui development proxy, enabling it to serve locally +as a back end for Narrative, kbase-ui and other services: ```shell make run-dev @@ -28,19 +31,23 @@ make run-dev ## tests ### Run on host -* to test use `./run_tests.sh` -* requires python 3.11.5 or higher -* requires installation on mac of libmagic `brew install libmagic` or `sudo port install libmagic` + +- to test use `./run_tests.sh` +- requires python 3.11.5 or higher +- requires installation on mac of libmagic `brew install libmagic` or `sudo port install + libmagic` ### Run in container -You can also run tests in a container which uses the same base image and uses the same dependencies. (This container can also run other python tasks.) +You can also run tests in a container which uses the same base image and uses the same +dependencies. (This container can also run other python tasks.) ```shell ./development/scripts/run run_tests.sh ``` -To run tests in individual test file you may supply the path to it. By default, the tests run against `tests/`. +To run tests in individual test file you may supply the path to it. By default, the +tests run against `tests/`. ```shell ./development/scripts/run run_tests.sh tests/test_app.py @@ -48,17 +55,21 @@ To run tests in individual test file you may supply the path to it. By default, ## debugging -Included configurations for the Visual Studio Code debugger for python that mirror what is in the entrypoint.sh and testing configuration to run locally in the debugger, set breakpoints and if you open the project in VSCode the debugger should be good to go. The provided configurations can run locally and run tests locally +Included configurations for the Visual Studio Code debugger for python that mirror what +is in the entrypoint.sh and testing configuration to run locally in the debugger, set +breakpoints and if you open the project in VSCode the debugger should be good to go. The +provided configurations can run locally and run tests locally ## development When releasing a new version: -* Update the release notes -* Update the version in [staging_service/app.py](staging_service/app.py).VERSION +- Update the release notes +- Update the version in [staging_service/app.py](staging_service/app.py).VERSION ## expected command line utilities -to run locally you will need all of these utils on your system: tar, unzip, zip, gzip, bzip2, md5sum, head, tail, wc +to run locally you will need all of these utils on your system: tar, unzip, zip, gzip, +bzip2, md5sum, head, tail, wc in the docker container all of these should be available diff --git a/docs/import_specifications.ADR.md b/docs/import_specifications.ADR.md index dbd04386..31ba0c47 100644 --- a/docs/import_specifications.ADR.md +++ b/docs/import_specifications.ADR.md @@ -1,8 +1,9 @@ # Import Specifications Architecture Design Record -This document specifies the design for handling import specifications in the Staging Service (StS). -An import specification is an Excel, CSV, or TSV file that contains instructions for how -to import one or more files in the staging area to KBase as KBase data types. +This document specifies the design for handling import specifications in the Staging +Service (StS). 
An import specification is an Excel, CSV, or TSV file that contains +instructions for how to import one or more files in the staging area to KBase as KBase +data types. ## Resources @@ -13,19 +14,19 @@ to import one or more files in the staging area to KBase as KBase data types. ## Front end changes -The design introduces a new StS data type, `import_specification`. The FE's current -behavior is to display any data types returned from the StS in the file dropdown, but silently -ignore user-selected files for which the selected data type is unknown to the narrative, a bug. -The FE will be updated to ignore unknown data types returned from the StS, allowing for phased, -non-lockstep upgrades. This work is not included in this project, but will be in a future FE -project. +The design introduces a new StS data type, `import_specification`. The FE's current +behavior is to display any data types returned from the StS in the file dropdown, but +silently ignore user-selected files for which the selected data type is unknown to the +narrative, a bug. The FE will be updated to ignore unknown data types returned from the +StS, allowing for phased, non-lockstep upgrades. This work is not included in this +project, but will be in a future FE project. ## Import specification input files -Input file formats may be Excel, CSV, or TSV. An example CSV file structure for GFF/FASTA imports -is below: +Input file formats may be Excel, CSV, or TSV. An example CSV file structure for +GFF/FASTA imports is below: -``` +```csv Data type: gff_metagenome; Columns: 7; Version: 1 fasta_file, gff_file, genome_name, source, release, genetic_code, generate_missing_genes FASTA File Path, GFF3 File Path, Metagenome Object Name, Source of metagenome, Release or Version of the Source Data, Genetic Code for protein translation, Spoof Genes for parentless CDS @@ -35,11 +36,13 @@ mygenome2.fa, mygenome2.gff3, mygenomeobject2, yermumspoo, 30456, 11, 1 ``` The file, by row, is: -1. The data type, in this case `gff_metagenome`, the column count, and the version, in this case 1. + +1. The data type, in this case `gff_metagenome`, the column count, and the version, in + this case 1. * The data type is from the list in the [Mappings.py](https://github.com/kbase/staging_service/blob/master/staging_service/autodetect/Mappings.py) - file in the StS. The Narrative is expected to understand these types and map them to - importer apps. + file in the StS. The Narrative is expected to understand these types and map them + to importer apps. * The column count allows for error checking row sizes. This particularly important for the Excel parser, which uses `pandas` under the hood. `pandas` will silently pad data and headers out to match the longest row in the sheet, so the column count is required @@ -47,49 +50,53 @@ The file, by row, is: * The version allows us to update the file format and increment the version, allowing backwards compatibility - the staging service can process the file appropriately depending on the version number. -2. The IDs of the app inputs from the `spec.json` file. +2. The IDs of the app inputs from the `spec.json` file. 3. The corresponding human readable names of the app inputs from the `display.yaml` file. 4. (and beyond) Import specifications. Each line corresponds to a single import. -For Excel files, the first two rows may be hidden in any provided templates. Additionally, -Excel files may contain multiple data types, one per tab. 
Empty tabs will be ignored, and tabs -that don't match the expected structure will be treated as an error. +For Excel files, the first two rows may be hidden in any provided templates. +Additionally, Excel files may contain multiple data types, one per tab. Empty tabs will +be ignored, and tabs that don't match the expected structure will be treated as an +error. -Parsing will be on a "best effort" basis given the underlying `pandas` technology. We have -decided not to include type information per column in the spec, which means that the type -will be determined by `pandas`. Pandas specifies a single type per column, which means that -if a single `string` value is accidentally inserted into a column that is otherwise -full of `float`s, `pandas` will coerce all the values in that column to `string`s. Under normal -operations this should not be an issue, but could cause unexpected type returns if a user adds -an incorrect value to a column +Parsing will be on a "best effort" basis given the underlying `pandas` technology. We +have decided not to include type information per column in the spec, which means that +the type will be determined by `pandas`. Pandas specifies a single type per column, +which means that if a single `string` value is accidentally inserted into a column that +is otherwise full of `float`s, `pandas` will coerce all the values in that column to +`string`s. Under normal operations this should not be an issue, but could cause +unexpected type returns if a user adds an incorrect value to a column As part of this project we will deliver: + 1. CSV templates for each in scope app (e.g. the first 3 lines of the example file) 2. An Excel template containing a tab for each in scope app 3. A `README.md` file explaining how to use the templates. - * The `README.md` should include a link to rich import specification documentation on the KBase - website once it is developed. - * Note: cover booleans / checkboxes which are not intuitive. 0 will indicate unchecked, and 1 - checked. These values may show up as strings in the API, and the consumer will be expected to - handle conversion appropriately. + * The `README.md` should include a link to rich import specification documentation on + the KBase website once it is developed. + * Note: cover booleans / checkboxes which are not intuitive. 0 will indicate + unchecked, and 1 checked. These values may show up as strings in the API, and the + consumer will be expected to handle conversion appropriately. These files will reside in the StS repo. As part of the front end effort, some means of delivering the templates and instructions to users will be developed. -Currently, for all in scope apps, the inputs are all strings, numbers, or booleans. There are -no unusual inputs such as grouped parameters or dynamic lookups. Including future import apps -with these features in CSV-based import may require additional engineering. +Currently, for all in scope apps, the inputs are all strings, numbers, or booleans. +There are no unusual inputs such as grouped parameters or dynamic lookups. Including +future import apps with these features in CSV-based import may require additional +engineering. + +Note that the endpoint will return individual data for each row, while the current front +end only supports individual input files and output objects (this may be improved in a +future update). 
The front end will be expected to either -Note that the endpoint will return individual data for each row, while the current front end -only supports individual input files and output objects (this may be improved in a future update). -The front end will be expected to either 1. Ignore all parameters other than in the first entry in the return per type, or 2. Throw an error if parameters differ from the first entry. ## User operations -* The user uploads the import specification files to the staging area along with all the files - inluded in the specification. +* The user uploads the import specification files to the staging area along with all the + files inluded in the specification. * The user selects the `Import Specification` type for the specification files. * The user may also select other files in the staging area to include in the import along with the files listed in the specification. @@ -108,12 +115,13 @@ cell with those specifications. ## Staging service import specification endpoint -The StS endpoint responds to an HTTP GET with a file list in a `files` URL parameter. It is -extremely unlikely that there will be a large enough set of data types that URL length limits -will be reached so this seems adequate. The StS will respond with the contents of the files -parsed into a structure that is similar to that of the input spec, mapped by data type: +The StS endpoint responds to an HTTP GET with a file list in a `files` URL parameter. It +is extremely unlikely that there will be a large enough set of data types that URL +length limits will be reached so this seems adequate. The StS will respond with the +contents of the files parsed into a structure that is similar to that of the input spec, +mapped by data type: -``` +```text Headers: Authorization: @@ -149,10 +157,10 @@ Response: The order of the input structures MUST be the same as the order in the input files. -Notably, the service will provide the contents of the files as is and will not perform most error -checking, including for missing or unknown input parameters. Most error checking will be performed -in the bulk import cell configuration tab like other imports, allowing for a consistent user -experience. +Notably, the service will provide the contents of the files as is and will not perform +most error checking, including for missing or unknown input parameters. Most error +checking will be performed in the bulk import cell configuration tab like other imports, +allowing for a consistent user experience. ### Error handling @@ -162,7 +170,8 @@ each error type. Currently the error types are: * `cannot_find_file` if an input file cannot be found * `cannot_parse_file` if an input file cannot be parsed * `incorrect_column_count` if the column count is not as expected - * For Excel files, this may mean there is a non-empty cell outside the bounds of the data area + * For Excel files, this may mean there is a non-empty cell outside the bounds of the + data area * `multiple_specifications_for_data_type` if more than one tab or file per data type is submitted * `no_files_provided` if no files were provided * `unexpected_error` if some other error occurs @@ -175,7 +184,7 @@ The HTTP code returned will be, in order of precedence: The general structure of the error response is: -``` +```json {"errors": [ {"type": , ... other fields depending on the error code ... 
@@ -188,7 +197,7 @@ The individual error structures per error type are as follows: #### `cannot_find_file` -``` +```json {"type": "cannot_find_file", "file": } @@ -196,7 +205,7 @@ The individual error structures per error type are as follows: #### `cannot_parse_file` -``` +```json {"type": "cannot_parse_file", "file": , "tab": , @@ -204,15 +213,15 @@ The individual error structures per error type are as follows: } ``` -The service will check that the data type is valid and that rows >=2 all have the same number of -entries, but will not do further error checking. +The service will check that the data type is valid and that rows >=2 all have the same +number of entries, but will not do further error checking. In this case the service should log the stack trace along with the filename for each invalid file if the trace would assist in debugging the error. #### `incorrect_column_count` -``` +```json {"type": "incorrect_column_count", "file": , "tab": , @@ -222,7 +231,7 @@ each invalid file if the trace would assist in debugging the error. #### `multiple_specifications_for_data_type` -``` +```json {"type": "multiple_specifications_for_data_type", "file_1": , "tab_1": , @@ -234,13 +243,13 @@ each invalid file if the trace would assist in debugging the error. #### `no_files_provided` -``` +```json {"type": "no_files_provided"} ``` #### `unexpected_error` -``` +```json {"type": "unexpected_error", "file": "message": @@ -251,10 +260,10 @@ Note in this case the service MUST log the stack trace along with the filename f ## Alternatives explored -* We considered parsing the files in the Narrative backend (pulling the files as is from the StS) - but the current design allows for other services and UIs reusing the parsing code. There doesn't - otherwise seem to be a strong reason to include the code one place or the other so we decided - to include it in the StS. +* We considered parsing the files in the Narrative backend (pulling the files as is from + the StS) but the current design allows for other services and UIs reusing the parsing + code. There doesn't otherwise seem to be a strong reason to include the code one place + or the other so we decided to include it in the StS. ## Questions @@ -268,8 +277,8 @@ Note in this case the service MUST log the stack trace along with the filename f * Should we strictly enforce a column count for every row in xSV files? * Not enforcing a count makes it somewhat easier for users to fill in the data - they don't need to add extraneous commas or tabs to the end of the line. - * Enforcing a count makes it less likely that a user will commit silent counting errors if - there are many empty entries between items in a line. + * Enforcing a count makes it less likely that a user will commit silent counting + errors if there are many empty entries between items in a line. * A: Enforce a column count to prevent user errors. ## Addendum: dynamic parameter lookup @@ -277,16 +286,16 @@ Note in this case the service MUST log the stack trace along with the filename f Dynamic scientific name to taxon lookup may be added to the Genbank (and the currently out of scope, but trivial to add GFF/FASTA Genome) importer in the near future. If that occurs, for the purpose of xSV import the user will be expected to provide the entire, correct, -scientific name as returned from the taxon API. +scientific name as returned from the taxon API. 
-* The user could get this name by starting a genome import and running the query from the +* The user could get this name by starting a genome import and running the query from the import app cell configuration screen. * This will be documented in the README.md for the template files. * As part of the UI work we could theoretically provide a landing page for looking up valid scientific names. -* Presumably the UI would need to run the dynamic query and report an error to the user if the - dynamic service returns 0 or > 1 entries. -* Providing the scientific name vs. the taxon ID seems simpler because the machinery already +* Presumably the UI would need to run the dynamic query and report an error to the user + if the dynamic service returns 0 or > 1 entries. +* Providing the scientific name vs. the taxon ID seems simpler because the machinery already exists to perform the query and is part of the spec. * Expect these plans to change as it becomes more clear how dynamic fields will work in the - context of bulk import. \ No newline at end of file + context of bulk import. diff --git a/scripts/run-dev-server.sh b/scripts/run-dev-server.sh new file mode 100755 index 00000000..c46b413c --- /dev/null +++ b/scripts/run-dev-server.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +FILE_LIFETIME="90" KB_DEPLOYMENT_CONFIG="${PWD}/deployment/conf/local.cfg" PYTHONPATH="${PWD}/staging_service" python -m staging_service diff --git a/run_in_docker.sh b/scripts/run_in_docker.sh similarity index 100% rename from run_in_docker.sh rename to scripts/run_in_docker.sh diff --git a/run_tests.sh b/scripts/run_tests.sh similarity index 100% rename from run_tests.sh rename to scripts/run_tests.sh From ccb76607ae11a1f18942d45be92e33d099500dc5 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 16:20:52 +0000 Subject: [PATCH 22/40] remove old, unused travis config [PTV-1888] --- .travis.yml | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b891d07f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -sudo: required -services: - - docker -language: python -python: "3.6" -install: - - pip install -r requirements.txt -script: - - ./run_tests.sh - - make -after_success: - - IMAGE_NAME=kbase/staging_service ./build/push2dockerhub.sh -env: - global: - - secure: "mH9OrGjZYV021EfQPjx0hnwi7/UlGVNTnE6kC81O1/9/lOWihnDKBb/GXuWyj7hbiCZgA2ofDblkm+bIZQxU/+40k8ZrQyk4xe9Bm3jM9MCZ7y6r3g3HZnKXGbzdQTACLoJwdHRWEnYWcBC+aEnDsJjm/uh+gFE3WFXABHO5nOBVBJ1vnZoqczPnLIqOtTWwybV58eYxXOm7J6GvxrTWxdqoEOKsLcjdbm9dej/i9xsxHr6x0I/fiB8mFsIgl1QS9WobpTjs1AQghShz8oFO4YI70mJF4b9tU0FbfF1Wm6K4fe69ff65RwUhT9o8ZgwBt/mzrDFTLMI9f/3J8v6Bu5UxQ1yEPc9QP78IWW/g2G20WxjLzmbKogsGZkSyPEz1TLq8CgEGvU5IOoDSW1NjQJr9XQaE8A1ILDEfi1xVG/d6o8uhEMEiloveRa8iKdNFoGv5O0fbS8RC6KVs9SPP0nkApmv29MhWf0injEROZNXiDJr6Da4HiU+RkDwdM8rbIOS0586t/czVIRremmon5Xb9NDZapLbqdIX/zd//IC6WGM0ytX36t8FgU0Du+V+KfaAc7XP3ff+GLUL/sxMGGjdl+gCCdUCrTPCl2+D9P54/HRB5Q5xKNnO/OVou9WOe1HanthH5n2QGelzLisU2OxCiWl/iZ15pl+CGntF9+Ek=" - - secure: 
"sHLRR/xLA6fiPkJB6IolHUaJxNJJYcfSKG3MmRAU7v0LxxoqUkneBfk9jyk2GdDdtOR+RocOQmwie0Vf6ik5bIxcs/Bse1tR0+yhmg9EpgClokfgZN8815yNVclGkVFWZS7BhRjm7mt0K3Vo7Of0mKikp5L/tYmwQy1hesJ+mTa83igeQKpFU3QgkRJB2ALgSI54ruhO88x+9HtLqkhN8KqHj5ykGzS7ShagFywflFDVFWJoBK76NideU0gTU7+2dNV0+EPmnRWs9QoAh5LSUDa8v7fo+I2WHFCOLfiKzaNMc85+Gc72qyxvkpWl2UCJpiFVK1X4DFGvHYU9FzM0W9VuhrBAa3h1XFncYk52WzKNGVvQnOZE965iNrl/xSh2oTUOfyVIfX5JuldEWffU/vhAjvnqfsoq17geRxLyqbRx/4ZvNq6jf3Ef6kLwKlnPzcw85zZs2jsy3qC5UGJf0xqchWfsqxACvmGhVyv+KxHJxaFq48UHp6Ezb/X2DweD6/6WPfwXI6cOwXy6Uj3uvkYSsKE7HJu3K9aMeh158ZMrW/JeyH5dCRAv1qeAyLN0yOw/lPUapdrbB9nXkLbHIOhV8b6A2cOtkkuM6HXXBxICJ5BIHmVzO+4W7VXueXfVjAVY2+xs0ouFsPIOINfR5hgsglncDY+r/wU8LrUHxHg=" \ No newline at end of file From 09b36e40a7dee56244f79273e5b90c8a6818b203 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 09:36:08 -0700 Subject: [PATCH 23/40] top level scripts have moved to the scripts directory [PTV-1888] --- .github/workflows/run_tests.yml | 2 +- docs/development/development-tools.md | 4 ++-- docs/development/original-doc.md | 6 +++--- scripts/run_tests.sh | 11 ++++++++++- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4a228886..d5063df2 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -9,4 +9,4 @@ jobs: uses: actions/checkout@v3 - name: Run the tests - run: ./development/scripts/run run_tests.sh \ No newline at end of file + run: ./development/scripts/run scripts/run_tests.sh \ No newline at end of file diff --git a/docs/development/development-tools.md b/docs/development/development-tools.md index 9e3ac2f6..a0cf74b5 100644 --- a/docs/development/development-tools.md +++ b/docs/development/development-tools.md @@ -71,7 +71,7 @@ Here are the common ones: - `mypy staging_service` - `black staging_service` - `isort staging_service` -- `./run_tests.sh` +- `./scripts/run_tests.sh` ### Running the server @@ -116,4 +116,4 @@ To run the tools: - `./development/scripts/run mypy staging_service` - `./development/scripts/run black staging_service` - `./development/scripts/run isort staging_service` -- `./development/scripts/run ./run_tests.sh` +- `./development/scripts/run ./scripts/run_tests.sh` diff --git a/docs/development/original-doc.md b/docs/development/original-doc.md index 0af81be1..eb5f36bf 100644 --- a/docs/development/original-doc.md +++ b/docs/development/original-doc.md @@ -32,7 +32,7 @@ make run-dev ### Run on host -- to test use `./run_tests.sh` +- to test use `./scripts/run_tests.sh` - requires python 3.11.5 or higher - requires installation on mac of libmagic `brew install libmagic` or `sudo port install libmagic` @@ -43,14 +43,14 @@ You can also run tests in a container which uses the same base image and uses th dependencies. (This container can also run other python tasks.) ```shell -./development/scripts/run run_tests.sh +./development/scripts/run scripts/run_tests.sh ``` To run tests in individual test file you may supply the path to it. By default, the tests run against `tests/`. ```shell -./development/scripts/run run_tests.sh tests/test_app.py +./development/scripts/run scripts/run_tests.sh tests/test_app.py ``` ## debugging diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 47b8b6e4..db638ef3 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -1,8 +1,17 @@ #!/usr/bin/env bash -DIR="$( cd "$( dirname "$0" )" && pwd )" + +# DIR is the root of the project. +DIR="$( cd "$( dirname "$0" )" && pwd )/.." 
+ export KB_DEPLOYMENT_CONFIG="${DIR}/deployment/conf/testing.cfg" export FILE_LIFETIME="90" + +# Tests are located in the `tests` directory. Specific tests or groups of +# tests may be run by proving an argument to the script which is an acceptable +# test path spec for pytest. E.g. `./scripts/run_tests.sh tests/test_app.py` +# will run just the tests in `tests/test_app.py`. export TESTS="${1:-tests}" + echo echo "****************************" echo "**" From 766872d87599d74a20810cc589bcbaf0a874d916 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 09:38:31 -0700 Subject: [PATCH 24/40] fix typos [PTV-1888] --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e2a7eeb2..8313446e 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ documentation](https://docs.kbase.us/getting-started/narrative/add-data). ## Installation -The service is designed to be packagd into a Docker image and run as a container. +The service is designed to be packaged into a Docker image and run as a container. For deployment, the image is built by GitHub actions and stored in GitHub Container Registry. @@ -38,7 +38,7 @@ Development has [its own documentation](./docs/development/inde.md). Production deployment is out of scope (though it can be larged deduced from local deployment). -Local deployement is as easy as running +Local deployment is as easy as running ```shell ./scripts/run-dev-server.sh From 4c38f0dfc02639283de8906f20d37bfa4fed1a5b Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 09:40:32 -0700 Subject: [PATCH 25/40] fix doc [PTV-1888] --- docs/development/original-doc.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/docs/development/original-doc.md b/docs/development/original-doc.md index eb5f36bf..51206820 100644 --- a/docs/development/original-doc.md +++ b/docs/development/original-doc.md @@ -19,14 +19,7 @@ if you want to run locally you must install requirements.txt for python3 to run locally run `/deployment/bin/entrypoint.sh` -to run inside docker run `/run_in_docker.sh` - -to run in coordination with the kbase-ui development proxy, enabling it to serve locally -as a back end for Narrative, kbase-ui and other services: - -```shell -make run-dev -``` +to run inside docker run `./scripts/run_in_docker.sh` ## tests From d710581077bf261e9ff186772a90a57916cc9fec Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 11:57:58 -0700 Subject: [PATCH 26/40] adjust mtime test precision. --- tests/test_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index e771ab7d..32a0f0c6 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -31,7 +31,7 @@ decoder = JSONDecoder() -MTIME_PRECISION = 3 +MTIME_TEST_PRECISION = 1 config = configparser.ConfigParser() config.read(os.environ["KB_DEPLOYMENT_CONFIG"]) @@ -605,7 +605,7 @@ async def test_list(): # The "mtime" in the structure is in ms. 
diff = json[0]["mtime"]/1000 - time.time() - assert abs(diff) < MTIME_PRECISION + assert abs(diff) < MTIME_TEST_PRECISION assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 @@ -621,7 +621,7 @@ async def test_list(): assert json[0]["path"] == "testuser/test" diff = json[0]["mtime"]/1000 - time.time() - assert abs(diff) < MTIME_PRECISION + assert abs(diff) < MTIME_TEST_PRECISION assert len(file_folder_count) == 4 # 2 folders and 2 files assert sum(file_folder_count) == 2 From c2f9c1659897f09cf12e9030ceae3e1df8eb5caf Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 12:13:27 -0700 Subject: [PATCH 27/40] remove spaces (codacy) [PTV-1888] --- tests/test_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 32a0f0c6..788dbbb9 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -593,10 +593,10 @@ async def test_list(): # bit. We just want to assert that the file's mtime, as provided by the api, # is within the ballpark of the current time. We define this ballpark as 3 # seconds, since in at least some cases, Windows mtime precision is 2 seconds. - - # Here is an actual example of a terst that failed with the naive + + # Here is an actual example of a test that failed with the naive # implementation: - + # FAILED tests/test_app.py::test_list - assert 1693508770000 <= # (1693508769.9508653 * 1000) # Thursday, August 31, 2023 7:06:10 PM From 1ad078acc6a41ca5916c57634feb33981352d54d Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 19:39:04 +0000 Subject: [PATCH 28/40] markdown formatting [PTV-1888] --- .markdownlint.yaml | 10 ++-- docs/development/index.md | 2 +- docs/import_specifications.ADR.md | 86 ++++++++++++++++--------------- 3 files changed, 51 insertions(+), 47 deletions(-) diff --git a/.markdownlint.yaml b/.markdownlint.yaml index 1db5789b..eb5393e3 100644 --- a/.markdownlint.yaml +++ b/.markdownlint.yaml @@ -1,8 +1,10 @@ +MD013: + # Matches black line length for python code. + line_length: 88 + # some long examples would break the line length and are awkward to rearrange + code_blocks: false + # We don't want the annoying warnings when there are duplicated non-sibling headers as # many types of docs have a pattern for headings that is repeated throughout the document. MD024: siblings_only: true - -MD013: - line_length: 88 - code_blocks: false \ No newline at end of file diff --git a/docs/development/index.md b/docs/development/index.md index 45ef30be..d65a9c39 100644 --- a/docs/development/index.md +++ b/docs/development/index.md @@ -2,4 +2,4 @@ - [Original docs from readme](./original-doc.md) - [Development Tools](./development-tools.md) -- [Maintaining the Dockerfiles](./maintaining-dockerfile.md) \ No newline at end of file +- [Maintaining the Dockerfiles](./maintaining-dockerfile.md) diff --git a/docs/import_specifications.ADR.md b/docs/import_specifications.ADR.md index 31ba0c47..44eed525 100644 --- a/docs/import_specifications.ADR.md +++ b/docs/import_specifications.ADR.md @@ -7,9 +7,9 @@ data types. 
## Resources -* [The original strategy document for this approach](https://docs.google.com/document/d/1ocmZVBlTzAh_cdZaWGRwIbAuH-mcPRZFdhcwRhAfzxM/edit) - * Readable for everyone that has access to the KBase Google Docs folder - * The document contains short descriptions of future projects not included in this work, +- [The original strategy document for this approach](https://docs.google.com/document/d/1ocmZVBlTzAh_cdZaWGRwIbAuH-mcPRZFdhcwRhAfzxM/edit) + - Readable for everyone that has access to the KBase Google Docs folder + - The document contains short descriptions of future projects not included in this work, such as generating template files on the fly and rich templates. ## Front end changes @@ -39,15 +39,15 @@ The file, by row, is: 1. The data type, in this case `gff_metagenome`, the column count, and the version, in this case 1. - * The data type is from the list in the + - The data type is from the list in the [Mappings.py](https://github.com/kbase/staging_service/blob/master/staging_service/autodetect/Mappings.py) file in the StS. The Narrative is expected to understand these types and map them to importer apps. - * The column count allows for error checking row sizes. This particularly important for + - The column count allows for error checking row sizes. This particularly important for the Excel parser, which uses `pandas` under the hood. `pandas` will silently pad data and headers out to match the longest row in the sheet, so the column count is required to detect errors. - * The version allows us to update the file format and increment the version, + - The version allows us to update the file format and increment the version, allowing backwards compatibility - the staging service can process the file appropriately depending on the version number. 2. The IDs of the app inputs from the `spec.json` file. @@ -72,9 +72,9 @@ As part of this project we will deliver: 1. CSV templates for each in scope app (e.g. the first 3 lines of the example file) 2. An Excel template containing a tab for each in scope app 3. A `README.md` file explaining how to use the templates. - * The `README.md` should include a link to rich import specification documentation on + - The `README.md` should include a link to rich import specification documentation on the KBase website once it is developed. - * Note: cover booleans / checkboxes which are not intuitive. 0 will indicate + - Note: cover booleans / checkboxes which are not intuitive. 0 will indicate unchecked, and 1 checked. These values may show up as strings in the API, and the consumer will be expected to handle conversion appropriately. @@ -95,13 +95,13 @@ future update). The front end will be expected to either ## User operations -* The user uploads the import specification files to the staging area along with all the +- The user uploads the import specification files to the staging area along with all the files inluded in the specification. -* The user selects the `Import Specification` type for the specification files. - * The user may also select other files in the staging area to include in the import along +- The user selects the `Import Specification` type for the specification files. + - The user may also select other files in the staging area to include in the import along with the files listed in the specification. - * The user *does not* have to select any files included in the specification. -* The user clicks `Import Selected`. + - The user *does not* have to select any files included in the specification. 
+- The user clicks `Import Selected`. As such, a new type must be added to the StS: `import_specification` with the title `Import Specification`. *Nota bene*: It may be preferable to have the Narrative specify the @@ -167,20 +167,22 @@ allowing for a consistent user experience. The StS will return errors on a (mostly) per input file basis, with a string error code for each error type. Currently the error types are: -* `cannot_find_file` if an input file cannot be found -* `cannot_parse_file` if an input file cannot be parsed -* `incorrect_column_count` if the column count is not as expected - * For Excel files, this may mean there is a non-empty cell outside the bounds of the +- `cannot_find_file` if an input file cannot be found + +- `cannot_parse_file` if an input file cannot be parsed +- `incorrect_column_count` if the column count is not as expected + - For Excel files, this may mean there is a non-empty cell outside the bounds of the data area -* `multiple_specifications_for_data_type` if more than one tab or file per data type is submitted -* `no_files_provided` if no files were provided -* `unexpected_error` if some other error occurs +- `multiple_specifications_for_data_type` if more than one tab or file per data type + is submitted +- `no_files_provided` if no files were provided +- `unexpected_error` if some other error occurs The HTTP code returned will be, in order of precedence: -* 400 if any error other than `cannot_find_file` or `unexpected_error` occurs -* 404 if at least one error is `cannot_find_file` but there are no 400-type errors -* 500 if all errors are `unexpected_error` +- 400 if any error other than `cannot_find_file` or `unexpected_error` occurs +- 404 if at least one error is `cannot_find_file` but there are no 400-type errors +- 500 if all errors are `unexpected_error` The general structure of the error response is: @@ -260,26 +262,26 @@ Note in this case the service MUST log the stack trace along with the filename f ## Alternatives explored -* We considered parsing the files in the Narrative backend (pulling the files as is from +- We considered parsing the files in the Narrative backend (pulling the files as is from the StS) but the current design allows for other services and UIs reusing the parsing code. There doesn't otherwise seem to be a strong reason to include the code one place or the other so we decided to include it in the StS. ## Questions -* Should it be an error to submit multiple files or tabs for the same type? If not, +- Should it be an error to submit multiple files or tabs for the same type? If not, how should the different files be denoted and ordered? - * This has follow on effect for how spreadsheet type views in the UI should be displayed. - * A: For the MVP disallow submitting more than one file / tab per type. Post release we'll - find out if this is a use case that users care about. -* Should we disallow filenames with commas? They may cause problems with the new endpoint. - * A: Disallow commas, the same way we disallow files staring with whitespace or periods. -* Should we strictly enforce a column count for every row in xSV files? - * Not enforcing a count makes it somewhat easier for users to fill in the data - they don't - need to add extraneous commas or tabs to the end of the line. - * Enforcing a count makes it less likely that a user will commit silent counting + - This has follow on effect for how spreadsheet type views in the UI should be displayed. + - A: For the MVP disallow submitting more than one file / tab per type. 
Post release + we'll find out if this is a use case that users care about. +- Should we disallow filenames with commas? They may cause problems with the new endpoint. + - A: Disallow commas, the same way we disallow files staring with whitespace or periods. +- Should we strictly enforce a column count for every row in xSV files? + - Not enforcing a count makes it somewhat easier for users to fill in the data - + they don't need to add extraneous commas or tabs to the end of the line. + - Enforcing a count makes it less likely that a user will commit silent counting errors if there are many empty entries between items in a line. - * A: Enforce a column count to prevent user errors. + - A: Enforce a column count to prevent user errors. ## Addendum: dynamic parameter lookup @@ -288,14 +290,14 @@ out of scope, but trivial to add GFF/FASTA Genome) importer in the near future. for the purpose of xSV import the user will be expected to provide the entire, correct, scientific name as returned from the taxon API. -* The user could get this name by starting a genome import and running the query from the +- The user could get this name by starting a genome import and running the query from the import app cell configuration screen. - * This will be documented in the README.md for the template files. -* As part of the UI work we could theoretically provide a landing page for looking up valid + - This will be documented in the README.md for the template files. +- As part of the UI work we could theoretically provide a landing page for looking up valid scientific names. -* Presumably the UI would need to run the dynamic query and report an error to the user +- Presumably the UI would need to run the dynamic query and report an error to the user if the dynamic service returns 0 or > 1 entries. -* Providing the scientific name vs. the taxon ID seems simpler because the machinery already +- Providing the scientific name vs. the taxon ID seems simpler because the machinery already exists to perform the query and is part of the spec. -* Expect these plans to change as it becomes more clear how dynamic fields will work in the - context of bulk import. +- Expect these plans to change as it becomes more clear how dynamic fields will work + in the context of bulk import. From d384ec3771bc10ad275e2ba018b3f616db43c3d0 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 19:59:50 +0000 Subject: [PATCH 29/40] omit line for bandit/codacy not sonarcloud (codacy) [PTV-1888] --- development/scripts/generate-random-strings.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/development/scripts/generate-random-strings.py b/development/scripts/generate-random-strings.py index cbf40479..f1d8f40c 100644 --- a/development/scripts/generate-random-strings.py +++ b/development/scripts/generate-random-strings.py @@ -10,9 +10,10 @@ def make_random_string(string_length: int) -> str: Generate a string of random ascii letters of the given length """ possible_letters = string.ascii_letters - # ignore the SONAR warning below; this is just for generating test data, security is - # not an issue. - return "".join(random.choice(possible_letters) for _ in range(string_length)) # NOSONAR + # ignore the bandit warning below triggered by codacy; + # this is just for generating data for testing, either formal or informa; + # security is not an issue. 
+ return "".join(random.choice(possible_letters) for _ in range(string_length)) # nosec if __name__ == "__main__": From 8b6f62ee70c6458f6514918fb2e1ba46de33abf8 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 20:09:50 +0000 Subject: [PATCH 30/40] add openssh-client so can push to github inside devcontainer [PTV-1888] --- .devcontainer/Dockerfile | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index f1e42edf..37d8f8e1 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -5,7 +5,7 @@ FROM python:3.11.5-slim-bookworm # We don't upgrade, and we pin all os dependency versions RUN apt-get update && \ - apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 git=1:2.39.2-1.1 && \ + apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 git=1:2.39.2-1.1 openssh-client=1:9.2p1-2 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -21,14 +21,3 @@ COPY ./requirements_dev.txt /app/requirements_dev.txt RUN python -m pip install "pip==23.2.1" && pip install -r /app/requirements.txt -r /app/requirements_dev.txt ENV PATH="/app:${PATH}" - -# Run as a regular user, not root. -# RUN addgroup --system kbapp && \ -# adduser --system --ingroup kbapp kbapp && \ -# chown -R kbapp:kbapp /app - -# USER kbapp - -# ENTRYPOINT [ "/app/development/tools/runner/entrypoint.sh" ] - -# CMD [ "bash" ] \ No newline at end of file From b0485e35585e553b80791c91553c0001980305f3 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 20:13:33 +0000 Subject: [PATCH 31/40] okay, ignore in sonarcloud too [PTV-1888] --- development/scripts/generate-random-strings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/development/scripts/generate-random-strings.py b/development/scripts/generate-random-strings.py index f1d8f40c..ae2e7bda 100644 --- a/development/scripts/generate-random-strings.py +++ b/development/scripts/generate-random-strings.py @@ -10,10 +10,10 @@ def make_random_string(string_length: int) -> str: Generate a string of random ascii letters of the given length """ possible_letters = string.ascii_letters - # ignore the bandit warning below triggered by codacy; + # ignore the Sonarcloud and bandit warning below triggered by codacy; # this is just for generating data for testing, either formal or informa; # security is not an issue. 
- return "".join(random.choice(possible_letters) for _ in range(string_length)) # nosec + return "".join(random.choice(possible_letters) for _ in range(string_length)) # nosec NOSONAR if __name__ == "__main__": From 01dc36a37ebf7570ab509b4edbfb93a0d4e92df2 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 5 Sep 2023 20:57:57 +0000 Subject: [PATCH 32/40] bullseye -> bookworm [PTV-1888] --- RELEASE_NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 4caf5ab5..0acd6fee 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -4,7 +4,7 @@ - update to Python 3.11.5 - image base is python 3.11.5 / debian bookworm -- pin all image dependencies to those currently supported by the debian bullseye +- pin all image dependencies to those currently supported by the debian bookworm - update all Python dependencies to latest versions - run black on all service and test files, fix most linting complaints (pylint, sonarlint) From ae1fc76f3f580fb0c99d7a1ef5cd3fb9008e9523 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 12 Sep 2023 13:02:58 -0700 Subject: [PATCH 33/40] revert globus sdk version bump [PTV-1888] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 974f1eb7..5552ecbc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ aiohttp==3.8.5 aiohttp_cors==0.7.0 defusedxml==0.7.1 frozendict==2.3.8 -globus-sdk==3.28.0 +globus-sdk==1.6.1 gunicorn==21.2.0 openpyxl==3.1.2 pandas==2.1.0 From 14602e25906f7f4cb4bda8c44b4c70b69ef2bd91 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Tue, 12 Sep 2023 13:05:03 -0700 Subject: [PATCH 34/40] fix typos [PTV-1899] --- docs/api.md | 18 +++++++++--------- docs/development/development-tools.md | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/api.md b/docs/api.md index a8fb8e1b..aa84aef3 100644 --- a/docs/api.md +++ b/docs/api.md @@ -3,7 +3,7 @@ - all paths should be specified treating the user's home directory as root -- The url base, in an actualy deployment, will be: +- The url base, in an actual deployment, will be: - `https://ci.kbase.us` for CI - `https://next.kbase.us` for Next - `https://appdev.kbase.us` for Appdev @@ -317,7 +317,7 @@ The body format is described in more detail below. The response is a JSON object containing file metadata. > This might seem a bit of an odd choice, and does complicate the API. It was chosen, I -> belive because the Narrative front-end component handling the submission of file +> believe because the Narrative front-end component handling the submission of file > uploads supports `multipart/form-data`, or it may be because historically the first > front-end implementation was a form itself, as HTML forms will send as > `mutlipart/form-data` if a file control is used to select a file. @@ -333,7 +333,7 @@ The response is a JSON object containing file metadata. ### Body -The multpart form-data format supports sending of multiple form fields, each with their +The multipart form-data format supports sending of multiple form fields, each with their own metadata as well as body. The service requires that two fields be present in the specified order: @@ -348,7 +348,7 @@ probably have been set based on the path of the file chosen for upload in the Na interface (although the service API is agnostic about the how and why.) The `uploads` field will contain a binary representation of the file. 
The file binary -content will be literaly copied into the destination file, with no encoding or validation. +content will be literally copied into the destination file, with no encoding or validation. #### Example @@ -856,7 +856,7 @@ id file for linking to globus. Note that the "message" is that returned by the globus api, and out of scope for documentation here. -> TODO: we should provide our own message, or return the globus rZesponse data, but not +> TODO: we should provide our own message, or return the globus response data, but not > return the globus response data (a json object) into a string and call it a message! ### Error Response @@ -992,7 +992,7 @@ details. ### Error Response -Error reponses are of the general form: +Error responses are of the general form: ```json { @@ -1151,7 +1151,7 @@ data structure to that which the parse endpoint returns and writes bulk specific - `` is the ID of an input parameter from a `KB-SDK` app's `spec.json` file. These are written to the second header line from the import templates and will differ by the data type. -- `data` contains any data to be written to the file as example data, and is analagous +- `data` contains any data to be written to the file as example data, and is analogous to the data structure returned from the parse endpoint. To specify that no data should be written to the template provide an empty list. - `` is the value for the input for a given `spec.json` ID @@ -1180,9 +1180,9 @@ data structure to that which the parse endpoint returns and writes bulk specific { "output_file_type": , "files": { - : , + : , ... - : , + : , } } ``` diff --git a/docs/development/development-tools.md b/docs/development/development-tools.md index a0cf74b5..84ca5c27 100644 --- a/docs/development/development-tools.md +++ b/docs/development/development-tools.md @@ -35,7 +35,7 @@ does not do this style of Python development. - From the new window: - Click on Explorer in the left-nav, then click on Open Folder - or - - Selecte File > Open Folder + - Select: File > Open Folder - Select the folder containing the repo (project) - Click Open @@ -62,7 +62,7 @@ terminal. ### Running Tools All of the tools available for running from the host via docker are also directly -available within the devcontainer. I still often run them from a host termianl anyway, as +available within the devcontainer. I still often run them from a host terminal anyway, as I can control the position and display of the native terminal a bit better, and it is also decoupled from any issues VSC may have. YMMV. @@ -90,7 +90,7 @@ The `FILE_LIFETIME` environment variable is required, and sets the file retentio We ensure that the configuration is available via `KB_DEPLOYMENT_CONFIG`. In our case we are using a configuration file already prepared for local usage. If you inspect it, you'll find that all directory references are relative to the current directory. We are -referecing the `local.cfg` local configuration where it is stored in the codebase. +referencing the `local.cfg` local configuration where it is stored in the codebase. We also ensure that the `staging_service` is included in the `PYTHONPATH`. From 150421e1b4a4d2058f2cf0d0a27f173033687f0b Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 15 Sep 2023 11:20:28 -0700 Subject: [PATCH 35/40] refactor Dockerfile to support dev and prod workflows [PTV-1888] Uses multi-stage build to create a dev stage and production stage; removed other Dockerfiles, adjusted docker-compose and other things. 
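As an illustrative sketch of how the stages can be used (the stage names `dev` and
`prod` are defined in the Dockerfile in this patch; the image tags below are only
examples, not names used anywhere in the repo):

```shell
# Build the development image (the stage used by the devcontainer and the tool runner).
docker build --target dev -t staging_service:dev .

# Build the production image (the final stage, with the service code and entrypoint).
docker build --target prod -t staging_service:prod .

# Or let docker compose select the stage via its `target:` setting, as the compose
# files in this patch do.
docker compose build
```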
--- .devcontainer/Dockerfile | 23 ------ .devcontainer/devcontainer.json | 3 +- .devcontainer/docker-compose.yml | 11 ++- Dockerfile | 80 ++++++++++++++++----- deployment/bin/entrypoint.sh | 25 +++---- development/docker-compose-kbase-ui.yml | 30 -------- development/scripts/run | 1 - development/tools/runner/Dockerfile | 34 --------- development/tools/runner/docker-compose.yml | 10 +-- development/tools/runner/entrypoint.sh | 5 +- docker-compose.yml | 33 +++++++-- docs/development/maintaining-dockerfile.md | 50 +++++++------ 12 files changed, 139 insertions(+), 166 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 development/docker-compose-kbase-ui.yml delete mode 100644 development/tools/runner/Dockerfile diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 37d8f8e1..00000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM python:3.11.5-slim-bookworm -# ----------------------------------------- -# We use the "stable" debian release; python does not seem to release current -# versions on LTS debian releases. - -# We don't upgrade, and we pin all os dependency versions -RUN apt-get update && \ - apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 git=1:2.39.2-1.1 openssh-client=1:9.2p1-2 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -SHELL ["/bin/bash", "-c"] - -RUN mkdir -p /app - -WORKDIR /app - -# Standard simplified python setup. -COPY ./requirements.txt /app/requirements.txt -COPY ./requirements_dev.txt /app/requirements_dev.txt -RUN python -m pip install "pip==23.2.1" && pip install -r /app/requirements.txt -r /app/requirements_dev.txt - -ENV PATH="/app:${PATH}" diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9c70bcb6..96db58bd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -22,7 +22,8 @@ "ms-python.flake8", "mikoz.autoflake-extension", "ms-azuretools.vscode-docker", - "DavidAnson.vscode-markdownlint" + "DavidAnson.vscode-markdownlint", + "matangover.mypy", ] } } diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 7498be19..01401422 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -6,16 +6,21 @@ services: staging_service: build: context: .. - dockerfile: .devcontainer/Dockerfile + target: dev - image: kbase/staging_server_devcontainer:dev + # image: kbase/staging_server_devcontainer:dev container_name: staging_server_devcontainer dns: 8.8.8.8 volumes: - - ..:/workspace:cached + - ..:/workspace networks: - kbase-dev + # Set some default environment variables, which enable the + # service to work inside the container without too much fussing. + env: + KB_DEPLOYMENT_CONFIG=./deployment/conf/local.cfg + FILE_LIFETIME=90 # Required for a devcontainer -- keeps the container running. 
# Don't worry, our main interaction with the container is through the # VSC terminal, which for a devcontainer opens a shell within the diff --git a/Dockerfile b/Dockerfile index 84aee833..52301c69 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,34 +1,78 @@ -FROM python:3.11.5-slim-bookworm +FROM python:3.11.5-slim-bookworm AS base # ----------------------------------------- -RUN mkdir -p /kb/deployment/lib RUN apt-get update && \ - apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 && \ + apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -COPY ./requirements.txt /requirements.txt -RUN python -m pip install "pip==23.2.1" && pip install -r /requirements.txt && rm /requirements.txt +COPY requirements.txt /kb/requirements.txt +RUN python -m pip install "pip==23.2.1" && pip install -r /kb/requirements.txt && rm -r /kb +# The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to +# the end +LABEL org.label-schema.build-date=$BUILD_DATE \ + org.label-schema.vcs-url="https://github.com/kbase/staging_service.git" \ + org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.schema-version="1.1.8" \ + us.kbase.vcs-branch=$BRANCH \ + maintainer="Steve Chan sychan@lbl.gov" + +# +# Dev Layer +# Used in devcontainer, and as base for tools +# +FROM base AS dev +# Install OS dependencies required by or nice-to-have in a development image +RUN apt-get update && \ + apt-get install -y --no-install-recommends htop=3.2.2-2 wget=1.21.3-1+b2 git=1:2.39.2-1.1 openssh-client=1:9.2p1-2 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* +# Install Python dependencies require by development tools (cli tools and devcontainer) +COPY ./requirements_dev.txt /kb/requirements_dev.txt +RUN pip install -r /kb/requirements_dev.txt && rm -r /kb +WORKDIR /kb/module +# Note - entrypoint defined in docker compose file, and /kb/module is volume mounted by +# the devcontainer and the tools + +# +# Prod layer +# +FROM base AS prod + +# Install globus configuration into expected location. +# TODO: point to location for documentation of this. COPY ./globus.cfg /etc/globus.cfg RUN touch /var/log/globus.log && chmod 777 /var/log/globus.log +# We expect this to run on port 3000 +# TODO: this is weird, kbase services usually run at port 5000. +EXPOSE 3000 + +# We keep the entire repo in /kb/module; for why, I know not. +# TODO: could someone add a comment here explaining why? COPY ./ /kb/module -RUN cp -r /kb/module/staging_service /kb/deployment/lib -RUN cp -r /kb/module/deployment /kb +WORKDIR /kb/module +# Otherwise, the service is installed in /kb/deployment (!) +# RUN mkdir -p /kb/deployment/lib +# RUN cp -r /kb/module/staging_service /kb/deployment/lib -EXPOSE 3000 +# +# Here we copy all of the required runtime components that need +# to be in the image. +# -WORKDIR /kb/deployment/lib +# This contains the entrypoint +COPY ./deployment/bin /kb/deployment/bin +# This contains the CI deployment +# TODO: why is it copied to the codebase, though? 
+COPY ./deployment/conf/deployment.cfg /kb/deployment/conf/deployment.cfg -# The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to -# the end -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.vcs-url="https://github.com/kbase/staging_service.git" \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.schema-version="1.1.8" \ - us.kbase.vcs-branch=$BRANCH \ - maintainer="Steve Chan sychan@lbl.gov" +# Configuration for mapping file extensions to importers +COPY ./deployment/conf/supported_apps_w_extensions.json /kb/deployment/conf/supported_apps_w_extensions.json + +# The service code. +COPY ./staging_service /kb/deployment/lib/staging_service -ENTRYPOINT ["/kb/deployment/bin/entrypoint.sh"] +ENTRYPOINT ["/kb/module/deployment/bin/entrypoint.sh"] diff --git a/deployment/bin/entrypoint.sh b/deployment/bin/entrypoint.sh index 787b2446..083f9eab 100755 --- a/deployment/bin/entrypoint.sh +++ b/deployment/bin/entrypoint.sh @@ -1,19 +1,10 @@ -#!/bin/bash -# if you change this file and the places it looks for local paths: -# please also update the launch.json for vscode accordingly -# the things that must be kept in sync are KB_DEPLOYMENT_CONFIG and PYTHONPATH +#!/usr/bin/env bash -#top section for local running -DIR="$( cd "$( dirname "$0" )" && pwd )" -if [ -d "$DIR/../../staging_service" ]; then - PYTHONPATH="$DIR/../../staging_service" - export KB_DEPLOYMENT_CONFIG="$DIR/../conf/local.cfg" - export FILE_LIFETIME="90" -fi +# +# This is the production entrypoint, whose sole job is to start the service. +# The service starts via staging_service/__main__.py which is why the python +# invocation below references the staging_service directory. +# -#bottom section for running inside docker -if [ -d "kb/deployment/lib/staging_service" ]; then - PYTHONPATH="kb/deployment/lib/staging_service" - # environment variable for KB_DEPLOYMENT_CONFIG set in docker-compose.yml -fi -python3 -m staging_service \ No newline at end of file +export PYTHONPATH="/kb/deployment/lib" +python -m staging_service \ No newline at end of file diff --git a/development/docker-compose-kbase-ui.yml b/development/docker-compose-kbase-ui.yml deleted file mode 100644 index 170a4036..00000000 --- a/development/docker-compose-kbase-ui.yml +++ /dev/null @@ -1,30 +0,0 @@ -# This docker compose file works with kbase-ui - specifically the kbase-dev -# network that allows proxying service requests from kbase-ui and the narrative. -version: "3.8" -networks: - kbase-dev: - name: kbase-dev - external: true -services: - staging_service: - hostname: staging_service - build: - context: . 
- ports: - # TODO: should be port 5000 to match other kbase services - - "3010:3000" - volumes: - - "./data:/kb/deployment/lib/src/data" - - "./staging_service:/kb/deployment/lib/staging_service" - networks: - - kbase-dev - dns: - - 8.8.8.8 - environment: - - KB_DEPLOYMENT_CONFIG=/kb/deployment/conf/deployment.cfg - - FILE_LIFETIME=90 - # - SAVE_STRATEGY=SAVE_STRATEGY_TEMP_THEN_COPY - # - SAVE_STRATEGY=SAVE_STRATEGY_SAVE_TO_DESTINATION - # - CHUNK_SIZE= - entrypoint: - ["/kb/deployment/bin/entrypoint-dev.sh"] diff --git a/development/scripts/run b/development/scripts/run index c5becdfd..dce2653f 100755 --- a/development/scripts/run +++ b/development/scripts/run @@ -4,6 +4,5 @@ PROJECT_DIRECTORY=${DIR:-$PWD} echo "Project Directory: ${PROJECT_DIRECTORY}" docker compose \ -f "${PROJECT_DIRECTORY}/development/tools/runner/docker-compose.yml" \ - --project-directory "${PROJECT_DIRECTORY}" \ --project-name tools \ run --rm runner "${@:1}" \ No newline at end of file diff --git a/development/tools/runner/Dockerfile b/development/tools/runner/Dockerfile deleted file mode 100644 index 512bbe5f..00000000 --- a/development/tools/runner/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM python:3.11.5-slim-bookworm -# ----------------------------------------- -# We use the "stable" debian release; python does not seem to release current -# versions on LTS debian releases. - -# We don't upgrade, and we pin all os dependency versions -RUN apt-get update && \ - apt-get install -y --no-install-recommends zip=3.0-13 unzip=6.0-28 bzip2=1.0.8-5+b1 libmagic-dev=1:5.44-3 htop=3.2.2-2 wget=1.21.3-1+b2 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -SHELL ["/bin/bash", "-c"] - -RUN mkdir -p /app - -WORKDIR /app - -# Standard simplified python setup. -COPY ./requirements.txt /app/requirements.txt -COPY ./requirements_dev.txt /app/requirements_dev.txt -RUN python -m pip install "pip==23.2.1" && pip install -r /app/requirements.txt -r /app/requirements_dev.txt - -ENV PATH="/app:${PATH}" - -# Run as a regular user, not root. -# RUN addgroup --system kbapp && \ -# adduser --system --ingroup kbapp kbapp && \ -# chown -R kbapp:kbapp /app - -# USER kbapp - -ENTRYPOINT [ "/app/development/tools/runner/entrypoint.sh" ] - -CMD [ "bash" ] \ No newline at end of file diff --git a/development/tools/runner/docker-compose.yml b/development/tools/runner/docker-compose.yml index f1b8e1ea..9d48d9d3 100644 --- a/development/tools/runner/docker-compose.yml +++ b/development/tools/runner/docker-compose.yml @@ -2,9 +2,9 @@ version: '3.8' services: runner: build: - context: . - dockerfile: development/tools/runner/Dockerfile + context: ../../.. + target: dev volumes: - - .:/app - command: - - bash + - ../../..:/kb/module + entrypoint: + - /kb/module/development/tools/runner/entrypoint.sh diff --git a/development/tools/runner/entrypoint.sh b/development/tools/runner/entrypoint.sh index 2ba1545b..5939162f 100755 --- a/development/tools/runner/entrypoint.sh +++ b/development/tools/runner/entrypoint.sh @@ -1,9 +1,10 @@ -#!/bin/bash +#!/usr/bin/env bash set -e # Nice to set this up for all future python code being run. -export PYTHONPATH="${PWD}/src" +# We set the root of the repo as the python path. +export PYTHONPATH="${PWD}" # # This execs whatever is provided as a COMMAND to the container. 
By default, as established diff --git a/docker-compose.yml b/docker-compose.yml index f14b742c..ba0f0576 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,16 +1,37 @@ # WARNING this file does not sync up with the deployment automatically +# +# This compose file is for running the service locally. +# +# Local (host) directories are overlaid into the container in order to facilitate +# live coding of source while exercising the service api live. +# The file locations in the container should be those expected by the service. +# +# It is probably more efficacious to use a devcontainer, e.g. with VSC, though, +# so that the development UI can recognize installed libraries, etc. +# +# The "kbase-dev" network allows the container to interact with KBase user interfaces +# via the docker network. In this development scenario, kbase-ui provides a proxy so +# that kbase-ui and the narrative can communicate with the local service over +# https://ci.kbase.us and provide a natural co-development experience. +# version: "3.1" +networks: + kbase-dev: + name: kbase-dev services: staging_service: - image: kbase/staging_service:develop + # image: kbase/staging_service:develop + build: + context: . + networks: + - kbase-dev ports: - "3000:3000" # The following mount assumes the user dirs are under /data/bulk/{username} # it further assumes that there is a pre-existing /data/metadata directory volumes: - - "./data:/kb/deployment/lib/src/data" - - "./:/staging_service" - + - ./data:/kb/deployment/lib/src/data + - ./staging_service:/kb/deployment/lib/staging_service environment: - - KB_DEPLOYMENT_CONFIG=/kb/deployment/conf/deployment.cfg - - FILE_LIFETIME="90" + - KB_DEPLOYMENT_CONFIG=/kb/module/deployment/conf/deployment.cfg + - FILE_LIFETIME=90 diff --git a/docs/development/maintaining-dockerfile.md b/docs/development/maintaining-dockerfile.md index 10cb13e4..1d67e389 100644 --- a/docs/development/maintaining-dockerfile.md +++ b/docs/development/maintaining-dockerfile.md @@ -7,10 +7,10 @@ recent release, when possible. ## OS version -The OS should be debian, as debian/ubunto are widely used at KBase. The version should +The OS should be Debian, as Debian/Ubuntu are widely used at KBase. The version should be the current stable. The LTS version, which is often several versions behind stable, -does not seem to be supported by recent Python distributions. That is, to get the most -recent Python versions requires we can't use LTS, and conversely if we wished to use the +does not seem to be supported by recent Python distributions. That is, if we want to get +recent Python versions we probably can't use LTS, and conversely if we wished to use the LTS debian we wouldn't be able to use the most recent Python releases. It is more important to have a current Python, than a current OS. The OS really isn't @@ -21,26 +21,24 @@ concerned with. ## OS dependencies The OS dependencies are indicated in the Dockerfile as exact versions. This ensures that -images are consistent, even as the base image evolves over time. - -## Different Dockerfiles - -At present, there are three Dockerfiles which must be kept in synchrony. They differ -enough that the inconvenience of keeping them consistent seems worth the effort. 
- -- `./Dockerfile` - - used for the production image - - needs to copy all service files - - does not include development Python dependencies - - has the production entrypoint -- `./.devcontainer/Dockerfile` - - used for the devcontainer workflow - - does not copy service files - - contains development Python dependencies - - no entrypoint as that is provided by the docker-compose.yml -- `./development/tools/Dockerfile` - - used for the host cli tools - - does not copy service files - - contains development Python dependencies - - has special entrypoint which execs whatever command is passed from the command - line +images are consistent, even as the base image evolves over time. We should keep an eye +on this, as there are reports from some Linux distros (e.g. Alpine) that package +retention is not permanent, and older packages may eventually be dropped, meaning that a +future build may actually fail if it pins package versions. There is some controversy +over this, with distro maintainers complaining that they dont' have infinite storage +space for all package versions they have distributed. + +## Dockerfile Design + +The Dockerfile serves at least three purpopses. Its design reflects this by utilizing a +multi-stage build. Multi-stage builds are primarily for creating internal build layers +that can be omitted from the final image. However, in this case we use this design to +create a base image with most OS and Python dependencies installed, a dev stage which +has development dependencies (OS and Python) installed, and finally a production stage +which adds all runtime files and the entrypoint. + +The dev stage is used by the devcontainer and tool docker-compose files to run a +container which is all ready for development, just needing the repo to be volume mounted +at `/kb/module``. + +The production deploy image can be exercised with the top level `docker-compose.yml`. From 34a463621cee5f5180d9976e8ca443fa912ad281 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 15 Sep 2023 11:27:23 -0700 Subject: [PATCH 36/40] consolidate tool config into pyproject [PTV-1888] includes aligning black to the previous line length of 100, aligns isort to black (otherwise they fight each other), set up mypy and autoflake, flake8 doesn't use pyproject --- pyproject.toml | 23 +++++++++++++++++++++++ tox.ini | 5 ++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..37d001c3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[tool.black] +line-length = 100 + +[tool.isort] +profile = "black" + +[tool.mypy] +strict=true + +[[tool.mypy.overrides]] +module = "aiohttp_cors.*" +ignore_missing_imports = true + + +[[tool.mypy.overrides]] +module = "globus_sdk.*" +ignore_missing_imports = true + + +[tool.autoflake] +remove_all_unused_imports = true +remove_unused_variables = true +quiet = true diff --git a/tox.ini b/tox.ini index 51b50a04..bba11d30 100644 --- a/tox.ini +++ b/tox.ini @@ -1,2 +1,5 @@ [flake8] -max-line-length = 100 \ No newline at end of file +# unfortunately, flake8 does not play with pyproject.tom. what a shame. +# one day perhaps we'll switch to ruff. 
+max-line-length = 100 +extend-ignore = E203 \ No newline at end of file From 2090307eb2cf447eca2c08b1a19ed5cae67ea692 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 15 Sep 2023 11:28:21 -0700 Subject: [PATCH 37/40] update python dependencies [PTV-1888] Hold back globus_sdk --- requirements.txt | 2 +- requirements_dev.txt | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5552ecbc..8c8cf3c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,12 +3,12 @@ aiohttp==3.8.5 aiohttp_cors==0.7.0 defusedxml==0.7.1 frozendict==2.3.8 +# Hold globus-sdk back until further notice. globus-sdk==1.6.1 gunicorn==21.2.0 openpyxl==3.1.2 pandas==2.1.0 python-dotenv==1.0.0 python-magic==0.4.27 -types-openpyxl===3.1.0.17 uvloop==0.17.0 xlrd==2.0.1 # fairly out of date \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt index cfd35eb6..c13b5200 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,11 +1,12 @@ -autoflake==2.2.0 -black==23.7.0 -coverage==7.3.0 -hypothesis==6.82.7 +autoflake==2.2.1 +black==23.9.1 +coverage==7.3.1 +hypothesis==6.84.3 mypy==1.5.1 -pymarkdownlnt==0.9.12 +pymarkdownlnt==0.9.13.4 pylint==2.17.5 -pytest-aiohttp==1.0.4 +pytest-aiohttp==1.0.5 pytest-asyncio==0.21.1 pytest-cov==4.1.0 -pytest==7.4.0 +pytest==7.4.2 +types-openpyxl===3.1.0.19 From 73b02666efe7d03a69ec5d20de0bb8bb36354a0e Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 15 Sep 2023 11:28:41 -0700 Subject: [PATCH 38/40] set pythonpath [PTV-1888] --- scripts/run_tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index db638ef3..5e72dc3e 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -3,6 +3,7 @@ # DIR is the root of the project. DIR="$( cd "$( dirname "$0" )" && pwd )/.." 
+export PYTHONPATH="${PWD}" export KB_DEPLOYMENT_CONFIG="${DIR}/deployment/conf/testing.cfg" export FILE_LIFETIME="90" From 34f6f3698ccbf21738188738fc3cc4d6593e1a6d Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 15 Sep 2023 11:29:50 -0700 Subject: [PATCH 39/40] apply black and isort after new configs [PTV-1888] --- staging_service/app.py | 52 +++++-------------- staging_service/app_error_formatter.py | 4 +- staging_service/auth2Client.py | 8 +-- .../autodetect/GenerateMappings.py | 47 ++++++++++++----- staging_service/autodetect/Mappings.py | 8 +-- staging_service/globus.py | 4 +- .../import_specifications/file_parser.py | 4 +- .../import_specifications/file_writers.py | 38 ++++---------- .../individual_parsers.py | 31 +++-------- staging_service/metadata.py | 8 +-- staging_service/utils.py | 46 +++++----------- 11 files changed, 84 insertions(+), 166 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index 89a1d4a3..a2e57f46 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -123,9 +123,7 @@ async def bulk_specification(request: web.Request) -> web.json_response: res = parse_import_specifications( tuple(list(paths)), _file_type_resolver, - lambda e: logging.error( - "Unexpected error while parsing import specs", exc_info=e - ), + lambda e: logging.error("Unexpected error while parsing import specs", exc_info=e), ) if res.results: types = {dt: result.result for dt, result in res.results.items()} @@ -191,9 +189,7 @@ async def write_bulk_specification(request: web.Request) -> web.json_response: folder = data.get("output_directory") type_ = data.get("output_file_type") if type(folder) != str: - return _createJSONErrorResponse( - "output_directory is required and must be a string" - ) + return _createJSONErrorResponse("output_directory is required and must be a string") writer = _IMPSPEC_FILE_TO_WRITER.get(type_) if not writer: return _createJSONErrorResponse(f"Invalid output_file_type: {type_}") @@ -218,12 +214,8 @@ async def add_acl_concierge(request: web.Request): user_dir = Path.validate_path(username).full_path concierge_path = f"{Path._CONCIERGE_PATH}/{username}/" aclm = AclManager() - result = aclm.add_acl_concierge( - shared_directory=user_dir, concierge_path=concierge_path - ) - result[ - "msg" - ] = f"Requesting Globus Perms for the following globus dir: {concierge_path}" + result = aclm.add_acl_concierge(shared_directory=user_dir, concierge_path=concierge_path) + result["msg"] = f"Requesting Globus Perms for the following globus dir: {concierge_path}" result[ "link" ] = f"https://app.globus.org/file-manager?destination_id={aclm.endpoint_id}&destination_path={concierge_path}" @@ -273,9 +265,7 @@ async def file_exists(request: web.Request): show_hidden = False # this scans the entire directory recursively just to see if one file exists... why? 
results = await dir_info(user_dir, show_hidden, query) - filtered_results = [ - file_info for file_info in results if file_info["name"] == query - ] + filtered_results = [file_info for file_info in results if file_info["name"] == query] if filtered_results: exists = True just_is_folder = [file_info["isFolder"] for file_info in filtered_results] @@ -322,9 +312,7 @@ async def download_files(request: web.Request): elif not os.path.isfile(path.full_path): raise web.HTTPBadRequest(text=f"{path.full_path} is a directory not a file") # hard coding the mime type to force download - return web.FileResponse( - path.full_path, headers={"content-type": "application/octet-stream"} - ) + return web.FileResponse(path.full_path, headers={"content-type": "application/octet-stream"}) @routes.get("/similar/{path:.+}") @@ -413,9 +401,7 @@ async def upload_files_chunked(request: web.Request): counter = 0 user_file = None destination_path = None - while ( - counter < 100 - ): # TODO this is arbitrary to keep an attacker from creating infinite loop + while counter < 100: # TODO this is arbitrary to keep an attacker from creating infinite loop # This loop handles the null parts that come in inbetween destpath and file part = await reader.next() @@ -487,9 +473,7 @@ async def define_upa(request: web.Request): except KeyError as key_error: raise web.HTTPBadRequest(text="must provide UPA field in body") from key_error await add_upa(path, UPA) - return web.Response( - text=f"successfully updated UPA {UPA} for file {path.user_path}" - ) + return web.Response(text=f"successfully updated UPA {UPA} for file {path.user_path}") @routes.delete("/delete/{path:.+}") @@ -533,9 +517,7 @@ async def rename(request: web.Request): try: new_path = body["newPath"] except KeyError as wrong_key: - raise web.HTTPBadRequest( - text="must provide newPath field in body" - ) from wrong_key + raise web.HTTPBadRequest(text="must provide newPath field in body") from wrong_key new_path = Path.validate_path(username, new_path) if os.path.exists(path.full_path): if not os.path.exists(new_path.full_path): @@ -546,9 +528,7 @@ async def rename(request: web.Request): raise web.HTTPConflict(text="{new_path.user_path} allready exists") else: raise web.HTTPNotFound(text=f"{path.user_path} not found") - return web.Response( - text=f"successfully moved {path.user_path} to {new_path.user_path}" - ) + return web.Response(text=f"successfully moved {path.user_path} to {new_path.user_path}") @routes.patch("/decompress/{path:.+}") @@ -563,13 +543,9 @@ async def decompress(request: web.Request): # 2 could try again after doing an automatic rename scheme (add numbers to end) # 3 just overwrite and force destination = os.path.dirname(path.full_path) - if ( - upper_file_extension == ".tar" and file_extension == ".gz" - ) or file_extension == ".tgz": + if (upper_file_extension == ".tar" and file_extension == ".gz") or file_extension == ".tgz": await run_command("tar", "xzf", path.full_path, "-C", destination) - elif upper_file_extension == ".tar" and ( - file_extension == ".bz" or file_extension == ".bz2" - ): + elif upper_file_extension == ".tar" and (file_extension == ".bz" or file_extension == ".bz2"): await run_command("tar", "xjf", path.full_path, "-C", destination) elif file_extension == ".zip" or file_extension == ".ZIP": await run_command("unzip", path.full_path, "-d", destination) @@ -640,9 +616,7 @@ def inject_config_dependencies(config): if FILE_EXTENSION_MAPPINGS is None: raise Exception("Please provide FILE_EXTENSION_MAPPINGS in the config file ") - with 
open( - FILE_EXTENSION_MAPPINGS, "r", encoding="utf-8" - ) as file_extension_mappings_file: + with open(FILE_EXTENSION_MAPPINGS, "r", encoding="utf-8") as file_extension_mappings_file: AutoDetectUtils.set_mappings(json.load(file_extension_mappings_file)) datatypes = defaultdict(set) extensions = defaultdict(set) diff --git a/staging_service/app_error_formatter.py b/staging_service/app_error_formatter.py index 5766444a..1e83212b 100644 --- a/staging_service/app_error_formatter.py +++ b/staging_service/app_error_formatter.py @@ -68,8 +68,6 @@ def format_import_spec_errors( file2 = str(path_translations[error.source_2.file]) tab2 = error.source_2.tab errs.append( - _IMPORT_SPEC_ERROR_FORMATTERS[error.error_type]( - error.message, file1, tab1, file2, tab2 - ) + _IMPORT_SPEC_ERROR_FORMATTERS[error.error_type](error.message, file1, tab1, file2, tab2) ) return errs diff --git a/staging_service/auth2Client.py b/staging_service/auth2Client.py index 3b136b4f..27a43443 100644 --- a/staging_service/auth2Client.py +++ b/staging_service/auth2Client.py @@ -40,9 +40,7 @@ def add_valid_token(self, token, user, expire_time): token = hashlib.sha256(token.encode("utf8")).hexdigest() self._cache[token] = [user, _time.time(), expire_time] if len(self._cache) > self._maxsize: - for i, (t, _) in enumerate( - sorted(self._cache.items(), key=lambda v: v[1][1]) - ): + for i, (t, _) in enumerate(sorted(self._cache.items(), key=lambda v: v[1][1])): if i <= self._halfmax: del self._cache[t] else: @@ -69,9 +67,7 @@ async def get_user(self, token): return user async with aiohttp.ClientSession() as session: - async with session.get( - self._authurl, headers={"Authorization": token} - ) as resp: + async with session.get(self._authurl, headers={"Authorization": token}) as resp: ret = await resp.json() if resp.reason != "OK": http_code = ret["error"]["httpcode"] diff --git a/staging_service/autodetect/GenerateMappings.py b/staging_service/autodetect/GenerateMappings.py index ab4e44da..528d61b6 100644 --- a/staging_service/autodetect/GenerateMappings.py +++ b/staging_service/autodetect/GenerateMappings.py @@ -26,17 +26,38 @@ """ from collections import defaultdict -from staging_service.autodetect.Mappings import CSV, EXCEL, FASTA, FASTQ, GENBANK, GFF, JSON, SBML, SRA, TSV, ZIP, \ - assembly_id, \ - decompress_id, \ - escher_map_id, \ - expression_matrix_id, \ - extension_to_file_format_mapping, fastq_reads_interleaved_id, \ - fastq_reads_noninterleaved_id, \ - fba_model_id, file_format_to_extension_mapping, genbank_genome_id, gff_genome_id, gff_metagenome_id, \ - import_specification, media_id, \ - metabolic_annotations_bulk_id, \ - metabolic_annotations_id, phenotype_set_id, sample_set_id, sra_reads_id +from staging_service.autodetect.Mappings import ( + CSV, + EXCEL, + FASTA, + FASTQ, + GENBANK, + GFF, + JSON, + SBML, + SRA, + TSV, + ZIP, + assembly_id, + decompress_id, + escher_map_id, + expression_matrix_id, + extension_to_file_format_mapping, + fastq_reads_interleaved_id, + fastq_reads_noninterleaved_id, + fba_model_id, + file_format_to_extension_mapping, + genbank_genome_id, + gff_genome_id, + gff_metagenome_id, + import_specification, + media_id, + metabolic_annotations_bulk_id, + metabolic_annotations_id, + phenotype_set_id, + sample_set_id, + sra_reads_id, +) # Note that some upload apps are not included - in particular batch apps, which are now # redundant, and MSAs and attribute mappings because they're out of scope at the current time. 
@@ -104,9 +125,7 @@ app_id_to_extensions = defaultdict(list) for filecat, apps in file_format_to_app_mapping.items(): for app_id in apps: - app_id_to_extensions[app_id].extend( - file_format_to_extension_mapping[filecat] - ) + app_id_to_extensions[app_id].extend(file_format_to_extension_mapping[filecat]) # Create the mapping between file extensions and apps # For example, the .gbk and .genkbank extensions map to app with id of "genbank_genome" diff --git a/staging_service/autodetect/Mappings.py b/staging_service/autodetect/Mappings.py index fffd09ac..af08e54c 100644 --- a/staging_service/autodetect/Mappings.py +++ b/staging_service/autodetect/Mappings.py @@ -71,9 +71,7 @@ def _flatten(some_list): # longer term there's probably a better way to do this but this is quick def _add_gzip(extension_list): - return _flatten( - [[ext + comp for comp in _COMPRESSION_EXT] for ext in extension_list] - ) + return _flatten([[ext + comp for comp in _COMPRESSION_EXT] for ext in extension_list]) file_format_to_extension_mapping = { @@ -116,7 +114,5 @@ def _add_gzip(extension_list): for ext in extensions: if ext in extension_to_file_format_mapping: type2 = extension_to_file_format_mapping[ext] - raise ValueError( - f"Duplicate entry for extension {ext} in {type_} and {type2}" - ) + raise ValueError(f"Duplicate entry for extension {ext} in {type_} and {type2}") extension_to_file_format_mapping[ext] = type_ diff --git a/staging_service/globus.py b/staging_service/globus.py index 6a905adf..2b23666a 100644 --- a/staging_service/globus.py +++ b/staging_service/globus.py @@ -64,7 +64,5 @@ async def assert_globusid_exists(username, token): # such as the commented code below, for multiple linked accounts # text = '\n'.join(globus_ids) text = globus_ids[0] - async with aiofiles.open( - path.full_path, mode="w", encoding="utf-8" - ) as globus_file: + async with aiofiles.open(path.full_path, mode="w", encoding="utf-8") as globus_file: await globus_file.writelines(text) diff --git a/staging_service/import_specifications/file_parser.py b/staging_service/import_specifications/file_parser.py index 9347a27a..7c51af85 100644 --- a/staging_service/import_specifications/file_parser.py +++ b/staging_service/import_specifications/file_parser.py @@ -171,9 +171,7 @@ class FileTypeResolution: def __post_init__(self): if not (bool(self.parser) ^ bool(self.unsupported_type)): # xnor - raise ValueError( - "Exactly one of parser or unsupported_type must be supplied" - ) + raise ValueError("Exactly one of parser or unsupported_type must be supplied") def parse_import_specifications( diff --git a/staging_service/import_specifications/file_writers.py b/staging_service/import_specifications/file_writers.py index 02a1e455..a56b3eeb 100644 --- a/staging_service/import_specifications/file_writers.py +++ b/staging_service/import_specifications/file_writers.py @@ -89,13 +89,9 @@ def _check_import_specification(types: dict[str, dict[str, list[Any]]]): _check_string(datatype, "A data type") spec = types[datatype] if not isinstance(spec, dict): - raise ImportSpecWriteException( - f"The value for data type {datatype} must be a mapping" - ) + raise ImportSpecWriteException(f"The value for data type {datatype} must be a mapping") if _ORDER_AND_DISPLAY not in spec: - raise ImportSpecWriteException( - f"Data type {datatype} missing {_ORDER_AND_DISPLAY} key" - ) + raise ImportSpecWriteException(f"Data type {datatype} missing {_ORDER_AND_DISPLAY} key") _check_is_sequence( spec[_ORDER_AND_DISPLAY], f"Data type {datatype} {_ORDER_AND_DISPLAY} value" ) @@ 
-109,10 +105,7 @@ def _check_import_specification(types: dict[str, dict[str, list[Any]]]): param_ids = set() for i, id_display in enumerate(spec[_ORDER_AND_DISPLAY]): - err = ( - f"Invalid {_ORDER_AND_DISPLAY} entry for datatype {datatype} " - + f"at index {i} " - ) + err = f"Invalid {_ORDER_AND_DISPLAY} entry for datatype {datatype} " + f"at index {i} " _check_is_sequence(id_display, err + "- the entry") if len(id_display) != 2: raise ImportSpecWriteException(err + "- expected 2 item list") @@ -129,14 +122,9 @@ def _check_import_specification(types: dict[str, dict[str, list[Any]]]): err + f" does not have the same keys as {_ORDER_AND_DISPLAY}" ) for pid, v in datarow.items(): - if ( - v is not None - and not isinstance(v, numbers.Number) - and not isinstance(v, str) - ): + if v is not None and not isinstance(v, numbers.Number) and not isinstance(v, str): raise ImportSpecWriteException( - err - + f"'s value for parameter {pid} is not a number or a string" + err + f"'s value for parameter {pid} is not a number or a string" ) @@ -148,9 +136,7 @@ def _check_string(tocheck: Any, errprefix: str): def _check_is_sequence(tocheck: Any, errprefix: str): - if not ( - isinstance(tocheck, collections.abc.Sequence) and not isinstance(tocheck, str) - ): + if not (isinstance(tocheck, collections.abc.Sequence) and not isinstance(tocheck, str)): raise ImportSpecWriteException(errprefix + " is not a list") @@ -170,9 +156,7 @@ def write_tsv(folder: Path, types: dict[str, dict[str, list[Any]]]) -> dict[str, return _write_xsv(folder, types, _EXT_TSV, _SEP_TSV) -def _write_xsv( - folder: Path, types: dict[str, dict[str, list[Any]]], ext: str, sep: str -): +def _write_xsv(folder: Path, types: dict[str, dict[str, list[Any]]], ext: str, sep: str): _check_write_args(folder, types) res = {} for datatype in types: @@ -206,9 +190,7 @@ def _check_write_args(folder: Path, types: dict[str, dict[str, list[Any]]]): _check_import_specification(types) -def write_excel( - folder: Path, types: dict[str, dict[str, list[Any]]] -) -> dict[str, Path]: +def write_excel(folder: Path, types: dict[str, dict[str, list[Any]]]) -> dict[str, Path]: """ Writes import specifications to an Excel files. All the writers in this module have the same function signatures; see the module level documentation. 
@@ -253,9 +235,7 @@ def _expand_excel_columns_to_max_width(sheet: Worksheet): # https://stackoverflow.com/a/40935194/643675 for column_cells in sheet.columns: length = max(len(_as_text(cell.value)) for cell in column_cells) - sheet.column_dimensions[ - get_column_letter(column_cells[0].column) - ].width = length + sheet.column_dimensions[get_column_letter(column_cells[0].column)].width = length def _as_text(value): diff --git a/staging_service/import_specifications/individual_parsers.py b/staging_service/import_specifications/individual_parsers.py index 42183157..00eee18b 100644 --- a/staging_service/import_specifications/individual_parsers.py +++ b/staging_service/import_specifications/individual_parsers.py @@ -177,9 +177,7 @@ def _parse_xsv(path: Path, sep: str) -> ParseResults: try: filetype = magic.from_file(str(path), mime=True) if filetype not in _MAGIC_TEXT_FILES: - return _error( - Error(ErrorType.PARSE_FAIL, "Not a text file: " + filetype, spcsrc) - ) + return _error(Error(ErrorType.PARSE_FAIL, "Not a text file: " + filetype, spcsrc)) with open(path, newline="", encoding="utf-8") as input_: rdr = csv.reader(input_, delimiter=sep) # let parser handle quoting dthd = _csv_next(rdr, 1, None, spcsrc, "Missing data type / version header") @@ -202,24 +200,15 @@ def _parse_xsv(path: Path, sep: str) -> ParseResults: ) ) results.append( - frozendict( - { - param_ids[j]: _normalize_xsv(row[j]) - for j in range(len(row)) - } - ) + frozendict({param_ids[j]: _normalize_xsv(row[j]) for j in range(len(row))}) ) if not results: - raise _ParseException( - Error(ErrorType.PARSE_FAIL, "No non-header data in file", spcsrc) - ) + raise _ParseException(Error(ErrorType.PARSE_FAIL, "No non-header data in file", spcsrc)) return ParseResults(frozendict({datatype: ParseResult(spcsrc, tuple(results))})) except FileNotFoundError: return _error(Error(ErrorType.FILE_NOT_FOUND, source_1=spcsrc)) except IsADirectoryError: - return _error( - Error(ErrorType.PARSE_FAIL, "The given path is a directory", spcsrc) - ) + return _error(Error(ErrorType.PARSE_FAIL, "The given path is a directory", spcsrc)) except _ParseException as e: return _error(e.args[0]) @@ -255,9 +244,7 @@ def _process_excel_row( def _process_excel_tab( excel: pandas.ExcelFile, spcsrc: SpecificationSource ) -> Tuple[Optional[str], Optional[ParseResult]]: - df = excel.parse( - sheet_name=spcsrc.tab, na_values=_EXCEL_MISSING_VALUES, keep_default_na=False - ) + df = excel.parse(sheet_name=spcsrc.tab, na_values=_EXCEL_MISSING_VALUES, keep_default_na=False) if df.shape[0] < 3: # might as well not error check headers in sheets with no data return (None, None) # at this point we know that at least 4 lines are present - expecting the data type header, @@ -273,9 +260,7 @@ def _process_excel_tab( row = _process_excel_row(row, i, columns, spcsrc) if any(map(lambda x: not pandas.isna(x), row)): # skip empty rows results.append( - frozendict( - {param_ids[j]: _normalize_pandas(row[j]) for j in range(len(row))} - ) + frozendict({param_ids[j]: _normalize_pandas(row[j]) for j in range(len(row))}) ) return datatype, ParseResult(spcsrc, tuple(results)) @@ -314,9 +299,7 @@ def parse_excel(path: Path) -> ParseResults: except FileNotFoundError: return _error(Error(ErrorType.FILE_NOT_FOUND, source_1=spcsrc)) except IsADirectoryError: - return _error( - Error(ErrorType.PARSE_FAIL, "The given path is a directory", spcsrc) - ) + return _error(Error(ErrorType.PARSE_FAIL, "The given path is a directory", spcsrc)) except ValueError as e: if "Excel file format cannot be 
determined" in str(e): return _error( diff --git a/staging_service/metadata.py b/staging_service/metadata.py index 0b7f0af8..0a2167f9 100644 --- a/staging_service/metadata.py +++ b/staging_service/metadata.py @@ -116,9 +116,7 @@ async def _only_source(path: Path): return data["source"] -async def dir_info( - path: Path, show_hidden: bool, query: str = "", recurse=True -) -> list: +async def dir_info(path: Path, show_hidden: bool, query: str = "", recurse=True) -> list: """ only call this on a validated full path """ @@ -133,9 +131,7 @@ async def dir_info( if query == "" or specific_path.user_path.find(query) != -1: response.append(await stat_data(specific_path)) if recurse: - response.extend( - await dir_info(specific_path, show_hidden, query, recurse) - ) + response.extend(await dir_info(specific_path, show_hidden, query, recurse)) if entry.is_file(): if query == "" or specific_path.user_path.find(query) != -1: data = await stat_data(specific_path) diff --git a/staging_service/utils.py b/staging_service/utils.py index 65282d5f..835ccedc 100644 --- a/staging_service/utils.py +++ b/staging_service/utils.py @@ -31,12 +31,10 @@ async def run_command(*args): if process.returncode == 0: return stdout.decode().strip() else: - error_msg = ( - "command {cmd} failed\nreturn code: {returncode}\nerror: {error}".format( - cmd=" ".join(args), - returncode=process.returncode, - error=stderr.decode().strip(), - ) + error_msg = "command {cmd} failed\nreturn code: {returncode}\nerror: {error}".format( + cmd=" ".join(args), + returncode=process.returncode, + error=stderr.decode().strip(), ) raise HTTPInternalServerError(text=error_msg) @@ -100,15 +98,9 @@ def __init__(self): client = globus_sdk.NativeAppAuthClient(cf["client_id"]) try: - transfer_authorizer = globus_sdk.RefreshTokenAuthorizer( - cf["transfer_token"], client - ) - self.globus_transfer_client = globus_sdk.TransferClient( - authorizer=transfer_authorizer - ) - auth_authorizer = globus_sdk.RefreshTokenAuthorizer( - cf["auth_token"], client - ) + transfer_authorizer = globus_sdk.RefreshTokenAuthorizer(cf["transfer_token"], client) + self.globus_transfer_client = globus_sdk.TransferClient(authorizer=transfer_authorizer) + auth_authorizer = globus_sdk.RefreshTokenAuthorizer(cf["auth_token"], client) self.globus_auth_client = globus_sdk.AuthClient(authorizer=auth_authorizer) except globus_sdk.GlobusAPIError as error: logging.error(str(error.code) + error.raw_text) @@ -124,18 +116,14 @@ def _get_globus_identities(self, shared_directory: str): globus_id_filename = "{}.globus_id".format(shared_directory) with open(globus_id_filename, "r", encoding="utf-8") as fp: ident = fp.read() - return self.globus_auth_client.get_identities( - usernames=ident.split("\n")[0] - ) + return self.globus_auth_client.get_identities(usernames=ident.split("\n")[0]) def _get_globus_identity(self, globus_id_filename: str): """ Get the first identity for the username in the .globus_id file """ try: - return self._get_globus_identities(globus_id_filename)["identities"][0][ - "id" - ] + return self._get_globus_identities(globus_id_filename)["identities"][0]["id"] except FileNotFoundError as error: response = { "success": False, @@ -188,9 +176,7 @@ def _add_acl(self, user_identity_id: str, shared_directory_basename: str): } logging.info(response) - logging.info( - "Shared %s with %s\n", shared_directory_basename, user_identity_id - ) + logging.info("Shared %s with %s\n", shared_directory_basename, user_identity_id) logging.info(response) return response @@ -205,22 +191,16 @@ def 
_add_acl(self, user_identity_id: str, shared_directory_basename: str): } logging.error(response) if error.code == "Exists": - raise HTTPOk( - text=json.dumps(response), content_type="application/json" - ) from error + raise HTTPOk(text=json.dumps(response), content_type="application/json") from error - raise HTTPInternalServerError( - text=json.dumps(response), content_type="application/json" - ) + raise HTTPInternalServerError(text=json.dumps(response), content_type="application/json") def _remove_acl(self, user_identity_id: str): """ Get all ACLS and attempt to remove the correct ACL for the given user_identity """ try: - acls = self.globus_transfer_client.endpoint_acl_list(self.endpoint_id)[ - "DATA" - ] + acls = self.globus_transfer_client.endpoint_acl_list(self.endpoint_id)["DATA"] for acl in acls: if user_identity_id == acl["principal"]: if "id" in acl and acl["id"] is not None: From 4d4dc3849c97c00d00218fed14398460b4b9a201 Mon Sep 17 00:00:00 2001 From: Erik Pearson Date: Fri, 15 Sep 2023 11:47:12 -0700 Subject: [PATCH 40/40] remove extra comma [PTV-1888] --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 96db58bd..18c196d8 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -23,7 +23,7 @@ "mikoz.autoflake-extension", "ms-azuretools.vscode-docker", "DavidAnson.vscode-markdownlint", - "matangover.mypy", + "matangover.mypy" ] } }