diff --git a/staging_service/app.py b/staging_service/app.py
index cd668dc7..167e83cd 100644
--- a/staging_service/app.py
+++ b/staging_service/app.py
@@ -1,3 +1,4 @@
+from collections.abc import Callable
 import json
 import logging
 import os
@@ -12,7 +13,7 @@
 from .app_error_formatter import format_import_spec_errors
 from .auth2Client import KBaseAuth2
-from .autodetect.Mappings import CSV, EXCEL, TSV, JSON
+from .autodetect.Mappings import CSV, EXCEL, TSV
 from .AutoDetectUtils import AutoDetectUtils
 from .globus import assert_globusid_exists, is_globusid
 from .import_specifications.file_parser import (
@@ -44,12 +45,7 @@
 
 _APP_JSON = "application/json"
 
-_IMPSPEC_FILE_TO_PARSER = {
-    CSV: parse_csv,
-    TSV: parse_tsv,
-    EXCEL: parse_excel,
-    JSON: parse_dts_manifest,
-}
+_IMPSPEC_FILE_TO_PARSER = {CSV: parse_csv, TSV: parse_tsv, EXCEL: parse_excel}
 
 _IMPSPEC_FILE_TO_WRITER = {
     CSV: write_csv,
@@ -57,6 +53,10 @@
     EXCEL: write_excel,
 }
 
+# The constant in autodetect.Mappings isn't guaranteed to be the string we want.
+JSON_EXTENSION = "json"
+NO_EXTENSION = "missing extension"
+
 
 @routes.get("/importer_filetypes/")
 async def importer_filetypes(_: web.Request) -> web.json_response:
@@ -108,6 +108,19 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution:
     return FileTypeResolution(unsupported_type=ext)
 
 
+def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]:
+    """Makes a DTS file resolver."""
+
+    def dts_file_resolver(path: PathPy) -> FileTypeResolution:
+        # must be a ".json" file
+        suffix = path.suffix[1:] if path.suffix else NO_EXTENSION
+        if suffix.lower() != JSON_EXTENSION:
+            return FileTypeResolution(unsupported_type=suffix)
+        return FileTypeResolution(parser=parse_dts_manifest)
+
+    return dts_file_resolver
+
+
 @routes.get("/bulk_specification/{query:.*}")
 async def bulk_specification(request: web.Request) -> web.json_response:
     """
@@ -115,11 +128,10 @@ async def bulk_specification(request: web.Request) -> web.json_response:
     """
     Returns the contents of those files parsed into a list of dictionaries, mapped from the
     data type, in the `types` key.
-    :param request: contains a comma separated list of files, e.g. folder1/file1.txt,file2.txt
-
-    TODO: since JSON files are rather generic and we might want to use a different JSON bulk-spec
-    format later, add a separate query parameter to request that the selected file is treated as a
-    Data Transfer Service manifest.
+    :param request: contains the URL parameters for the request. Expected to have the following:
+        * files (required) - a comma separated list of files, e.g. folder1/file1.txt,file2.txt
+        * dts (optional) - if present, all of the given files are treated as DTS manifest files
+            and parsed accordingly.
     """
     username = await authorize_request(request)
     files = parse_qs(request.query_string).get("files", [])
@@ -129,10 +141,14 @@
     for f in files:
         p = Path.validate_path(username, f)
         paths[PathPy(p.full_path)] = PathPy(p.user_path)
 
+    # list(dict) returns a list of the dict keys in insertion order (py3.7+)
+    file_type_resolver = _file_type_resolver
+    if "dts" in request.query:
+        file_type_resolver = _make_dts_file_resolver()
     res = parse_import_specifications(
         tuple(list(paths)),
-        _file_type_resolver,
+        file_type_resolver,
         lambda e: logging.error("Unexpected error while parsing import specs", exc_info=e),
     )
     if res.results:
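
Note: the outcomes of the new resolver follow directly from the `path.suffix` check above.
A minimal sketch of the expected behavior (illustrative only, not part of the patch; it
assumes `PathPy` is the `pathlib.Path` alias used elsewhere in app.py):

    # Selected by bulk_specification() when the "dts" query parameter is present.
    resolver = _make_dts_file_resolver()
    resolver(PathPy("sub/manifest.json"))  # FileTypeResolution(parser=parse_dts_manifest)
    resolver(PathPy("sub/manifest.JSON"))  # suffix is lowercased first, so also the parser
    resolver(PathPy("sub/manifest.csv"))   # FileTypeResolution(unsupported_type="csv")
    resolver(PathPy("sub/manifest"))       # FileTypeResolution(unsupported_type="missing extension")
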
""" username = await authorize_request(request) files = parse_qs(request.query_string).get("files", []) @@ -129,10 +141,14 @@ async def bulk_specification(request: web.Request) -> web.json_response: for f in files: p = Path.validate_path(username, f) paths[PathPy(p.full_path)] = PathPy(p.user_path) + # list(dict) returns a list of the dict keys in insertion order (py3.7+) + file_type_resolver = _file_type_resolver + if "dts" in request.query: + file_type_resolver = _make_dts_file_resolver() res = parse_import_specifications( tuple(list(paths)), - _file_type_resolver, + file_type_resolver, lambda e: logging.error("Unexpected error while parsing import specs", exc_info=e), ) if res.results: diff --git a/tests/test_app.py b/tests/test_app.py index 54b6535b..f8147154 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1,13 +1,14 @@ import asyncio import configparser import hashlib +import json import os import platform +import pytest import shutil import string import time from io import BytesIO -from json import JSONDecoder from pathlib import Path from typing import Any from urllib.parse import unquote, urlencode @@ -29,7 +30,7 @@ bootstrap() -decoder = JSONDecoder() +decoder = json.JSONDecoder() config = configparser.ConfigParser() config.read(os.environ["KB_DEPLOYMENT_CONFIG"]) @@ -1049,6 +1050,209 @@ async def test_bulk_specification_success(): assert resp.status == 200 +async def test_bulk_specification_dts_success(): + async with AppClient(config) as cli: + with FileUtil() as fu: + sub_dir = "dts_folder" + dts_dir = f"testuser/{sub_dir}" + fu.make_dir(dts_dir) # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" / sub_dir + manifest_1 = "test_manifest_1.json" + manifest_1_dict = { + "resources": [], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { + "data_type": "gff_metagenome", + "parameters": { + "fasta_file": "fasta_1", + "gff_file": "gff_1", + "genome_name": "mg_1", + }, + }, + { + "data_type": "gff_metagenome", + "parameters": { + "fasta_file": "fasta_2", + "gff_file": "gff_2", + "genome_name": "mg_2", + }, + }, + ], + }, + } + manifest_2 = "test_manifest_2.json" + manifest_2_dict = { + "resources": [], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { + "data_type": "gff_genome", + "parameters": { + "fasta_file": "g_fasta_1", + "gff_file": "g_gff_1", + "genome_name": "genome_1", + }, + }, + { + "data_type": "gff_genome", + "parameters": { + "fasta_file": "g_fasta_2", + "gff_file": "g_gff_2", + "genome_name": "genome_2", + }, + }, + ], + }, + } + with open(base / manifest_1, "w", encoding="utf-8") as f: + json.dump(manifest_1_dict, f) + with open(base / manifest_2, "w", encoding="utf-8") as f: + json.dump(manifest_2_dict, f) + resp = await cli.get( + f"bulk_specification/?files={sub_dir}/{manifest_1} , {sub_dir}/{manifest_2}&dts" + ) + jsn = await resp.json() + # fails for now. will update when schema/parser is properly finished. 
+            assert jsn == {
+                "errors": [
+                    {
+                        "type": "cannot_parse_file",
+                        "file": f"{dts_dir}/{manifest_1}",
+                        "message": "No import specification data in file",
+                        "tab": None,
+                    },
+                    {
+                        "type": "cannot_parse_file",
+                        "file": f"{dts_dir}/{manifest_2}",
+                        "message": "No import specification data in file",
+                        "tab": None,
+                    },
+                ]
+            }
+            # soon will be this:
+            # assert jsn == {
+            #     "types": {
+            #         "gff_genome": [
+            #             {"fasta_file": "g_fasta_1", "gff_file": "g_gff_1", "genome_name": "genome_1"},
+            #             {"fasta_file": "g_fasta_2", "gff_file": "g_gff_2", "genome_name": "genome_2"},
+            #         ],
+            #         "gff_metagenome": [
+            #             {"fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1"},
+            #             {"fasta_file": "fasta_2", "gff_file": "gff_2", "genome_name": "mg_2"},
+            #         ],
+            #     },
+            #     "files": {
+            #         "gff_metagenome": {"file": f"{dts_dir}/{manifest_1}", "tab": None},
+            #         "gff_genome": {"file": f"{dts_dir}/{manifest_2}", "tab": None},
+            #     },
+            # }
+            assert resp.status == 400
+
+
+async def test_bulk_specification_dts_fail_json_without_dts():
+    async with AppClient(config) as cli:
+        with FileUtil() as fu:
+            sub_dir = "dts_folder"
+            dts_dir = f"testuser/{sub_dir}"
+            fu.make_dir(dts_dir)  # testuser is hardcoded in the auth mock
+            base = Path(fu.base_dir) / "testuser" / sub_dir
+            manifest_1 = "test_manifest_1.json"
+            manifest_1_dict = {
+                "resources": [],
+                "instructions": {
+                    "protocol": "KBase narrative import",
+                    "objects": [
+                        {
+                            "data_type": "gff_metagenome",
+                            "parameters": {
+                                "fasta_file": "fasta_1",
+                                "gff_file": "gff_1",
+                                "genome_name": "mg_1",
+                            },
+                        }
+                    ],
+                },
+            }
+            with open(base / manifest_1, "w", encoding="utf-8") as f:
+                json.dump(manifest_1_dict, f)
+            resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest_1}")
+            jsn = await resp.json()
+            assert jsn == {
+                "errors": [
+                    {
+                        "type": "cannot_parse_file",
+                        "file": f"{dts_dir}/{manifest_1}",
+                        "message": "json is not a supported file type for import specifications",
+                        "tab": None,
+                    }
+                ]
+            }
+            assert resp.status == 400
+
+
+async def test_bulk_specification_dts_fail_wrong_format():
+    async with AppClient(config) as cli:
+        with FileUtil() as fu:
+            sub_dir = "dts_folder"
+            dts_dir = f"testuser/{sub_dir}"
+            fu.make_dir(dts_dir)  # testuser is hardcoded in the auth mock
+            base = Path(fu.base_dir) / "testuser" / sub_dir
+            manifest = "test_manifest.json"
+            manifest_data = ["wrong", "format"]
+            with open(base / manifest, "w", encoding="utf-8") as f:
+                json.dump(manifest_data, f)
+            resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest}&dts")
+            jsn = await resp.json()
+            assert jsn == {
+                "errors": [
+                    {
+                        "type": "cannot_parse_file",
+                        "file": f"{dts_dir}/{manifest}",
+                        "message": "Manifest is not a dictionary",
+                        "tab": None,
+                    }
+                ]
+            }
+            assert resp.status == 400
+
+
+@pytest.mark.parametrize(
+    "manifest,expected",
+    [
+        ("test_manifest.foo", "foo"),
+        ("json", app.NO_EXTENSION),
+        (".json", app.NO_EXTENSION),
+        ("some_manifest", app.NO_EXTENSION),
+    ],
+)
+async def test_bulk_specification_dts_fail_wrong_extension(manifest: str, expected: str):
+    async with AppClient(config) as cli:
+        with FileUtil() as fu:
+            sub_dir = "dts_folder"
+            dts_dir = f"testuser/{sub_dir}"
+            fu.make_dir(dts_dir)  # testuser is hardcoded in the auth mock
+            base = Path(fu.base_dir) / "testuser" / sub_dir
+            manifest_data = {"resources": [], "instructions": {}}
+            with open(base / manifest, "w", encoding="utf-8") as f:
+                json.dump(manifest_data, f)
+            resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest}&dts")
+            jsn = await resp.json()
+            assert jsn == {
+                "errors": [
+                    {
+                        "type": "cannot_parse_file",
+                        "file": f"{dts_dir}/{manifest}",
+                        "message": f"{expected} is not a supported file type for import specifications",
+                        "tab": None,
+                    }
+                ]
+            }
+            assert resp.status == 400
+
+
 async def test_bulk_specification_fail_no_files():
     async with AppClient(config) as cli:
         for f in ["", "?files=", "?files= , ,, , "]:
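
For reference, a request exercising the new flag might look like the following sketch
(host and token are placeholders, and the Authorization header is an assumption based on
the service's `authorize_request` handling; the `dts` parameter needs no value, since the
handler only checks `"dts" in request.query`):

    GET https://<staging-service-host>/bulk_specification/?files=dts_folder/test_manifest_1.json&dts
    Authorization: <kbase token>

Without `&dts`, the same .json file is rejected by the standard resolver, as
test_bulk_specification_dts_fail_json_without_dts verifies.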