From ccfbc7f8dfe429ee9046b38c98fb2d63c72b2c34 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Wed, 27 Nov 2024 16:26:11 -0500 Subject: [PATCH 01/14] add dts flag to bulk_specification endpoint --- staging_service/app.py | 30 ++++++++++++------- .../import_specifications/file_parser.py | 2 ++ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index cd668dc7..6b4751be 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -30,7 +30,7 @@ parse_csv, parse_excel, parse_tsv, - parse_dts_manifest, + parse_dts_manifest ) from .JGIMetadata import read_metadata_for from .metadata import add_upa, dir_info, similar, some_metadata @@ -47,8 +47,7 @@ _IMPSPEC_FILE_TO_PARSER = { CSV: parse_csv, TSV: parse_tsv, - EXCEL: parse_excel, - JSON: parse_dts_manifest, + EXCEL: parse_excel } _IMPSPEC_FILE_TO_WRITER = { @@ -107,6 +106,15 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution: ext = path.name return FileTypeResolution(unsupported_type=ext) +def _extract_file_paths(params: dict[str, list|None], key: str, username: str) -> dict[PathPy, PathPy]: + files = params.get(key, []) + files = files[0].split(",") if files else [] + files = [f.strip() for f in files if f.strip()] + paths = {} + for f in files: + p = Path.validate_path(username, f) + paths[PathPy(p.full_path)] = PathPy(p.user_path) + return paths @routes.get("/bulk_specification/{query:.*}") async def bulk_specification(request: web.Request) -> web.json_response: @@ -122,17 +130,17 @@ async def bulk_specification(request: web.Request) -> web.json_response: Data Transfer Service manifest. """ username = await authorize_request(request) - files = parse_qs(request.query_string).get("files", []) - files = files[0].split(",") if files else [] - files = [f.strip() for f in files if f.strip()] - paths = {} - for f in files: - p = Path.validate_path(username, f) - paths[PathPy(p.full_path)] = PathPy(p.user_path) + params = parse_qs(request.query_string) + paths = _extract_file_paths(params, "files", username) + as_dts = params.get("dts") == "1" + # list(dict) returns a list of the dict keys in insertion order (py3.7+) + file_type_resolver = _file_type_resolver + if as_dts: + file_type_resolver = lambda: FileTypeResolution(parser=parse_dts_manifest) # noqa: E731 res = parse_import_specifications( tuple(list(paths)), - _file_type_resolver, + file_type_resolver, lambda e: logging.error("Unexpected error while parsing import specs", exc_info=e), ) if res.results: diff --git a/staging_service/import_specifications/file_parser.py b/staging_service/import_specifications/file_parser.py index 7c51af85..29f5805b 100644 --- a/staging_service/import_specifications/file_parser.py +++ b/staging_service/import_specifications/file_parser.py @@ -186,6 +186,8 @@ def parse_import_specifications( file_type_resolver - a callable that when given a file path, returns the type of the file or a parser for the file. log_error - callable for logging an exception. + dts_paths - JSON file paths that are specifically for the DTS importer and should not be + resolved any other way """ if not paths: return ParseResults(errors=tuple([Error(ErrorType.NO_FILES_PROVIDED)])) From f38203727a8c1008a42c54dd8c0ba03ff983485f Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Wed, 27 Nov 2024 16:41:25 -0500 Subject: [PATCH 02/14] some cleanup --- staging_service/app.py | 26 +++++++------------ .../import_specifications/file_parser.py | 2 -- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index 6b4751be..a953af5f 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -12,7 +12,7 @@ from .app_error_formatter import format_import_spec_errors from .auth2Client import KBaseAuth2 -from .autodetect.Mappings import CSV, EXCEL, TSV, JSON +from .autodetect.Mappings import CSV, EXCEL, TSV from .AutoDetectUtils import AutoDetectUtils from .globus import assert_globusid_exists, is_globusid from .import_specifications.file_parser import ( @@ -30,7 +30,7 @@ parse_csv, parse_excel, parse_tsv, - parse_dts_manifest + parse_dts_manifest, ) from .JGIMetadata import read_metadata_for from .metadata import add_upa, dir_info, similar, some_metadata @@ -106,16 +106,6 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution: ext = path.name return FileTypeResolution(unsupported_type=ext) -def _extract_file_paths(params: dict[str, list|None], key: str, username: str) -> dict[PathPy, PathPy]: - files = params.get(key, []) - files = files[0].split(",") if files else [] - files = [f.strip() for f in files if f.strip()] - paths = {} - for f in files: - p = Path.validate_path(username, f) - paths[PathPy(p.full_path)] = PathPy(p.user_path) - return paths - @routes.get("/bulk_specification/{query:.*}") async def bulk_specification(request: web.Request) -> web.json_response: """ @@ -124,14 +114,16 @@ async def bulk_specification(request: web.Request) -> web.json_response: type, in the `types` key. :param request: contains a comma separated list of files, e.g. folder1/file1.txt,file2.txt - - TODO: since JSON files are rather generic and we might want to use a different JSON bulk-spec - format later, add a separate query parameter to request that the selected file is treated as a - Data Transfer Service manifest. """ username = await authorize_request(request) params = parse_qs(request.query_string) - paths = _extract_file_paths(params, "files", username) + files = params.get("files", []) + files = files[0].split(",") if files else [] + files = [f.strip() for f in files if f.strip()] + paths = {} + for f in files: + p = Path.validate_path(username, f) + paths[PathPy(p.full_path)] = PathPy(p.user_path) as_dts = params.get("dts") == "1" # list(dict) returns a list of the dict keys in insertion order (py3.7+) diff --git a/staging_service/import_specifications/file_parser.py b/staging_service/import_specifications/file_parser.py index 29f5805b..7c51af85 100644 --- a/staging_service/import_specifications/file_parser.py +++ b/staging_service/import_specifications/file_parser.py @@ -186,8 +186,6 @@ def parse_import_specifications( file_type_resolver - a callable that when given a file path, returns the type of the file or a parser for the file. log_error - callable for logging an exception. - dts_paths - JSON file paths that are specifically for the DTS importer and should not be - resolved any other way """ if not paths: return ParseResults(errors=tuple([Error(ErrorType.NO_FILES_PROVIDED)])) From 5fd2dc75b4eac5e3bcf43ab8fe6d64baf219c4f1 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 2 Dec 2024 15:56:44 -0500 Subject: [PATCH 03/14] steps toward tests --- staging_service/app.py | 5 ++++- tests/test_app.py | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index a953af5f..9227705b 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -113,7 +113,10 @@ async def bulk_specification(request: web.Request) -> web.json_response: Returns the contents of those files parsed into a list of dictionaries, mapped from the data type, in the `types` key. - :param request: contains a comma separated list of files, e.g. folder1/file1.txt,file2.txt + :param request: contains the URL parameters for the request. Expected to have the following: + * files (required) - a comma separated list of files, e.g. folder1/file1.txt,file2.txt + * dts (optional) - if present, and has the value "1", this will treat all of the given + files as DTS manifest files, and attempt to parse them accordingly. """ username = await authorize_request(request) params = parse_qs(request.query_string) diff --git a/tests/test_app.py b/tests/test_app.py index 54b6535b..b128a40f 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1,13 +1,13 @@ import asyncio import configparser import hashlib +import json import os import platform import shutil import string import time from io import BytesIO -from json import JSONDecoder from pathlib import Path from typing import Any from urllib.parse import unquote, urlencode @@ -29,7 +29,7 @@ bootstrap() -decoder = JSONDecoder() +decoder = json.JSONDecoder() config = configparser.ConfigParser() config.read(os.environ["KB_DEPLOYMENT_CONFIG"]) @@ -1048,6 +1048,37 @@ async def test_bulk_specification_success(): } assert resp.status == 200 +async def test_bulk_specification_dts_success(): + async with AppClient(config) as cli: + with FileUtil() as fu: + fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" + manifest = "genome_manifest.json" + manifest_dict = { + "resources": [], + "instructions": { + "protocol": "KBase narrative import", + "" + } + } + with open(base / manifest, "w", encoding="utf-8") as f: + json.dump(manifest_dict, f) + f.writelines( + [ + "Data type: genomes; Columns: 3; Version: 1\n", + "spec1\tspec2\t spec3 \n", + "Spec 1\t Spec 2\t Spec 3\n", + "val1 \t ꔆ \t 7\n", + "val3\tval4\t1\n", + ] + ) + + +async def test_bulk_specification_dts_fail_json_not_dts(): + pass + +async def test_bulk_specification_dts_fail_wrong_format(): + pass async def test_bulk_specification_fail_no_files(): async with AppClient(config) as cli: From c4e7c635f9a4dabae09e3e278e7c8f390f175980 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 2 Dec 2024 17:15:11 -0500 Subject: [PATCH 04/14] spruce up endpoint, add tests --- staging_service/app.py | 5 +- tests/test_app.py | 139 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 126 insertions(+), 18 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index 9227705b..39f8e984 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -127,12 +127,13 @@ async def bulk_specification(request: web.Request) -> web.json_response: for f in files: p = Path.validate_path(username, f) paths[PathPy(p.full_path)] = PathPy(p.user_path) - as_dts = params.get("dts") == "1" + as_dts = params.get("dts", ["0"])[0] == "1" + # list(dict) returns a list of the dict keys in insertion order (py3.7+) file_type_resolver = _file_type_resolver if as_dts: - file_type_resolver = lambda: FileTypeResolution(parser=parse_dts_manifest) # noqa: E731 + file_type_resolver = lambda x: FileTypeResolution(parser=parse_dts_manifest) # noqa: E731 res = parse_import_specifications( tuple(list(paths)), file_type_resolver, diff --git a/tests/test_app.py b/tests/test_app.py index b128a40f..fa1e5f09 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1053,29 +1053,136 @@ async def test_bulk_specification_dts_success(): with FileUtil() as fu: fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" - manifest = "genome_manifest.json" - manifest_dict = { + manifest_1 = "test_manifest_1.json" + manifest_1_dict = { "resources": [], "instructions": { "protocol": "KBase narrative import", - "" + "objects": [ + { + "data_type": "gff_metagenome", + "parameters": { + "fasta_file": "fasta_1", + "gff_file": "gff_1", + "genome_name": "mg_1", + } + }, + { + "data_type": "gff_metagenome", + "parameters": { + "fasta_file": "fasta_2", + "gff_file": "gff_2", + "genome_name": "mg_2", + } + } + ] } } - with open(base / manifest, "w", encoding="utf-8") as f: - json.dump(manifest_dict, f) - f.writelines( - [ - "Data type: genomes; Columns: 3; Version: 1\n", - "spec1\tspec2\t spec3 \n", - "Spec 1\t Spec 2\t Spec 3\n", - "val1 \t ꔆ \t 7\n", - "val3\tval4\t1\n", + manifest_2 = "test_manifest_2.json" + manifest_2_dict = { + "resources": [], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { + "data_type": "gff_genome", + "parameters": { + "fasta_file": "g_fasta_1", + "gff_file": "g_gff_1", + "genome_name": "genome_1", + } + }, + { + "data_type": "gff_genome", + "parameters": { + "fasta_file": "g_fasta_2", + "gff_file": "g_gff_2", + "genome_name": "genome_2", + } + } ] - ) - + } + } + with open(base / manifest_1, "w", encoding="utf-8") as f: + json.dump(manifest_1_dict, f) + with open(base / manifest_2, "w", encoding="utf-8") as f: + json.dump(manifest_2_dict, f) + resp = await cli.get(f"bulk_specification/?files={manifest_1} , {manifest_2}&dts=1") + jsn = await resp.json() + # fails for now + assert jsn == { + "errors": [ + { + "type": "cannot_parse_file", + "file": f"testuser/{manifest_1}", + "message": "No import specification data in file", + "tab": None + }, + { + "type": "cannot_parse_file", + "file": f"testuser/{manifest_2}", + "message": "No import specification data in file", + "tab": None + } + ] + } + # soon will be this: + # assert json == { + # "types": { + # "gff_genome": [ + # {"fasta_file": "g_fasta_1", "gff_file": "g_gff_1", "genome_name": "genome_1"}, + # {"fasta_file": "g_fasta_2", "gff_file": "g_gff_2", "genome_name": "genome_2"} + # ], + # "gff_metagenome": [ + # {"fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1"}, + # {"fasta_file": "fasta_2", "gff_file": "gff_2", "genome_name": "mg_2"} + # ] + # }, + # "files": { + # "gff_metagenome": {"file": f"testuser/{manifest_1}", "tab": None}, + # "gff_genome": {"file": f"testuser/{manifest_2}", "tab": None} + # } + # } + assert resp.status == 400 -async def test_bulk_specification_dts_fail_json_not_dts(): - pass +async def test_bulk_specification_dts_fail_json_without_dts(): + async with AppClient(config) as cli: + with FileUtil() as fu: + fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" + manifest_1 = "test_manifest_1.json" + manifest_1_dict = { + "resources": [], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { + "data_type": "gff_metagenome", + "parameters": { + "fasta_file": "fasta_1", + "gff_file": "gff_1", + "genome_name": "mg_1", + } + } + ] + } + } + with open(base / manifest_1, "w", encoding="utf-8") as f: + json.dump(manifest_1_dict, f) + resp = await cli.get(f"bulk_specification/?files={manifest_1}&dts=0") + jsn = await resp.json() + # fails for now + assert jsn == { + "errors": [ + { + "type": "cannot_parse_file", + "file": f"testuser/{manifest_1}", + "message": "json is not a supported file type for import specifications", + "tab": None + } + ] + } + assert resp.status == 400 async def test_bulk_specification_dts_fail_wrong_format(): pass From 9385c36acb0b9628def78633eaf4ebc421bc0991 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Tue, 3 Dec 2024 16:39:25 -0500 Subject: [PATCH 05/14] ruff formatting --- staging_service/app.py | 20 +++++++++++++------- tests/test_app.py | 38 +++++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index 39f8e984..b89dc049 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -1,3 +1,4 @@ +from collections.abc import Callable import json import logging import os @@ -44,11 +45,7 @@ _APP_JSON = "application/json" -_IMPSPEC_FILE_TO_PARSER = { - CSV: parse_csv, - TSV: parse_tsv, - EXCEL: parse_excel -} +_IMPSPEC_FILE_TO_PARSER = {CSV: parse_csv, TSV: parse_tsv, EXCEL: parse_excel} _IMPSPEC_FILE_TO_WRITER = { CSV: write_csv, @@ -106,6 +103,16 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution: ext = path.name return FileTypeResolution(unsupported_type=ext) + +def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]: + """Makes a DTS file resolver.""" + + def dts_file_resolver(_: PathPy): + return FileTypeResolution(parser=parse_dts_manifest) + + return dts_file_resolver + + @routes.get("/bulk_specification/{query:.*}") async def bulk_specification(request: web.Request) -> web.json_response: """ @@ -129,11 +136,10 @@ async def bulk_specification(request: web.Request) -> web.json_response: paths[PathPy(p.full_path)] = PathPy(p.user_path) as_dts = params.get("dts", ["0"])[0] == "1" - # list(dict) returns a list of the dict keys in insertion order (py3.7+) file_type_resolver = _file_type_resolver if as_dts: - file_type_resolver = lambda x: FileTypeResolution(parser=parse_dts_manifest) # noqa: E731 + file_type_resolver = _make_dts_file_resolver() res = parse_import_specifications( tuple(list(paths)), file_type_resolver, diff --git a/tests/test_app.py b/tests/test_app.py index fa1e5f09..5d0dbfcf 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1048,6 +1048,7 @@ async def test_bulk_specification_success(): } assert resp.status == 200 + async def test_bulk_specification_dts_success(): async with AppClient(config) as cli: with FileUtil() as fu: @@ -1065,7 +1066,7 @@ async def test_bulk_specification_dts_success(): "fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1", - } + }, }, { "data_type": "gff_metagenome", @@ -1073,10 +1074,10 @@ async def test_bulk_specification_dts_success(): "fasta_file": "fasta_2", "gff_file": "gff_2", "genome_name": "mg_2", - } - } - ] - } + }, + }, + ], + }, } manifest_2 = "test_manifest_2.json" manifest_2_dict = { @@ -1090,7 +1091,7 @@ async def test_bulk_specification_dts_success(): "fasta_file": "g_fasta_1", "gff_file": "g_gff_1", "genome_name": "genome_1", - } + }, }, { "data_type": "gff_genome", @@ -1098,10 +1099,10 @@ async def test_bulk_specification_dts_success(): "fasta_file": "g_fasta_2", "gff_file": "g_gff_2", "genome_name": "genome_2", - } - } - ] - } + }, + }, + ], + }, } with open(base / manifest_1, "w", encoding="utf-8") as f: json.dump(manifest_1_dict, f) @@ -1116,14 +1117,14 @@ async def test_bulk_specification_dts_success(): "type": "cannot_parse_file", "file": f"testuser/{manifest_1}", "message": "No import specification data in file", - "tab": None + "tab": None, }, { "type": "cannot_parse_file", "file": f"testuser/{manifest_2}", "message": "No import specification data in file", - "tab": None - } + "tab": None, + }, ] } # soon will be this: @@ -1145,6 +1146,7 @@ async def test_bulk_specification_dts_success(): # } assert resp.status == 400 + async def test_bulk_specification_dts_fail_json_without_dts(): async with AppClient(config) as cli: with FileUtil() as fu: @@ -1162,10 +1164,10 @@ async def test_bulk_specification_dts_fail_json_without_dts(): "fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1", - } + }, } - ] - } + ], + }, } with open(base / manifest_1, "w", encoding="utf-8") as f: json.dump(manifest_1_dict, f) @@ -1178,15 +1180,17 @@ async def test_bulk_specification_dts_fail_json_without_dts(): "type": "cannot_parse_file", "file": f"testuser/{manifest_1}", "message": "json is not a supported file type for import specifications", - "tab": None + "tab": None, } ] } assert resp.status == 400 + async def test_bulk_specification_dts_fail_wrong_format(): pass + async def test_bulk_specification_fail_no_files(): async with AppClient(config) as cli: for f in ["", "?files=", "?files= , ,, , "]: From 0f1d35fbd9abf403d2a47ccf54262a1e7204a9e4 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Tue, 3 Dec 2024 16:48:14 -0500 Subject: [PATCH 06/14] add fail tests --- tests/test_app.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 5d0dbfcf..cca9c59a 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1110,7 +1110,7 @@ async def test_bulk_specification_dts_success(): json.dump(manifest_2_dict, f) resp = await cli.get(f"bulk_specification/?files={manifest_1} , {manifest_2}&dts=1") jsn = await resp.json() - # fails for now + # fails for now. will update when schema/parser is properly finished. assert jsn == { "errors": [ { @@ -1173,7 +1173,6 @@ async def test_bulk_specification_dts_fail_json_without_dts(): json.dump(manifest_1_dict, f) resp = await cli.get(f"bulk_specification/?files={manifest_1}&dts=0") jsn = await resp.json() - # fails for now assert jsn == { "errors": [ { @@ -1188,7 +1187,27 @@ async def test_bulk_specification_dts_fail_json_without_dts(): async def test_bulk_specification_dts_fail_wrong_format(): - pass + async with AppClient(config) as cli: + with FileUtil() as fu: + fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" + manifest = "test_manifest.json" + manifest_data = [ "wrong", "format" ] + with open(base / manifest, "w", encoding="utf-8") as f: + json.dump(manifest_data, f) + resp = await cli.get(f"bulk_specification/?files={manifest}&dts=1") + jsn = await resp.json() + assert jsn == { + "errors": [ + { + "type": "cannot_parse_file", + "file": f"testuser/{manifest}", + "message": "Manifest is not a dictionary", + "tab": None + } + ] + } + assert resp.status == 400 async def test_bulk_specification_fail_no_files(): From ffb2a4f9eac0b52a491e783c2f1646a34bafbb4c Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Tue, 3 Dec 2024 16:55:25 -0500 Subject: [PATCH 07/14] ruff format tests --- tests/test_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index cca9c59a..c1fcd8d8 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1192,7 +1192,7 @@ async def test_bulk_specification_dts_fail_wrong_format(): fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" manifest = "test_manifest.json" - manifest_data = [ "wrong", "format" ] + manifest_data = ["wrong", "format"] with open(base / manifest, "w", encoding="utf-8") as f: json.dump(manifest_data, f) resp = await cli.get(f"bulk_specification/?files={manifest}&dts=1") @@ -1203,7 +1203,7 @@ async def test_bulk_specification_dts_fail_wrong_format(): "type": "cannot_parse_file", "file": f"testuser/{manifest}", "message": "Manifest is not a dictionary", - "tab": None + "tab": None, } ] } From 8b4af54e8333afc6acf08c8131b4fddd0f4eff2a Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 13:41:20 -0500 Subject: [PATCH 08/14] change dts as a kvp to just presence --- staging_service/app.py | 10 ++++------ tests/test_app.py | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index b89dc049..6c091850 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -122,23 +122,21 @@ async def bulk_specification(request: web.Request) -> web.json_response: :param request: contains the URL parameters for the request. Expected to have the following: * files (required) - a comma separated list of files, e.g. folder1/file1.txt,file2.txt - * dts (optional) - if present, and has the value "1", this will treat all of the given - files as DTS manifest files, and attempt to parse them accordingly. + * dts (optional) - if present this will treat all of the given files as DTS manifest files, + and attempt to parse them accordingly. """ username = await authorize_request(request) - params = parse_qs(request.query_string) - files = params.get("files", []) + files = parse_qs(request.query_string).get("files", []) files = files[0].split(",") if files else [] files = [f.strip() for f in files if f.strip()] paths = {} for f in files: p = Path.validate_path(username, f) paths[PathPy(p.full_path)] = PathPy(p.user_path) - as_dts = params.get("dts", ["0"])[0] == "1" # list(dict) returns a list of the dict keys in insertion order (py3.7+) file_type_resolver = _file_type_resolver - if as_dts: + if "dts" in request.query: file_type_resolver = _make_dts_file_resolver() res = parse_import_specifications( tuple(list(paths)), diff --git a/tests/test_app.py b/tests/test_app.py index c1fcd8d8..ff007f84 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1108,7 +1108,7 @@ async def test_bulk_specification_dts_success(): json.dump(manifest_1_dict, f) with open(base / manifest_2, "w", encoding="utf-8") as f: json.dump(manifest_2_dict, f) - resp = await cli.get(f"bulk_specification/?files={manifest_1} , {manifest_2}&dts=1") + resp = await cli.get(f"bulk_specification/?files={manifest_1} , {manifest_2}&dts") jsn = await resp.json() # fails for now. will update when schema/parser is properly finished. assert jsn == { @@ -1171,7 +1171,7 @@ async def test_bulk_specification_dts_fail_json_without_dts(): } with open(base / manifest_1, "w", encoding="utf-8") as f: json.dump(manifest_1_dict, f) - resp = await cli.get(f"bulk_specification/?files={manifest_1}&dts=0") + resp = await cli.get(f"bulk_specification/?files={manifest_1}") jsn = await resp.json() assert jsn == { "errors": [ @@ -1195,7 +1195,7 @@ async def test_bulk_specification_dts_fail_wrong_format(): manifest_data = ["wrong", "format"] with open(base / manifest, "w", encoding="utf-8") as f: json.dump(manifest_data, f) - resp = await cli.get(f"bulk_specification/?files={manifest}&dts=1") + resp = await cli.get(f"bulk_specification/?files={manifest}&dts") jsn = await resp.json() assert jsn == { "errors": [ From 6874ba6c546bce6dbca2daba549020153c1741ac Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 14:25:38 -0500 Subject: [PATCH 09/14] put test files in the right place --- tests/test_app.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index ff007f84..6a150bc1 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1052,8 +1052,10 @@ async def test_bulk_specification_success(): async def test_bulk_specification_dts_success(): async with AppClient(config) as cli: with FileUtil() as fu: - fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock - base = Path(fu.base_dir) / "testuser" + sub_dir = "dts_folder" + dts_dir = f"testuser/{sub_dir}" + fu.make_dir(dts_dir) # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" / sub_dir manifest_1 = "test_manifest_1.json" manifest_1_dict = { "resources": [], @@ -1108,20 +1110,20 @@ async def test_bulk_specification_dts_success(): json.dump(manifest_1_dict, f) with open(base / manifest_2, "w", encoding="utf-8") as f: json.dump(manifest_2_dict, f) - resp = await cli.get(f"bulk_specification/?files={manifest_1} , {manifest_2}&dts") + resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest_1} , {sub_dir}/{manifest_2}&dts") jsn = await resp.json() # fails for now. will update when schema/parser is properly finished. assert jsn == { "errors": [ { "type": "cannot_parse_file", - "file": f"testuser/{manifest_1}", + "file": f"{dts_dir}/{manifest_1}", "message": "No import specification data in file", "tab": None, }, { "type": "cannot_parse_file", - "file": f"testuser/{manifest_2}", + "file": f"{dts_dir}/{manifest_2}", "message": "No import specification data in file", "tab": None, }, @@ -1150,8 +1152,10 @@ async def test_bulk_specification_dts_success(): async def test_bulk_specification_dts_fail_json_without_dts(): async with AppClient(config) as cli: with FileUtil() as fu: - fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock - base = Path(fu.base_dir) / "testuser" + sub_dir = "dts_folder" + dts_dir = f"testuser/{sub_dir}" + fu.make_dir(dts_dir) # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" / sub_dir manifest_1 = "test_manifest_1.json" manifest_1_dict = { "resources": [], @@ -1171,13 +1175,13 @@ async def test_bulk_specification_dts_fail_json_without_dts(): } with open(base / manifest_1, "w", encoding="utf-8") as f: json.dump(manifest_1_dict, f) - resp = await cli.get(f"bulk_specification/?files={manifest_1}") + resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest_1}") jsn = await resp.json() assert jsn == { "errors": [ { "type": "cannot_parse_file", - "file": f"testuser/{manifest_1}", + "file": f"{dts_dir}/{manifest_1}", "message": "json is not a supported file type for import specifications", "tab": None, } @@ -1189,19 +1193,21 @@ async def test_bulk_specification_dts_fail_json_without_dts(): async def test_bulk_specification_dts_fail_wrong_format(): async with AppClient(config) as cli: with FileUtil() as fu: - fu.make_dir("testuser/dts_folder") # testuser is hardcoded in the auth mock - base = Path(fu.base_dir) / "testuser" + sub_dir = "dts_folder" + dts_dir = f"testuser/{sub_dir}" + fu.make_dir(dts_dir) # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" / sub_dir manifest = "test_manifest.json" manifest_data = ["wrong", "format"] with open(base / manifest, "w", encoding="utf-8") as f: json.dump(manifest_data, f) - resp = await cli.get(f"bulk_specification/?files={manifest}&dts") + resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest}&dts") jsn = await resp.json() assert jsn == { "errors": [ { "type": "cannot_parse_file", - "file": f"testuser/{manifest}", + "file": f"{dts_dir}/{manifest}", "message": "Manifest is not a dictionary", "tab": None, } From a4e3949d9b133039a88f7824d7f9f3a06c46cf6f Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 15:13:38 -0500 Subject: [PATCH 10/14] enforce JSON file extensions only for dts import specs --- staging_service/app.py | 9 ++++++--- tests/test_app.py | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index 6c091850..c4b2f8b2 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -106,10 +106,13 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution: def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]: """Makes a DTS file resolver.""" - - def dts_file_resolver(_: PathPy): + def dts_file_resolver(path: PathPy): + # must be a ".json" file + file_parts = str(path).split(".") + ext = file_parts[-1] + if len(file_parts) < 2 or ext.lower() != "json": + return FileTypeResolution(unsupported_type=ext) return FileTypeResolution(parser=parse_dts_manifest) - return dts_file_resolver diff --git a/tests/test_app.py b/tests/test_app.py index 6a150bc1..53a38392 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1216,6 +1216,32 @@ async def test_bulk_specification_dts_fail_wrong_format(): assert resp.status == 400 +async def test_bulk_specification_dts_fail_wrong_extension(): + async with AppClient(config) as cli: + with FileUtil() as fu: + sub_dir = "dts_folder" + dts_dir = f"testuser/{sub_dir}" + fu.make_dir(dts_dir) # testuser is hardcoded in the auth mock + base = Path(fu.base_dir) / "testuser" / sub_dir + manifest = "test_manifest.foo" + manifest_data = {"resources": [], "instructions": {}} + with open(base / manifest, "w", encoding="utf-8") as f: + json.dump(manifest_data, f) + resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest}&dts") + jsn = await resp.json() + assert jsn == { + "errors": [ + { + "type": "cannot_parse_file", + "file": f"{dts_dir}/{manifest}", + "message": "foo is not a supported file type for import specifications", + "tab": None, + } + ] + } + assert resp.status == 400 + + async def test_bulk_specification_fail_no_files(): async with AppClient(config) as cli: for f in ["", "?files=", "?files= , ,, , "]: From b3613ea6cc3cb043f265364fb36d07876d946995 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 15:14:04 -0500 Subject: [PATCH 11/14] ruff formatting --- staging_service/app.py | 2 ++ tests/test_app.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/staging_service/app.py b/staging_service/app.py index c4b2f8b2..494ce46d 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -106,6 +106,7 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution: def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]: """Makes a DTS file resolver.""" + def dts_file_resolver(path: PathPy): # must be a ".json" file file_parts = str(path).split(".") @@ -113,6 +114,7 @@ def dts_file_resolver(path: PathPy): if len(file_parts) < 2 or ext.lower() != "json": return FileTypeResolution(unsupported_type=ext) return FileTypeResolution(parser=parse_dts_manifest) + return dts_file_resolver diff --git a/tests/test_app.py b/tests/test_app.py index 53a38392..6e7e450f 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1110,7 +1110,9 @@ async def test_bulk_specification_dts_success(): json.dump(manifest_1_dict, f) with open(base / manifest_2, "w", encoding="utf-8") as f: json.dump(manifest_2_dict, f) - resp = await cli.get(f"bulk_specification/?files={sub_dir}/{manifest_1} , {sub_dir}/{manifest_2}&dts") + resp = await cli.get( + f"bulk_specification/?files={sub_dir}/{manifest_1} , {sub_dir}/{manifest_2}&dts" + ) jsn = await resp.json() # fails for now. will update when schema/parser is properly finished. assert jsn == { From bb76d499ce95540d233fb8260175a32e01f68335 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 17:04:37 -0500 Subject: [PATCH 12/14] update json checking, add tests --- staging_service/app.py | 10 +++++----- tests/test_app.py | 15 ++++++++++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index 494ce46d..cd65d1f1 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -13,7 +13,7 @@ from .app_error_formatter import format_import_spec_errors from .auth2Client import KBaseAuth2 -from .autodetect.Mappings import CSV, EXCEL, TSV +from .autodetect.Mappings import CSV, EXCEL, JSON, TSV from .AutoDetectUtils import AutoDetectUtils from .globus import assert_globusid_exists, is_globusid from .import_specifications.file_parser import ( @@ -53,6 +53,7 @@ EXCEL: write_excel, } +NO_EXTENSION = "missing extension" @routes.get("/importer_filetypes/") async def importer_filetypes(_: web.Request) -> web.json_response: @@ -109,10 +110,9 @@ def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]: def dts_file_resolver(path: PathPy): # must be a ".json" file - file_parts = str(path).split(".") - ext = file_parts[-1] - if len(file_parts) < 2 or ext.lower() != "json": - return FileTypeResolution(unsupported_type=ext) + suffix = path.suffix[1:] if path.suffix else NO_EXTENSION + if suffix.lower() != JSON.lower(): + return FileTypeResolution(unsupported_type=suffix) return FileTypeResolution(parser=parse_dts_manifest) return dts_file_resolver diff --git a/tests/test_app.py b/tests/test_app.py index 6e7e450f..f8147154 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -4,6 +4,7 @@ import json import os import platform +import pytest import shutil import string import time @@ -1218,14 +1219,22 @@ async def test_bulk_specification_dts_fail_wrong_format(): assert resp.status == 400 -async def test_bulk_specification_dts_fail_wrong_extension(): +@pytest.mark.parametrize( + "manifest,expected", + [ + ("test_manifest.foo", "foo"), + ("json", app.NO_EXTENSION), + (".json", app.NO_EXTENSION), + ("some_manifest", app.NO_EXTENSION), + ], +) +async def test_bulk_specification_dts_fail_wrong_extension(manifest: str, expected: str): async with AppClient(config) as cli: with FileUtil() as fu: sub_dir = "dts_folder" dts_dir = f"testuser/{sub_dir}" fu.make_dir(dts_dir) # testuser is hardcoded in the auth mock base = Path(fu.base_dir) / "testuser" / sub_dir - manifest = "test_manifest.foo" manifest_data = {"resources": [], "instructions": {}} with open(base / manifest, "w", encoding="utf-8") as f: json.dump(manifest_data, f) @@ -1236,7 +1245,7 @@ async def test_bulk_specification_dts_fail_wrong_extension(): { "type": "cannot_parse_file", "file": f"{dts_dir}/{manifest}", - "message": "foo is not a supported file type for import specifications", + "message": f"{expected} is not a supported file type for import specifications", "tab": None, } ] From a1d2b02c5441b89ce95aabd6abe43ba0d853dfd4 Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 17:07:46 -0500 Subject: [PATCH 13/14] make constants more clear --- staging_service/app.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index cd65d1f1..598179e8 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -13,7 +13,7 @@ from .app_error_formatter import format_import_spec_errors from .auth2Client import KBaseAuth2 -from .autodetect.Mappings import CSV, EXCEL, JSON, TSV +from .autodetect.Mappings import CSV, EXCEL, TSV from .AutoDetectUtils import AutoDetectUtils from .globus import assert_globusid_exists, is_globusid from .import_specifications.file_parser import ( @@ -53,8 +53,10 @@ EXCEL: write_excel, } +JSON_EXTENSION = "json" NO_EXTENSION = "missing extension" + @routes.get("/importer_filetypes/") async def importer_filetypes(_: web.Request) -> web.json_response: """ @@ -108,10 +110,10 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution: def _make_dts_file_resolver() -> Callable[[Path], FileTypeResolution]: """Makes a DTS file resolver.""" - def dts_file_resolver(path: PathPy): + def dts_file_resolver(path: PathPy) -> FileTypeResolution: # must be a ".json" file suffix = path.suffix[1:] if path.suffix else NO_EXTENSION - if suffix.lower() != JSON.lower(): + if suffix.lower() != JSON_EXTENSION: return FileTypeResolution(unsupported_type=suffix) return FileTypeResolution(parser=parse_dts_manifest) From 15be5832ec6864298eec34ae53762da3cbfa03dc Mon Sep 17 00:00:00 2001 From: Bill Riehl Date: Mon, 9 Dec 2024 17:09:17 -0500 Subject: [PATCH 14/14] add comment --- staging_service/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/staging_service/app.py b/staging_service/app.py index 598179e8..167e83cd 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -53,6 +53,7 @@ EXCEL: write_excel, } +# The constant in autodetect.Mappings isn't guaranteed to be the string we want. JSON_EXTENSION = "json" NO_EXTENSION = "missing extension"