Skip to content

Commit

Permalink
Merge pull request #208 from kbase/dts-manifest-endpoint
Browse files Browse the repository at this point in the history
Add dts flag to bulk_specification endpoint
  • Loading branch information
briehl authored Dec 10, 2024
2 parents a028bda + 15be583 commit afee534
Show file tree
Hide file tree
Showing 2 changed files with 235 additions and 15 deletions.
42 changes: 29 additions & 13 deletions staging_service/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections.abc import Callable
import json
import logging
import os
Expand All @@ -12,7 +13,7 @@

from .app_error_formatter import format_import_spec_errors
from .auth2Client import KBaseAuth2
from .autodetect.Mappings import CSV, EXCEL, TSV, JSON
from .autodetect.Mappings import CSV, EXCEL, TSV
from .AutoDetectUtils import AutoDetectUtils
from .globus import assert_globusid_exists, is_globusid
from .import_specifications.file_parser import (
Expand Down Expand Up @@ -44,19 +45,18 @@

_APP_JSON = "application/json"

_IMPSPEC_FILE_TO_PARSER = {
CSV: parse_csv,
TSV: parse_tsv,
EXCEL: parse_excel,
JSON: parse_dts_manifest,
}
_IMPSPEC_FILE_TO_PARSER = {CSV: parse_csv, TSV: parse_tsv, EXCEL: parse_excel}

_IMPSPEC_FILE_TO_WRITER = {
CSV: write_csv,
TSV: write_tsv,
EXCEL: write_excel,
}

# The constant in autodetect.Mappings isn't guaranteed to be the string we want.
JSON_EXTENSION = "json"
NO_EXTENSION = "missing extension"


@routes.get("/importer_filetypes/")
async def importer_filetypes(_: web.Request) -> web.json_response:
Expand Down Expand Up @@ -108,18 +108,30 @@ def _file_type_resolver(path: PathPy) -> FileTypeResolution:
return FileTypeResolution(unsupported_type=ext)


def _make_dts_file_resolver() -> Callable[[PathPy], FileTypeResolution]:
    """
    Create a file type resolver for Data Transfer Service (DTS) manifests.

    The returned callable maps a filesystem path to a FileTypeResolution.
    Only files with a ".json" extension (case-insensitive) are accepted and
    routed to the DTS manifest parser; any other extension - or a missing
    one - is reported as an unsupported type.

    Note: the return annotation names PathPy (pathlib.Path), matching the
    argument actually passed by parse_import_specifications and the sibling
    _file_type_resolver; the previous annotation incorrectly named the
    service's Path validation class.
    """

    def dts_file_resolver(path: PathPy) -> FileTypeResolution:
        # path.suffix includes the leading "." when present; strip it so the
        # extension compares cleanly against JSON_EXTENSION.
        suffix = path.suffix[1:] if path.suffix else NO_EXTENSION
        if suffix.lower() != JSON_EXTENSION:
            return FileTypeResolution(unsupported_type=suffix)
        return FileTypeResolution(parser=parse_dts_manifest)

    return dts_file_resolver


@routes.get("/bulk_specification/{query:.*}")
async def bulk_specification(request: web.Request) -> web.json_response:
"""
Takes a `files` query parameter with a list of comma separated import specification file paths.
Returns the contents of those files parsed into a list of dictionaries, mapped from the data
type, in the `types` key.
:param request: contains a comma separated list of files, e.g. folder1/file1.txt,file2.txt
TODO: since JSON files are rather generic and we might want to use a different JSON bulk-spec
format later, add a separate query parameter to request that the selected file is treated as a
Data Transfer Service manifest.
:param request: contains the URL parameters for the request. Expected to have the following:
* files (required) - a comma separated list of files, e.g. folder1/file1.txt,file2.txt
* dts (optional) - if present this will treat all of the given files as DTS manifest files,
and attempt to parse them accordingly.
"""
username = await authorize_request(request)
files = parse_qs(request.query_string).get("files", [])
Expand All @@ -129,10 +141,14 @@ async def bulk_specification(request: web.Request) -> web.json_response:
for f in files:
p = Path.validate_path(username, f)
paths[PathPy(p.full_path)] = PathPy(p.user_path)

# list(dict) returns a list of the dict keys in insertion order (py3.7+)
file_type_resolver = _file_type_resolver
if "dts" in request.query:
file_type_resolver = _make_dts_file_resolver()
res = parse_import_specifications(
tuple(list(paths)),
_file_type_resolver,
file_type_resolver,
lambda e: logging.error("Unexpected error while parsing import specs", exc_info=e),
)
if res.results:
Expand Down
208 changes: 206 additions & 2 deletions tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import asyncio
import configparser
import hashlib
import json
import os
import platform
import pytest
import shutil
import string
import time
from io import BytesIO
from json import JSONDecoder
from pathlib import Path
from typing import Any
from urllib.parse import unquote, urlencode
Expand All @@ -29,7 +30,7 @@

bootstrap()

decoder = JSONDecoder()
decoder = json.JSONDecoder()

config = configparser.ConfigParser()
config.read(os.environ["KB_DEPLOYMENT_CONFIG"])
Expand Down Expand Up @@ -1049,6 +1050,209 @@ async def test_bulk_specification_success():
assert resp.status == 200


async def test_bulk_specification_dts_success():
    """Two well-formed DTS manifests submitted with the dts flag are parsed per-file."""
    async with AppClient(config) as cli:
        with FileUtil() as util:
            folder = "dts_folder"
            user_dir = f"testuser/{folder}"
            util.make_dir(user_dir)  # testuser is hardcoded in the auth mock
            root = Path(util.base_dir) / "testuser" / folder

            def build_manifest(data_type, triples):
                # Assemble a minimal DTS manifest with one import object per
                # (fasta_file, gff_file, genome_name) triple.
                return {
                    "resources": [],
                    "instructions": {
                        "protocol": "KBase narrative import",
                        "objects": [
                            {
                                "data_type": data_type,
                                "parameters": {
                                    "fasta_file": fasta,
                                    "gff_file": gff,
                                    "genome_name": name,
                                },
                            }
                            for fasta, gff, name in triples
                        ],
                    },
                }

            manifest_1 = "test_manifest_1.json"
            manifest_2 = "test_manifest_2.json"
            (root / manifest_1).write_text(
                json.dumps(
                    build_manifest(
                        "gff_metagenome",
                        [("fasta_1", "gff_1", "mg_1"), ("fasta_2", "gff_2", "mg_2")],
                    )
                ),
                encoding="utf-8",
            )
            (root / manifest_2).write_text(
                json.dumps(
                    build_manifest(
                        "gff_genome",
                        [
                            ("g_fasta_1", "g_gff_1", "genome_1"),
                            ("g_fasta_2", "g_gff_2", "genome_2"),
                        ],
                    )
                ),
                encoding="utf-8",
            )
            resp = await cli.get(
                f"bulk_specification/?files={folder}/{manifest_1} , {folder}/{manifest_2}&dts"
            )
            jsn = await resp.json()
            # fails for now. will update when schema/parser is properly finished.
            assert jsn == {
                "errors": [
                    {
                        "type": "cannot_parse_file",
                        "file": f"{user_dir}/{name}",
                        "message": "No import specification data in file",
                        "tab": None,
                    }
                    for name in (manifest_1, manifest_2)
                ]
            }
            # Once the parser is complete the expected response becomes:
            # assert json == {
            #     "types": {
            #         "gff_genome": [
            #             {"fasta_file": "g_fasta_1", "gff_file": "g_gff_1", "genome_name": "genome_1"},
            #             {"fasta_file": "g_fasta_2", "gff_file": "g_gff_2", "genome_name": "genome_2"}
            #         ],
            #         "gff_metagenome": [
            #             {"fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1"},
            #             {"fasta_file": "fasta_2", "gff_file": "gff_2", "genome_name": "mg_2"}
            #         ]
            #     },
            #     "files": {
            #         "gff_metagenome": {"file": f"testuser/{manifest_1}", "tab": None},
            #         "gff_genome": {"file": f"testuser/{manifest_2}", "tab": None}
            #     }
            # }
            assert resp.status == 400


async def test_bulk_specification_dts_fail_json_without_dts():
    """A .json file without the dts query flag is rejected as an unsupported type."""
    async with AppClient(config) as cli:
        with FileUtil() as util:
            folder = "dts_folder"
            user_dir = f"testuser/{folder}"
            util.make_dir(user_dir)  # testuser is hardcoded in the auth mock
            root = Path(util.base_dir) / "testuser" / folder
            manifest_1 = "test_manifest_1.json"
            contents = {
                "resources": [],
                "instructions": {
                    "protocol": "KBase narrative import",
                    "objects": [
                        {
                            "data_type": "gff_metagenome",
                            "parameters": {
                                "fasta_file": "fasta_1",
                                "gff_file": "gff_1",
                                "genome_name": "mg_1",
                            },
                        }
                    ],
                },
            }
            (root / manifest_1).write_text(json.dumps(contents), encoding="utf-8")
            resp = await cli.get(f"bulk_specification/?files={folder}/{manifest_1}")
            jsn = await resp.json()
            expected_error = {
                "type": "cannot_parse_file",
                "file": f"{user_dir}/{manifest_1}",
                "message": "json is not a supported file type for import specifications",
                "tab": None,
            }
            assert jsn == {"errors": [expected_error]}
            assert resp.status == 400


async def test_bulk_specification_dts_fail_wrong_format():
    """A manifest whose top level is not a JSON object yields a parse error."""
    async with AppClient(config) as cli:
        with FileUtil() as util:
            folder = "dts_folder"
            user_dir = f"testuser/{folder}"
            util.make_dir(user_dir)  # testuser is hardcoded in the auth mock
            root = Path(util.base_dir) / "testuser" / folder
            manifest = "test_manifest.json"
            # top-level JSON array instead of the required object
            (root / manifest).write_text(json.dumps(["wrong", "format"]), encoding="utf-8")
            resp = await cli.get(f"bulk_specification/?files={folder}/{manifest}&dts")
            jsn = await resp.json()
            expected_error = {
                "type": "cannot_parse_file",
                "file": f"{user_dir}/{manifest}",
                "message": "Manifest is not a dictionary",
                "tab": None,
            }
            assert jsn == {"errors": [expected_error]}
            assert resp.status == 400


@pytest.mark.parametrize(
    "manifest,expected",
    [
        ("test_manifest.foo", "foo"),
        ("json", app.NO_EXTENSION),
        (".json", app.NO_EXTENSION),
        ("some_manifest", app.NO_EXTENSION),
    ],
)
async def test_bulk_specification_dts_fail_wrong_extension(manifest: str, expected: str):
    """Only ".json" files may be DTS manifests; others report their (missing) extension."""
    async with AppClient(config) as cli:
        with FileUtil() as util:
            folder = "dts_folder"
            user_dir = f"testuser/{folder}"
            util.make_dir(user_dir)  # testuser is hardcoded in the auth mock
            root = Path(util.base_dir) / "testuser" / folder
            (root / manifest).write_text(
                json.dumps({"resources": [], "instructions": {}}), encoding="utf-8"
            )
            resp = await cli.get(f"bulk_specification/?files={folder}/{manifest}&dts")
            jsn = await resp.json()
            expected_error = {
                "type": "cannot_parse_file",
                "file": f"{user_dir}/{manifest}",
                "message": f"{expected} is not a supported file type for import specifications",
                "tab": None,
            }
            assert jsn == {"errors": [expected_error]}
            assert resp.status == 400


async def test_bulk_specification_fail_no_files():
async with AppClient(config) as cli:
for f in ["", "?files=", "?files= , ,, , "]:
Expand Down

0 comments on commit afee534

Please sign in to comment.