Skip to content

Commit

Permalink
updates to config, validator, etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
briehl committed Dec 12, 2024
1 parent af30bf2 commit ebf25e7
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 44 deletions.
19 changes: 9 additions & 10 deletions staging_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import sys
from collections import defaultdict
from pathlib import Path as PathPy
from typing import Any
from urllib.parse import parse_qs, unquote

import aiohttp_cors
Expand Down Expand Up @@ -44,7 +43,7 @@
VERSION = "1.3.6"

_DATATYPE_MAPPINGS = None
_DTS_MANIFEST_SCHEMA = None
_DTS_MANIFEST_VALIDATOR: jsonschema.Draft202012Validator | None = None

_APP_JSON = "application/json"

Expand Down Expand Up @@ -122,7 +121,7 @@ def dts_file_resolver(path: PathPy) -> FileTypeResolution:
suffix = path.suffix[1:] if path.suffix else NO_EXTENSION
if suffix.lower() != JSON_EXTENSION:
return FileTypeResolution(unsupported_type=suffix)
return FileTypeResolution(parser=lambda p: parse_dts_manifest(p, _DTS_MANIFEST_SCHEMA))
return FileTypeResolution(parser=lambda p: parse_dts_manifest(p, _DTS_MANIFEST_VALIDATOR))

return dts_file_resolver

Expand Down Expand Up @@ -609,14 +608,14 @@ async def authorize_request(request):
return username


def load_and_validate_schema(schema_path: PathPy) -> dict[str, Any]:
def load_and_validate_schema(schema_path: PathPy) -> jsonschema.Draft202012Validator:
with open(schema_path) as schema_file:
dts_schema = json.load(schema_file)
try:
jsonschema.Draft202012Validator.check_schema(dts_schema)
except jsonschema.exceptions.SchemaError as err:
raise Exception(f"Schema file {schema_path} is not a valid JSON schema: {err.message}")
return dts_schema
raise Exception(f"Schema file {schema_path} is not a valid JSON schema: {err.message}") from err
return jsonschema.Draft202012Validator(dts_schema)


def inject_config_dependencies(config):
Expand Down Expand Up @@ -651,7 +650,7 @@ def inject_config_dependencies(config):
Path._DATA_DIR = DATA_DIR
Path._META_DIR = META_DIR
Path._CONCIERGE_PATH = CONCIERGE_PATH
Path._DTS_MANIFEST_SCHEMA_PATH = DTS_MANIFEST_SCHEMA_PATH
_DTS_MANIFEST_SCHEMA_PATH = DTS_MANIFEST_SCHEMA_PATH

if Path._DATA_DIR is None:
raise Exception("Please provide DATA_DIR in the config file ")
Expand All @@ -662,12 +661,12 @@ def inject_config_dependencies(config):
if Path._CONCIERGE_PATH is None:
raise Exception("Please provide CONCIERGE_PATH in the config file ")

if Path._DTS_MANIFEST_SCHEMA_PATH is None:
if _DTS_MANIFEST_SCHEMA_PATH is None:
raise Exception("Please provide DTS_MANIFEST_SCHEMA in the config file")

global _DTS_MANIFEST_SCHEMA
global _DTS_MANIFEST_VALIDATOR
# will raise an Exception if the schema is invalid
_DTS_MANIFEST_SCHEMA = load_and_validate_schema(DTS_MANIFEST_SCHEMA_PATH)
_DTS_MANIFEST_VALIDATOR = load_and_validate_schema(DTS_MANIFEST_SCHEMA_PATH)

if FILE_EXTENSION_MAPPINGS is None:
raise Exception("Please provide FILE_EXTENSION_MAPPINGS in the config file ")
Expand Down
11 changes: 3 additions & 8 deletions staging_service/import_specifications/individual_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@

import csv
import json
import jsonschema
import math
import re
from pathlib import Path
from typing import Any, Optional, Tuple, Union

import jsonschema.exceptions
from jsonschema import Draft202012Validator
import magic
import pandas
from frozendict import frozendict
Expand Down Expand Up @@ -321,7 +320,7 @@ def parse_excel(path: Path) -> ParseResults:
return _error(Error(ErrorType.PARSE_FAIL, "No non-header data in file", spcsrc))


def parse_dts_manifest(path: Path, dts_manifest_schema: dict) -> ParseResults:
def parse_dts_manifest(path: Path, validator: Draft202012Validator) -> ParseResults:
"""
Parse the provided DTS manifest file. Expected to be JSON, and will fail otherwise.
The manifest should have this format, with expected keys included:
Expand All @@ -344,10 +343,7 @@ def parse_dts_manifest(path: Path, dts_manifest_schema: dict) -> ParseResults:
and its value will be a Tuple of frozendicts of the parameters. Also, in keeping
with the xsv parsers, each parameter value is expected to be a PRIMITIVE_TYPE.
Note that the dts_manifest_schema is expected to be valid, and may throw an
unexpected exception otherwise.
TODO: include further details here, and in separate documentation - ADR?
TODO: include further details in separate documentation
"""
spcsrc = SpecificationSource(path)
errors = []
Expand All @@ -358,7 +354,6 @@ def parse_dts_manifest(path: Path, dts_manifest_schema: dict) -> ParseResults:
manifest_json = json.load(manifest)
if not isinstance(manifest_json, dict):
return _error(Error(ErrorType.PARSE_FAIL, "Manifest is not a dictionary", spcsrc))
validator = jsonschema.Draft202012Validator(dts_manifest_schema)
for err in validator.iter_errors(manifest_json):
err_str = err.message
err_path = list(err.absolute_path)
Expand Down
46 changes: 20 additions & 26 deletions tests/import_specifications/test_individual_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import uuid
from collections.abc import Callable, Generator
from pathlib import Path
from typing import Any

# TODO update to C impl when fixed: https://github.com/Marco-Sulla/python-frozendict/issues/26
from frozendict import frozendict
Expand All @@ -21,6 +20,8 @@
)
from tests.test_app import FileUtil
from tests.test_utils import bootstrap_config
from jsonschema import Draft202012Validator


_TEST_DATA_DIR = (Path(__file__).parent / "test_data").resolve()

Expand All @@ -36,11 +37,11 @@ def temp_dir_fixture() -> Generator[Path, None, None]:


@pytest.fixture(scope="module")
def dts_schema() -> Generator[dict[str, Any], None, None]:
def dts_validator() -> Generator[Draft202012Validator, None, None]:
config = bootstrap_config()
with open(config["staging_service"]["DTS_MANIFEST_SCHEMA"]) as dts_schema_file:
schema = json.load(dts_schema_file)
yield schema
yield Draft202012Validator(schema)


##########################################
Expand Down Expand Up @@ -778,9 +779,9 @@ def test_excel_parse_fail_unequal_rows():
)


def test_dts_manifest_parse_success(dts_schema: dict[str, Any]):
def test_dts_manifest_parse_success(dts_validator: Draft202012Validator):
f = _get_test_file("manifest_small.json")
res = parse_dts_manifest(f, dts_schema)
res = parse_dts_manifest(f, dts_validator)
# fails for now
assert res.results is None
assert res.errors == tuple(
Expand All @@ -805,25 +806,25 @@ def manifest_writer(input_json: dict | list) -> Path:
return manifest_writer


def _dts_manifest_parse_fail(input_file: Path, schema: dict, errors: list[Error]):
def _dts_manifest_parse_fail(input_file: Path, validator: Draft202012Validator, errors: list[Error]):
"""
Tests a failing DTS manifest parse.
input_file - the path to the input file. Might be a directory or not exist.
errors - a list of Error objects expected to be in the order generated by
the call to parse_dts_manifest.
"""
res = parse_dts_manifest(input_file, schema)
res = parse_dts_manifest(input_file, validator)
assert res.results is None
assert res.errors == tuple(errors)


def test_dts_manifest_non_json(temp_dir: Generator[Path, None, None], dts_schema: dict[str, Any]):
def test_dts_manifest_non_json(temp_dir: Generator[Path, None, None], dts_validator: Draft202012Validator):
test_file = temp_dir / str(uuid.uuid4())
with open(test_file, "w", encoding="utf-8") as outfile:
outfile.write("totally not json")
_dts_manifest_parse_fail(
test_file,
dts_schema,
dts_validator,
[
Error(
ErrorType.PARSE_FAIL, "File must be in JSON format", SpecificationSource(test_file)
Expand All @@ -833,12 +834,12 @@ def test_dts_manifest_non_json(temp_dir: Generator[Path, None, None], dts_schema


def test_dts_manifest_non_dict(
write_dts_manifest: Callable[[dict | list], Path], dts_schema: dict[str, Any]
write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator
):
manifest_path = write_dts_manifest(["wrong_format"])
_dts_manifest_parse_fail(
manifest_path,
dts_schema,
dts_validator,
[
Error(
ErrorType.PARSE_FAIL,
Expand All @@ -849,23 +850,23 @@ def test_dts_manifest_non_dict(
)


def test_dts_manifest_not_found(temp_dir: Generator[Path, None, None], dts_schema: dict[str, Any]):
def test_dts_manifest_not_found(temp_dir: Generator[Path, None, None], dts_validator: Draft202012Validator):
manifest_path = temp_dir / "not_a_file"
_dts_manifest_parse_fail(
manifest_path,
dts_schema,
dts_validator,
[Error(ErrorType.FILE_NOT_FOUND, source_1=SpecificationSource(manifest_path))],
)


def test_dts_manifest_file_is_directory(
temp_dir: Generator[Path, None, None], dts_schema: dict[str, Any]
temp_dir: Generator[Path, None, None], dts_validator: Draft202012Validator
):
test_file = temp_dir / "testdir.json"
os.makedirs(test_file, exist_ok=True)
_dts_manifest_parse_fail(
test_file,
dts_schema,
dts_validator,
[
Error(
ErrorType.PARSE_FAIL,
Expand All @@ -876,20 +877,13 @@ def test_dts_manifest_file_is_directory(
)


@pytest.mark.parametrize("bad_schema", [None, 1, [], {"foo"}])
def test_dts_manifest_bad_schema(bad_schema):
f = _get_test_file("manifest_small.json")
with pytest.raises(Exception):
parse_dts_manifest(f, bad_schema)


def test_dts_manifest_no_top_level_keys(
write_dts_manifest: Callable[[dict | list], Path], dts_schema: dict[str, Any]
write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator
):
manifest_path = write_dts_manifest({"missing": "stuff"})
_dts_manifest_parse_fail(
manifest_path,
dts_schema,
dts_validator,
[
Error(
ErrorType.PARSE_FAIL,
Expand All @@ -906,7 +900,7 @@ def test_dts_manifest_no_top_level_keys(


def test_dts_manifest_fail_with_path(
write_dts_manifest: Callable[[dict | list], Path], dts_schema: dict[str, Any]
write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator
):
manifest_path = write_dts_manifest(
{
Expand All @@ -919,7 +913,7 @@ def test_dts_manifest_fail_with_path(
)
_dts_manifest_parse_fail(
manifest_path,
dts_schema,
dts_validator,
[
Error(
ErrorType.PARSE_FAIL,
Expand Down
41 changes: 41 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import json
import os
import platform
import uuid
import jsonschema
import pytest
import shutil
import string
Expand Down Expand Up @@ -1264,6 +1266,45 @@ def test_bulk_specification_dts_fail_bad_schema():
pass


def test_load_and_validate_schema_good():
    """Loading the configured DTS manifest schema returns a Draft 2020-12 validator."""
    # TODO: update this after updating how config is handled
    loaded = app.load_and_validate_schema(config["staging_service"]["DTS_MANIFEST_SCHEMA"])
    assert isinstance(loaded, jsonschema.Draft202012Validator)


def test_load_and_validate_schema_missing_file():
    """A schema path that does not exist surfaces the FileNotFoundError from open()."""
    # Start from a fixed name and fall back to random ones until we hold a
    # path that is guaranteed not to exist on disk.
    missing_path = Path("not_real")
    while missing_path.exists():
        missing_path = Path(str(uuid.uuid4()))

    with pytest.raises(FileNotFoundError, match="No such file or directory"):
        app.load_and_validate_schema(missing_path)


def test_load_and_validate_schema_malformed_file(tmp_path: Path):
    """A schema file whose content is not JSON raises json.JSONDecodeError."""
    # TODO: migrate FileUtil and the temp_dir fixture from
    #   import_specifications.test_individual_parsers into conftest.py, and
    #   resolve everywhere else that FileUtil gets used.
    # Until then, pytest's built-in tmp_path is appropriate for these tests.
    schema_file = tmp_path / f"{uuid.uuid4()}.json"
    schema_file.write_text("not valid json", encoding="utf-8")

    with pytest.raises(json.JSONDecodeError, match="Expecting value: line 1 column 1"):
        app.load_and_validate_schema(schema_file)


def test_load_and_validate_schema_bad(tmp_path: Path):
    """A file holding well-formed JSON that is not a valid JSON schema is rejected.

    The schema below uses a property type ("not_real") that is not a valid JSON
    schema type, so load_and_validate_schema is expected to raise an Exception
    whose message names the schema file and the offending value.
    """
    # Local import: only this test needs it, for escaping the expected message.
    import re

    invalid = {
        "properties": {
            "some_prop": {"type": "not_real"},
        }
    }
    schema_file = tmp_path / f"{uuid.uuid4()}.json"
    schema_file.write_text(json.dumps(invalid), encoding="utf-8")
    exp_err = f"Schema file {schema_file} is not a valid JSON schema: 'not_real' is not valid"
    # pytest.raises(match=...) interprets its argument as a regular expression.
    # The message embeds a filesystem path, which may contain regex
    # metacharacters (e.g. backslashes on Windows), so match it literally.
    with pytest.raises(Exception, match=re.escape(exp_err)):
        app.load_and_validate_schema(schema_file)


async def test_bulk_specification_fail_no_files():
async with AppClient(config) as cli:
for f in ["", "?files=", "?files= , ,, , "]:
Expand Down

0 comments on commit ebf25e7

Please sign in to comment.