From 2789d547e65eb34562eb0e0cca278e7474710b8a Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Wed, 23 Mar 2022 10:36:44 -0400 Subject: [PATCH] feat: Alternative Schema Locations (#1753) - Refactors * Migrate from `pkg_resources` to `importlib`. See importlib's migration guide: https://importlib-resources.readthedocs.io/en/latest/migration.html * Add importlib_resources as core dependency for Python versions older than 3.9. * `load_schema` will handle functionality for finding the schema locations locally in multiple places. * `load_schema` will not handle `version` identification (that is done through `validate` only). - Features * Support overriding the paths for finding schemas, using the `pyhf` installed location as a base via `pyhf.schema.path`. * Add support for offline access to the currently supported version of schemas (via extra `load_schema` commands). * Add `SchemaNotFound` exception for when a schema cannot be found locally (outside of `jsonschema.RefResolver` calls). - Python API * `pyhf.schema` introduced as a new API for all things schema-related. * `pyhf.schema.version`, `pyhf.schema.path`, `pyhf.schema.load_schema`, and `pyhf.schema.validate` are migrated over from `pyhf.utils`. 
--- docs/api.rst | 17 +- setup.cfg | 1 + src/pyhf/__init__.py | 2 + src/pyhf/exceptions/__init__.py | 6 + src/pyhf/patchset.py | 3 +- src/pyhf/pdf.py | 4 +- src/pyhf/readxml.py | 6 +- src/pyhf/schema/__init__.py | 68 ++++ src/pyhf/schema/loader.py | 41 +++ src/pyhf/schema/validator.py | 41 +++ src/pyhf/schema/variables.py | 13 + src/pyhf/utils.py | 65 +--- src/pyhf/workspace.py | 8 +- src/pyhf/writexml.py | 4 +- tests/conftest.py | 6 +- tests/constraints.txt | 1 + .../{test_utils.py => test_contrib_utils.py} | 0 tests/contrib/test_viz.py | 18 +- tests/test_export.py | 2 +- tests/test_modifiers.py | 4 +- tests/test_patchset.py | 18 +- tests/test_public_api_repr.py | 10 +- tests/test_schema.py | 48 ++- .../test_schema/customschema/1.1.0/defs.json | 315 ++++++++++++++++++ .../customschema/1.1.0/jsonpatch.json | 5 + .../customschema/1.1.0/measurement.json | 5 + .../test_schema/customschema/1.1.0/model.json | 5 + .../customschema/1.1.0/patchset.json | 5 + .../customschema/1.1.0/workspace.json | 5 + tests/test_schema/customschema/custom.json | 74 ++++ tests/test_scripts.py | 14 +- tests/test_utils.py | 12 - tests/test_workspace.py | 21 +- 33 files changed, 719 insertions(+), 128 deletions(-) create mode 100644 src/pyhf/schema/__init__.py create mode 100644 src/pyhf/schema/loader.py create mode 100644 src/pyhf/schema/validator.py create mode 100644 src/pyhf/schema/variables.py rename tests/contrib/{test_utils.py => test_contrib_utils.py} (100%) create mode 100644 tests/test_schema/customschema/1.1.0/defs.json create mode 100644 tests/test_schema/customschema/1.1.0/jsonpatch.json create mode 100644 tests/test_schema/customschema/1.1.0/measurement.json create mode 100644 tests/test_schema/customschema/1.1.0/model.json create mode 100644 tests/test_schema/customschema/1.1.0/patchset.json create mode 100644 tests/test_schema/customschema/1.1.0/workspace.json create mode 100644 tests/test_schema/customschema/custom.json diff --git a/docs/api.rst b/docs/api.rst index 
73ba56f9fb..9a4fa772f3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -17,6 +17,7 @@ Top-Level readxml writexml compat + schema Probability Distribution Functions (PDFs) ----------------------------------------- @@ -158,6 +159,20 @@ Fits and Tests intervals.upperlimit utils.all_pois_floating + +Schema +------ + +.. currentmodule:: pyhf.schema + +.. autosummary:: + :toctree: _generated/ + :nosignatures: + + Schema + load_schema + validate + Exceptions ---------- @@ -194,8 +209,6 @@ Utilities :toctree: _generated/ :nosignatures: - load_schema - validate options_from_eqdelimstring digest citation diff --git a/setup.cfg b/setup.cfg index 6fb6a89da8..300457c02b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,7 @@ install_requires = jsonschema>=3.0.0 # for utils jsonpatch>=1.15 pyyaml>=5.1 # for parsing CLI equal-delimited options + importlib_resources>=1.3.0; python_version < "3.9" # for resources in schema [options.packages.find] where = src diff --git a/src/pyhf/__init__.py b/src/pyhf/__init__.py index 4ed8fce762..cb3c0405e4 100644 --- a/src/pyhf/__init__.py +++ b/src/pyhf/__init__.py @@ -6,6 +6,7 @@ from pyhf.pdf import Model from pyhf.workspace import Workspace +from pyhf import schema from pyhf import simplemodels from pyhf import infer from pyhf import compat @@ -28,6 +29,7 @@ "patchset", "pdf", "probability", + "schema", "set_backend", "simplemodels", "tensor", diff --git a/src/pyhf/exceptions/__init__.py b/src/pyhf/exceptions/__init__.py index 63959799c2..1cbbb4b83c 100644 --- a/src/pyhf/exceptions/__init__.py +++ b/src/pyhf/exceptions/__init__.py @@ -44,6 +44,12 @@ class InvalidNameReuse(Exception): pass +class SchemaNotFound(FileNotFoundError): + """ + SchemaNotFound is raised when a given schema does not exist in the local file system. + """ + + class InvalidSpecification(Exception): """ InvalidSpecification is raised when a specification does not validate against the given schema. 
diff --git a/src/pyhf/patchset.py b/src/pyhf/patchset.py index f1f6de4cd7..b72118835b 100644 --- a/src/pyhf/patchset.py +++ b/src/pyhf/patchset.py @@ -5,6 +5,7 @@ import jsonpatch from pyhf import exceptions from pyhf import utils +from pyhf import schema from pyhf.workspace import Workspace log = logging.getLogger(__name__) @@ -164,7 +165,7 @@ def __init__(self, spec, **config_kwargs): # run jsonschema validation of input specification against the (provided) schema log.info(f"Validating spec against schema: {self.schema}") - utils.validate(spec, self.schema, version=self._version) + schema.validate(spec, self.schema, version=self._version) # set properties based on metadata self._metadata = spec['metadata'] diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py index 61a8a30ef9..8ca28427ac 100644 --- a/src/pyhf/pdf.py +++ b/src/pyhf/pdf.py @@ -8,7 +8,7 @@ import pyhf from pyhf.tensor.manager import get_backend from pyhf import exceptions -from pyhf import utils +from pyhf import schema from pyhf import events from pyhf import probability as prob from pyhf.constraints import gaussian_constraint_combined, poisson_constraint_combined @@ -666,7 +666,7 @@ def __init__( # run jsonschema validation of input specification against the (provided) schema if validate: log.info(f"Validating spec against schema: {self.schema:s}") - utils.validate(self.spec, self.schema, version=self.version) + schema.validate(self.spec, self.schema, version=self.version) # build up our representation of the specification poi_name = config_kwargs.pop('poi_name', 'mu') self.config = _ModelConfig(self.spec, **config_kwargs) diff --git a/src/pyhf/readxml.py b/src/pyhf/readxml.py index 2e6dec5170..eeff9f1cf9 100644 --- a/src/pyhf/readxml.py +++ b/src/pyhf/readxml.py @@ -1,4 +1,4 @@ -from pyhf import utils +from pyhf import schema from pyhf import compat import logging @@ -364,9 +364,9 @@ def parse(configfile, rootdir, track_progress=False): {'name': channel_name, 'data': channel_spec['data']} for 
channel_name, channel_spec in channels.items() ], - 'version': utils.SCHEMA_VERSION, + 'version': schema.version, } - utils.validate(result, 'workspace.json') + schema.validate(result, 'workspace.json') return result diff --git a/src/pyhf/schema/__init__.py b/src/pyhf/schema/__init__.py new file mode 100644 index 0000000000..36c5160f58 --- /dev/null +++ b/src/pyhf/schema/__init__.py @@ -0,0 +1,68 @@ +""" +See :class:`~pyhf.schema.Schema` for documentation. +""" +import pathlib +import sys +from pyhf.schema.loader import load_schema +from pyhf.schema.validator import validate +from pyhf.schema import variables + +__all__ = [ + "load_schema", + "validate", + "path", + "version", +] + + +def __dir__(): + return __all__ + + +class Schema(sys.modules[__name__].__class__): + """ + A module-level wrapper around :mod:`pyhf.schema` which will provide additional functionality for interacting with schemas. + + Example: + >>> import pyhf.schema + >>> import pathlib + >>> curr_path = pyhf.schema.path + >>> curr_path # doctest: +ELLIPSIS + PosixPath('.../pyhf/schemas') + >>> pyhf.schema(pathlib.Path('/home/root/my/new/path')) + >>> pyhf.schema.path + PosixPath('/home/root/my/new/path') + >>> pyhf.schema(curr_path) + >>> pyhf.schema.path # doctest: +ELLIPSIS + PosixPath('.../pyhf/schemas') + + """ + + def __call__(self, new_path: pathlib.Path): + """ + Change the local search path for finding schemas locally. + + Args: + new_path (pathlib.Path): Path to folder containing the schemas + + Returns: + None + """ + variables.schemas = new_path + + @property + def path(self): + """ + The local path for schemas. + """ + return variables.schemas + + @property + def version(self): + """ + The default version used for finding schemas. 
+ """ + return variables.SCHEMA_VERSION + + +sys.modules[__name__].__class__ = Schema diff --git a/src/pyhf/schema/loader.py b/src/pyhf/schema/loader.py new file mode 100644 index 0000000000..1419496d91 --- /dev/null +++ b/src/pyhf/schema/loader.py @@ -0,0 +1,41 @@ +from pathlib import Path +import sys +import json +import pyhf.exceptions +from pyhf.schema import variables + +# importlib.resources.as_file wasn't added until Python 3.9 +# c.f. https://docs.python.org/3.9/library/importlib.html#importlib.resources.as_file +if sys.version_info >= (3, 9): + from importlib import resources +else: + import importlib_resources as resources + + +def load_schema(schema_id: str): + """ + Get a schema by relative path from cache, or load it into the cache and return. + + Args: + schema_id (str): Relative path to schema from :attr:`pyhf.schema.path` + + Returns: + schema (dict): The loaded schema. + """ + try: + return variables.SCHEMA_CACHE[ + f'{Path(variables.SCHEMA_BASE).joinpath(schema_id)}' + ] + except KeyError: + pass + + ref = variables.schemas.joinpath(schema_id) + with resources.as_file(ref) as path: + if not path.exists(): + raise pyhf.exceptions.SchemaNotFound( + f'The schema {schema_id} was not found. Do you have the right version or the right path? {path}' + ) + with path.open() as json_schema: + schema = json.load(json_schema) + variables.SCHEMA_CACHE[schema['$id']] = schema + return variables.SCHEMA_CACHE[schema['$id']] diff --git a/src/pyhf/schema/validator.py b/src/pyhf/schema/validator.py new file mode 100644 index 0000000000..6c0e210fad --- /dev/null +++ b/src/pyhf/schema/validator.py @@ -0,0 +1,41 @@ +import jsonschema +import pyhf.exceptions +from pyhf.schema.loader import load_schema +from pyhf.schema import variables +from typing import Union + + +def validate(spec: dict, schema_name: str, version: Union[str, None] = None): + """ + Validate a provided specification against a schema. + + Args: + spec (dict): The specification to validate. 
+ schema_name (str): The name of the schema to use. + version (None or str): The version to use if not the default from :attr:`pyhf.schema.version`. + + Returns: + None: schema validated fine + + Raises: + pyhf.exceptions.InvalidSpecification: the specification is invalid + """ + + version = version or variables.SCHEMA_VERSION + + schema = load_schema(f'{version}/{schema_name}') + + # note: trailing slash needed for RefResolver to resolve correctly + resolver = jsonschema.RefResolver( + base_uri=f"file://{variables.schemas}/", + referrer=f"{version}/{schema_name}", + store=variables.SCHEMA_CACHE, + ) + validator = jsonschema.Draft6Validator( + schema, resolver=resolver, format_checker=None + ) + + try: + return validator.validate(spec) + except jsonschema.ValidationError as err: + raise pyhf.exceptions.InvalidSpecification(err, schema_name) diff --git a/src/pyhf/schema/variables.py b/src/pyhf/schema/variables.py new file mode 100644 index 0000000000..80c0a0dd06 --- /dev/null +++ b/src/pyhf/schema/variables.py @@ -0,0 +1,13 @@ +import sys + +# importlib.resources.as_file wasn't added until Python 3.9 +# c.f. https://docs.python.org/3.9/library/importlib.html#importlib.resources.as_file +if sys.version_info >= (3, 9): + from importlib import resources +else: + import importlib_resources as resources +schemas = resources.files('pyhf') / "schemas" + +SCHEMA_CACHE = {} +SCHEMA_BASE = "https://scikit-hep.org/pyhf/schemas/" +SCHEMA_VERSION = '1.0.0' diff --git a/src/pyhf/utils.py b/src/pyhf/utils.py index 4547166459..01c20e9275 100644 --- a/src/pyhf/utils.py +++ b/src/pyhf/utils.py @@ -1,24 +1,22 @@ import json -import jsonschema -import pkg_resources -from pathlib import Path import yaml import click import hashlib -from pyhf.exceptions import InvalidSpecification +import sys -SCHEMA_CACHE = {} -SCHEMA_BASE = "https://scikit-hep.org/pyhf/schemas/" -SCHEMA_VERSION = '1.0.0' +# importlib.resources.as_file wasn't added until Python 3.9 +# c.f. 
https://docs.python.org/3.9/library/importlib.html#importlib.resources.as_file +if sys.version_info >= (3, 9): + from importlib import resources +else: + import importlib_resources as resources __all__ = [ "EqDelimStringParamType", "citation", "digest", - "load_schema", "options_from_eqdelimstring", - "validate", ] @@ -26,44 +24,6 @@ def __dir__(): return __all__ -def load_schema(schema_id, version=None): - global SCHEMA_CACHE - if not version: - version = SCHEMA_VERSION - try: - return SCHEMA_CACHE[f'{SCHEMA_BASE}{Path(version).joinpath(schema_id)}'] - except KeyError: - pass - - path = pkg_resources.resource_filename( - __name__, str(Path('schemas').joinpath(version, schema_id)) - ) - with open(path) as json_schema: - schema = json.load(json_schema) - SCHEMA_CACHE[schema['$id']] = schema - return SCHEMA_CACHE[schema['$id']] - - -# load the defs.json as it is included by $ref -load_schema('defs.json') - - -def validate(spec, schema_name, version=None): - schema = load_schema(schema_name, version=version) - try: - resolver = jsonschema.RefResolver( - base_uri=f"file://{pkg_resources.resource_filename(__name__, 'schemas/'):s}", - referrer=schema_name, - store=SCHEMA_CACHE, - ) - validator = jsonschema.Draft6Validator( - schema, resolver=resolver, format_checker=None - ) - return validator.validate(spec) - except jsonschema.ValidationError as err: - raise InvalidSpecification(err, schema_name) - - def options_from_eqdelimstring(opts): document = '\n'.join( f"{opt.split('=', 1)[0]}: {opt.split('=', 1)[1]}" for opt in opts @@ -140,14 +100,9 @@ def citation(oneline=False): Returns: citation (:obj:`str`): The citation for this software """ - path = Path( - pkg_resources.resource_filename( - __name__, str(Path('data').joinpath('citation.bib')) - ) - ) - with path.open() as fp: - # remove end-of-file newline if there is one - data = fp.read().strip() + ref = resources.files('pyhf') / 'data' / 'citation.bib' + with resources.as_file(ref) as path: + data = 
path.read_text().strip() if oneline: data = ''.join(data.splitlines()) diff --git a/src/pyhf/workspace.py b/src/pyhf/workspace.py index ae38173de0..6339f71cc4 100644 --- a/src/pyhf/workspace.py +++ b/src/pyhf/workspace.py @@ -10,7 +10,7 @@ import copy import collections from pyhf import exceptions -from pyhf import utils +from pyhf import schema from pyhf.pdf import Model from pyhf.mixins import _ChannelSummaryMixin @@ -307,7 +307,7 @@ def __init__(self, spec, validate: bool = True, **config_kwargs): # run jsonschema validation of input specification against the (provided) schema if validate: log.info(f"Validating spec against schema: {self.schema}") - utils.validate(self, self.schema, version=self.version) + schema.validate(self, self.schema, version=self.version) self.measurement_names = [] for measurement in self.get('measurements', []): @@ -386,7 +386,7 @@ def get_measurement(self, measurement_name=None, measurement_index=None): else: raise exceptions.InvalidMeasurement("No measurements have been defined.") - utils.validate(measurement, 'measurement.json', self.version) + schema.validate(measurement, 'measurement.json', self.version) return measurement def model( @@ -811,7 +811,7 @@ def build(cls, model, data, name='measurement', validate: bool = True): """ workspace = copy.deepcopy(dict(channels=model.spec['channels'])) - workspace['version'] = utils.SCHEMA_VERSION + workspace['version'] = schema.version workspace['measurements'] = [ { 'name': name, diff --git a/src/pyhf/writexml.py b/src/pyhf/writexml.py index 3116c4f2e2..fb118f5b81 100644 --- a/src/pyhf/writexml.py +++ b/src/pyhf/writexml.py @@ -2,13 +2,13 @@ from pathlib import Path import shutil -import pkg_resources import xml.etree.ElementTree as ET import numpy as np import uproot from pyhf.mixins import _ChannelSummaryMixin +from pyhf.schema import path as schema_path _ROOT_DATA_FILE = None @@ -276,7 +276,7 @@ def writexml(spec, specdir, data_rootdir, resultprefix): global _ROOT_DATA_FILE 
shutil.copyfile( - pkg_resources.resource_filename(__name__, 'schemas/HistFactorySchema.dtd'), + schema_path.joinpath('HistFactorySchema.dtd'), Path(specdir).parent.joinpath('HistFactorySchema.dtd'), ) combination = ET.Element( diff --git a/tests/conftest.py b/tests/conftest.py index 1d2650ee68..c32f9007d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -123,7 +123,7 @@ def interpcode(request): @pytest.fixture(scope='function') -def datadir(tmpdir, request): +def datadir(tmp_path, request): """ Fixture responsible for searching a folder with the same name of test module and, if available, moving all contents to a temporary directory so @@ -134,9 +134,9 @@ def datadir(tmpdir, request): test_dir = pathlib.Path(request.module.__file__).with_suffix('') if test_dir.is_dir(): - distutils.dir_util.copy_tree(test_dir, tmpdir.strpath) + distutils.dir_util.copy_tree(test_dir, str(tmp_path)) # shutil is nicer, but doesn't work: https://bugs.python.org/issue20849 # Once pyhf is Python 3.8+ only then the below can be used. 
# shutil.copytree(test_dir, tmpdir) - return tmpdir + return tmp_path diff --git a/tests/constraints.txt b/tests/constraints.txt index 8e9232ec37..ea8752f2a0 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -5,6 +5,7 @@ tqdm==4.56.0 jsonschema==3.0.0 jsonpatch==1.15 pyyaml==5.1 +importlib_resources==1.3.0 # xmlio uproot==4.1.1 # minuit diff --git a/tests/contrib/test_utils.py b/tests/contrib/test_contrib_utils.py similarity index 100% rename from tests/contrib/test_utils.py rename to tests/contrib/test_contrib_utils.py diff --git a/tests/contrib/test_viz.py b/tests/contrib/test_viz.py index 50bb8416cd..5c04ad99b6 100644 --- a/tests/contrib/test_viz.py +++ b/tests/contrib/test_viz.py @@ -13,7 +13,7 @@ def test_brazil_band_collection(datadir): - data = json.load(open(datadir.join("hypotest_results.json"))) + data = json.load(datadir.joinpath("hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -31,7 +31,7 @@ def test_brazil_band_collection(datadir): assert brazil_band_collection.clb is None assert brazil_band_collection.axes == ax - data = json.load(open(datadir.join("tail_probs_hypotest_results.json"))) + data = json.load(datadir.joinpath("tail_probs_hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -52,7 +52,7 @@ def test_brazil_band_collection(datadir): @pytest.mark.mpl_image_compare def test_plot_results(datadir): - data = json.load(open(datadir.join("hypotest_results.json"))) + data = json.load(datadir.joinpath("hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -66,7 +66,7 @@ def test_plot_results(datadir): @pytest.mark.mpl_image_compare def test_plot_results_no_axis(datadir): - data = json.load(open(datadir.join("hypotest_results.json"))) + data = json.load(datadir.joinpath("hypotest_results.json").open()) matplotlib.use("agg") # Use non-gui backend fig, ax = plt.subplots() @@ -78,7 +78,7 @@ def test_plot_results_no_axis(datadir): @pytest.mark.mpl_image_compare def 
test_plot_results_components(datadir): - data = json.load(open(datadir.join("tail_probs_hypotest_results.json"))) + data = json.load(datadir.joinpath("tail_probs_hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -90,7 +90,7 @@ def test_plot_results_components(datadir): @pytest.mark.mpl_image_compare def test_plot_results_components_no_clb(datadir): - data = json.load(open(datadir.join("tail_probs_hypotest_results.json"))) + data = json.load(datadir.joinpath("tail_probs_hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -110,7 +110,7 @@ def test_plot_results_components_no_clb(datadir): @pytest.mark.mpl_image_compare def test_plot_results_components_no_clsb(datadir): - data = json.load(open(datadir.join("tail_probs_hypotest_results.json"))) + data = json.load(datadir.joinpath("tail_probs_hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -130,7 +130,7 @@ def test_plot_results_components_no_clsb(datadir): @pytest.mark.mpl_image_compare def test_plot_results_components_no_cls(datadir): - data = json.load(open(datadir.join("tail_probs_hypotest_results.json"))) + data = json.load(datadir.joinpath("tail_probs_hypotest_results.json").open()) fig = Figure() ax = fig.subplots() @@ -158,7 +158,7 @@ def test_plot_results_components_data_structure(datadir): """ test results should have format of: [CLs_obs, [CLsb, CLb], [CLs_exp band]] """ - data = json.load(open(datadir.join("hypotest_results.json"))) + data = json.load(datadir.joinpath("hypotest_results.json").open()) fig = Figure() ax = fig.subplots() diff --git a/tests/test_export.py b/tests/test_export.py index 6b9b21968c..649170c360 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -421,7 +421,7 @@ def test_integer_data(datadir, mocker): """ Test that a spec with only integer data will be written correctly """ - with open(datadir.join("workspace_integer_data.json")) as spec_file: + with open(datadir.joinpath("workspace_integer_data.json")) as 
spec_file: spec = json.load(spec_file) channel_spec = spec["channels"][0] mocker.patch("pyhf.writexml._ROOT_DATA_FILE") diff --git a/tests/test_modifiers.py b/tests/test_modifiers.py index 9cf4fffe07..82debbf421 100644 --- a/tests/test_modifiers.py +++ b/tests/test_modifiers.py @@ -184,11 +184,11 @@ def test_invalid_bin_wise_modifier(datadir, patch_file): Test that bin-wise modifiers will raise an exception if their data shape differs from their sample's. """ - spec = json.load(open(datadir.join("spec.json"))) + spec = json.load(open(datadir.joinpath("spec.json"))) assert pyhf.Model(spec) - patch = JsonPatch.from_string(open(datadir.join(patch_file)).read()) + patch = JsonPatch.from_string(open(datadir.joinpath(patch_file)).read()) bad_spec = patch.apply(spec) with pytest.raises(pyhf.exceptions.InvalidModifier): diff --git a/tests/test_patchset.py b/tests/test_patchset.py index 4fc7f2e027..b4e3d36724 100644 --- a/tests/test_patchset.py +++ b/tests/test_patchset.py @@ -12,7 +12,7 @@ ids=['patchset_good.json', 'patchset_good_2_patches.json'], ) def patchset(datadir, request): - spec = json.load(open(datadir.join(request.param))) + spec = json.load(open(datadir.joinpath(request.param))) return pyhf.PatchSet(spec) @@ -32,7 +32,7 @@ def patch(): ], ) def test_patchset_invalid_spec(datadir, patchset_file): - patchsetspec = json.load(open(datadir.join(patchset_file))) + patchsetspec = json.load(open(datadir.joinpath(patchset_file))) with pytest.raises(pyhf.exceptions.InvalidSpecification): pyhf.PatchSet(patchsetspec) @@ -46,7 +46,7 @@ def test_patchset_invalid_spec(datadir, patchset_file): ], ) def test_patchset_bad(datadir, patchset_file): - patchsetspec = json.load(open(datadir.join(patchset_file))) + patchsetspec = json.load(open(datadir.joinpath(patchset_file))) with pytest.raises(pyhf.exceptions.InvalidPatchSet): pyhf.PatchSet(patchsetspec) @@ -97,20 +97,20 @@ def test_patchset_repr(patchset): def test_patchset_verify(datadir): - patchset = 
pyhf.PatchSet(json.load(open(datadir.join('example_patchset.json')))) - ws = pyhf.Workspace(json.load(open(datadir.join('example_bkgonly.json')))) + patchset = pyhf.PatchSet(json.load(open(datadir.joinpath('example_patchset.json')))) + ws = pyhf.Workspace(json.load(open(datadir.joinpath('example_bkgonly.json')))) assert patchset.verify(ws) is None def test_patchset_verify_failure(datadir): - patchset = pyhf.PatchSet(json.load(open(datadir.join('example_patchset.json')))) + patchset = pyhf.PatchSet(json.load(open(datadir.joinpath('example_patchset.json')))) with pytest.raises(pyhf.exceptions.PatchSetVerificationError): assert patchset.verify({}) def test_patchset_apply(datadir): - patchset = pyhf.PatchSet(json.load(open(datadir.join('example_patchset.json')))) - ws = pyhf.Workspace(json.load(open(datadir.join('example_bkgonly.json')))) + patchset = pyhf.PatchSet(json.load(open(datadir.joinpath('example_patchset.json')))) + ws = pyhf.Workspace(json.load(open(datadir.joinpath('example_bkgonly.json')))) with mock.patch('pyhf.patchset.PatchSet.verify') as m: assert m.call_count == 0 assert patchset.apply(ws, 'patch_channel1_signal_syst1') @@ -135,7 +135,7 @@ def test_patch_equality(patch): def test_patchset_get_string_values(datadir): patchset = pyhf.PatchSet( - json.load(open(datadir.join('patchset_good_stringvalues.json'))) + json.load(open(datadir.joinpath('patchset_good_stringvalues.json'))) ) assert patchset["Gtt_2100_5000_800"] assert patchset["Gbb_2200_5000_800"] diff --git a/tests/test_public_api_repr.py b/tests/test_public_api_repr.py index ccb4738d63..a9245e7658 100644 --- a/tests/test_public_api_repr.py +++ b/tests/test_public_api_repr.py @@ -24,6 +24,7 @@ def test_top_level_public_api(): "patchset", "pdf", "probability", + "schema", "set_backend", "simplemodels", "tensor", @@ -243,9 +244,16 @@ def test_utils_public_api(): "EqDelimStringParamType", "citation", "digest", - "load_schema", "options_from_eqdelimstring", + ] + + +def test_schema_public_api(): + 
assert dir(pyhf.schema) == [ + "load_schema", + "path", "validate", + "version", ] diff --git a/tests/test_schema.py b/tests/test_schema.py index ec1d7098ad..f820c12a8e 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -3,6 +3,42 @@ import json +@pytest.mark.parametrize('version', ['1.0.0']) +@pytest.mark.parametrize( + 'schema', ['defs.json', 'measurement.json', 'model.json', 'workspace.json'] +) +def test_get_schema(version, schema): + assert pyhf.schema.load_schema(f'{version}/{schema}') + + +def test_load_missing_schema(): + with pytest.raises(IOError): + pyhf.schema.load_schema('fake_schema.json') + + +def test_schema_attributes(): + assert hasattr(pyhf.schema, 'version') + assert hasattr(pyhf.schema, 'path') + assert pyhf.schema.version + assert pyhf.schema.path + + +def test_schema_callable(): + assert callable(pyhf.schema) + + +def test_schema_changeable(datadir): + with pytest.raises(pyhf.exceptions.SchemaNotFound): + pyhf.Workspace(json.load(open(datadir / 'customschema' / 'custom.json'))) + + old_path = pyhf.schema.path + pyhf.schema(datadir / 'customschema') + assert pyhf.schema.path != old_path + assert pyhf.schema.path == datadir / 'customschema' + assert pyhf.Workspace(json.load(open(datadir / 'customschema' / 'custom.json'))) + pyhf.schema(old_path) + + def test_no_channels(): spec = {'channels': []} with pytest.raises(pyhf.exceptions.InvalidSpecification): @@ -444,7 +480,7 @@ def test_normsys_additional_properties(): ids=['add', 'replace', 'test', 'remove', 'move', 'copy'], ) def test_jsonpatch(patch): - pyhf.utils.validate([patch], 'jsonpatch.json') + pyhf.schema.validate([patch], 'jsonpatch.json') @pytest.mark.parametrize( @@ -470,13 +506,13 @@ def test_jsonpatch(patch): ) def test_jsonpatch_fail(patch): with pytest.raises(pyhf.exceptions.InvalidSpecification): - pyhf.utils.validate([patch], 'jsonpatch.json') + pyhf.schema.validate([patch], 'jsonpatch.json') @pytest.mark.parametrize('patchset_file', ['patchset_good.json']) def 
test_patchset(datadir, patchset_file): - patchset = json.load(open(datadir.join(patchset_file))) - pyhf.utils.validate(patchset, 'patchset.json') + patchset = json.load(open(datadir.joinpath(patchset_file))) + pyhf.schema.validate(patchset, 'patchset.json') @pytest.mark.parametrize( @@ -495,6 +531,6 @@ def test_patchset(datadir, patchset_file): ], ) def test_patchset_fail(datadir, patchset_file): - patchset = json.load(open(datadir.join(patchset_file))) + patchset = json.load(open(datadir.joinpath(patchset_file))) with pytest.raises(pyhf.exceptions.InvalidSpecification): - pyhf.utils.validate(patchset, 'patchset.json') + pyhf.schema.validate(patchset, 'patchset.json') diff --git a/tests/test_schema/customschema/1.1.0/defs.json b/tests/test_schema/customschema/1.1.0/defs.json new file mode 100644 index 0000000000..dd63e8d32a --- /dev/null +++ b/tests/test_schema/customschema/1.1.0/defs.json @@ -0,0 +1,315 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$id": "1.1.0/defs.json", + "definitions": { + "workspace": { + "type": "object", + "properties": { + "channels": { "type": "array", "items": {"$ref": "#/definitions/channel"}, "minItems": 1 }, + "measurements": { "type": "array", "items": {"$ref": "#/definitions/measurement"}, "minItems": 1 }, + "observations": { "type": "array", "items": {"$ref": "#/definitions/observation" }, "minItems": 1 }, + "version": { "const": "1.1.0" } + }, + "additionalProperties": false, + "required": ["channels", "measurements", "observations", "version"] + }, + "model": { + "type": "object", + "properties": { + "channels": { "type": "array", "items": {"$ref": "#/definitions/channel"}, "minItems": 1 }, + "parameters": { "type": "array", "items": {"$ref": "#/definitions/parameter"} } + }, + "additionalProperties": false, + "required": ["channels"] + }, + "observation": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "data": { "type": "array", "items": {"type": "number"}, "minItems": 1 } + }, + 
"required": ["name", "data"], + "additionalProperties": false + }, + "measurement": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "config": { "$ref": "#/definitions/config" } + }, + "required": ["name", "config"], + "additionalProperties": false + }, + "config": { + "type": "object", + "properties": { + "poi": { "type" : "string" }, + "parameters": { "type": "array", "items": {"$ref": "#/definitions/parameter"} } + }, + "required": ["poi", "parameters"], + "additionalProperties": false + }, + "parameter": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "inits": { "type": "array", "items": {"type": "number"}, "minItems": 1 }, + "bounds": { "type": "array", "items": {"type": "array", "items": {"type": "number", "minItems": 2, "maxItems": 2}}, "minItems": 1 }, + "auxdata": { "type": "array", "items": {"type": "number"}, "minItems": 1 }, + "factors": { "type": "array", "items": {"type": "number"}, "minItems": 1 }, + "sigmas": { "type": "array", "items": {"type": "number"}, "minItems": 1}, + "fixed": { "type": "boolean" } + }, + "required": ["name"], + "additionalProperties": false + }, + "channel": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "samples": { "type": "array", "items": {"$ref": "#/definitions/sample"}, "minItems": 1 } + }, + "required": ["name", "samples"], + "additionalProperties": false + }, + "sample": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "data": { "type": "array", "items": {"type": "number"}, "minItems": 1 }, + "modifiers": { + "type": "array", + "items": { + "anyOf": [ + { "$ref": "#/definitions/modifier/histosys" }, + { "$ref": "#/definitions/modifier/lumi" }, + { "$ref": "#/definitions/modifier/normfactor" }, + { "$ref": "#/definitions/modifier/normsys" }, + { "$ref": "#/definitions/modifier/shapefactor" }, + { "$ref": "#/definitions/modifier/shapesys" }, + { "$ref": "#/definitions/modifier/staterror" } + ] + } + } + }, + 
"required": ["name", "data", "modifiers"], + "additionalProperties": false + }, + "modifier": { + "histosys": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { "const": "histosys" }, + "data": { + "type": "object", + "properties": { + "lo_data": { "type": "array", "items": {"type": "number"}, "minItems": 1 }, + "hi_data": { "type": "array", "items": {"type": "number"}, "minItems": 1 } + }, + "required": ["lo_data", "hi_data"], + "additionalProperties": false + } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + }, + "lumi": { + "type": "object", + "properties": { + "name": { "const": "lumi" }, + "type": { "const": "lumi" }, + "data": { "type": "null" } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + }, + "normfactor": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { "const": "normfactor" }, + "data": { "type": "null" } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + }, + "normsys": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { "const": "normsys" }, + "data": { + "type": "object", + "properties": { + "lo": { "type": "number" }, + "hi": { "type": "number"} + }, + "required": ["lo", "hi"], + "additionalProperties": false + } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + }, + "shapefactor": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { "const": "shapefactor" }, + "data": { "type": "null" } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + }, + "shapesys": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "type": { "const": "shapesys" }, + "data": { "type": "array", "items": {"type": "number"}, "minItems": 1 } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + }, + "staterror": { + "type": "object", + "properties": { + "name": 
{ "type": "string" }, + "type": { "const": "staterror" }, + "data": { "type": "array", "items": {"type": "number"}, "minItems": 1 } + }, + "required": ["name", "type", "data"], + "additionalProperties": false + } + }, + "jsonpatch": { + "description": "an array of patch operations (copied from http://json.schemastore.org/json-patch)", + "type": "array", + "items": { + "$ref": "#/definitions/jsonpatch/operation" + }, + "operation": { + "type": "object", + "required": [ "op", "path" ], + "allOf": [ { "$ref": "#/definitions/jsonpatch/path" } ], + "oneOf": [ + { + "required": [ "value" ], + "properties": { + "op": { + "description": "The operation to perform.", + "type": "string", + "enum": [ "add", "replace", "test" ] + }, + "value": { + "description": "The value to add, replace or test." + } + } + }, + { + "properties": { + "op": { + "description": "The operation to perform.", + "type": "string", + "enum": [ "remove" ] + } + } + }, + { + "required": [ "from" ], + "properties": { + "op": { + "description": "The operation to perform.", + "type": "string", + "enum": [ "move", "copy" ] + }, + "from": { + "description": "A JSON Pointer path pointing to the location to move/copy from.", + "type": "string" + } + } + } + ] + }, + "path": { + "properties": { + "path": { + "description": "A JSON Pointer path.", + "type": "string" + } + } + } + }, + "patchset": { + "description": "A set of JSONPatch patches which modify a pyhf workspace", + "type": "object", + "properties": { + "patches": { "$ref": "#/definitions/patchset/patches" }, + "metadata": { "$ref": "#/definitions/patchset/metadata" }, + "version": { "const": "1.1.0" } + }, + "additionalProperties": false, + "required": ["patches", "metadata", "version"], + "references": { + "type": "object", + "properties": { + "hepdata": { "type": "string", "pattern": "^ins[0-9]{7}$" } + }, + "additionalProperties": false, + "minProperties": 1 + }, + "digests": { + "type": "object", + "properties": { + "md5": { "type": "string", 
"pattern": "^[a-f0-9]{32}$" }, + "sha256": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" } + }, + "additionalProperties": false, + "minProperties": 1 + }, + "patches": { + "type": "array", + "items": { "$ref": "#/definitions/patchset/patch" }, + "minItems": 1 + }, + "patch": { + "type": "object", + "properties": { + "patch": { "$ref": "#/definitions/jsonpatch" }, + "metadata": { + "type": "object", + "properties": { + "name": { "type": "string", "pattern": "^[a-zA-Z0-9_]+$" }, + "values": { + "type": "array", + "items": { + "anyOf": [{"type": "number"}, {"type": "string"}] + } + } + }, + "required": ["name", "values"], + "additionalProperties": true + } + }, + "required": ["metadata", "patch"], + "additionalProperties": false + }, + "metadata": { + "type": "object", + "properties": { + "digests": { "$ref": "#/definitions/patchset/digests" }, + "labels": { + "type": "array", + "items": { "type": "string", "pattern": "^[a-zA-Z0-9_]+$" }, + "minItems": 1 + }, + "description": { "type": "string" }, + "references": { "$ref": "#/definitions/patchset/references" } + }, + "required": ["references", "digests", "labels", "description"], + "additionalProperties": true + } + } + } +} diff --git a/tests/test_schema/customschema/1.1.0/jsonpatch.json b/tests/test_schema/customschema/1.1.0/jsonpatch.json new file mode 100644 index 0000000000..63b4496bc7 --- /dev/null +++ b/tests/test_schema/customschema/1.1.0/jsonpatch.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$id": "1.1.0/jsonpatch.json", + "$ref": "defs.json#/definitions/jsonpatch" +} diff --git a/tests/test_schema/customschema/1.1.0/measurement.json b/tests/test_schema/customschema/1.1.0/measurement.json new file mode 100644 index 0000000000..124d84a522 --- /dev/null +++ b/tests/test_schema/customschema/1.1.0/measurement.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$id": "1.1.0/measurement.json", + "$ref": "defs.json#/definitions/measurement" +} 
diff --git a/tests/test_schema/customschema/1.1.0/model.json b/tests/test_schema/customschema/1.1.0/model.json new file mode 100644 index 0000000000..f44f47edc6 --- /dev/null +++ b/tests/test_schema/customschema/1.1.0/model.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$id": "1.1.0/model.json", + "$ref": "defs.json#/definitions/model" +} diff --git a/tests/test_schema/customschema/1.1.0/patchset.json b/tests/test_schema/customschema/1.1.0/patchset.json new file mode 100644 index 0000000000..c7f5596dc5 --- /dev/null +++ b/tests/test_schema/customschema/1.1.0/patchset.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$id": "1.1.0/patchset.json", + "$ref": "defs.json#/definitions/patchset" +} diff --git a/tests/test_schema/customschema/1.1.0/workspace.json b/tests/test_schema/customschema/1.1.0/workspace.json new file mode 100644 index 0000000000..5e91630381 --- /dev/null +++ b/tests/test_schema/customschema/1.1.0/workspace.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$id": "1.1.0/workspace.json", + "$ref": "defs.json#/definitions/workspace" +} diff --git a/tests/test_schema/customschema/custom.json b/tests/test_schema/customschema/custom.json new file mode 100644 index 0000000000..f35ea110aa --- /dev/null +++ b/tests/test_schema/customschema/custom.json @@ -0,0 +1,74 @@ +{ + "channels": [ + { + "name": "singlechannel", + "samples": [ + { + "name": "signal", + "data": [ + 10 + ], + "modifiers": [ + { + "name": "mu", + "type": "normfactor", + "data": null + } + ] + }, + { + "name": "background", + "data": [ + 5 + ], + "modifiers": [ + { + "name": "uncorr_bkguncrt", + "type": "shapesys", + "data": [ + 1 + ] + } + ] + } + ] + } + ], + "measurements": [ + { + "config": { + "parameters": [ + { + "auxdata": [ + 1 + ], + "bounds": [ + [ + 0.915, + 1.085 + ] + ], + "inits": [ + 1 + ], + "name": "lumi", + "sigmas": [ + 0.017 + ] + } + ], + "poi": "mu_SIG" + }, + "name": 
"NormalMeasurement" + } + ], + "observations": [ + { + "data": [ + 5 + ], + "name": "singlechannel" + } + ], + "version": "1.1.0" +} diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 5b643a77f5..6dffc7ff61 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -63,7 +63,7 @@ def test_import_prepHistFactory(tmpdir, script_runner): parsed_xml = json.loads(temp.read()) spec = {'channels': parsed_xml['channels']} - pyhf.utils.validate(spec, 'model.json') + pyhf.schema.validate(spec, 'model.json') def test_import_prepHistFactory_withProgress(tmpdir, script_runner): @@ -647,7 +647,7 @@ def test_missing_contrib_download(caplog): def test_patchset_inspect(datadir, script_runner): - command = f'pyhf patchset inspect {datadir.join("example_patchset.json").strpath}' + command = f'pyhf patchset inspect {datadir.joinpath("example_patchset.json")}' ret = script_runner.run(*shlex.split(command)) assert 'patch_channel1_signal_syst1' in ret.stdout @@ -656,7 +656,7 @@ def test_patchset_inspect(datadir, script_runner): @pytest.mark.parametrize('with_metadata', [False, True]) def test_patchset_extract(datadir, tmpdir, script_runner, output_file, with_metadata): temp = tmpdir.join("extracted_output.json") - command = f'pyhf patchset extract {datadir.join("example_patchset.json").strpath} --name patch_channel1_signal_syst1' + command = f'pyhf patchset extract {datadir.joinpath("example_patchset.json")} --name patch_channel1_signal_syst1' if output_file: command += f" --output-file {temp.strpath}" if with_metadata: @@ -674,12 +674,14 @@ def test_patchset_extract(datadir, tmpdir, script_runner, output_file, with_meta else: assert ( extracted_output - == json.load(datadir.join("example_patchset.json"))['patches'][0]['patch'] + == json.load(datadir.joinpath("example_patchset.json").open())['patches'][ + 0 + ]['patch'] ) def test_patchset_verify(datadir, script_runner): - command = f'pyhf patchset verify {datadir.join("example_bkgonly.json").strpath} 
{datadir.join("example_patchset.json").strpath}' + command = f'pyhf patchset verify {datadir.joinpath("example_bkgonly.json")} {datadir.joinpath("example_patchset.json")}' ret = script_runner.run(*shlex.split(command)) assert ret.success @@ -689,7 +691,7 @@ def test_patchset_verify(datadir, script_runner): @pytest.mark.parametrize('output_file', [False, True]) def test_patchset_apply(datadir, tmpdir, script_runner, output_file): temp = tmpdir.join("patched_output.json") - command = f'pyhf patchset apply {datadir.join("example_bkgonly.json").strpath} {datadir.join("example_patchset.json").strpath} --name patch_channel1_signal_syst1' + command = f'pyhf patchset apply {datadir.joinpath("example_bkgonly.json")} {datadir.joinpath("example_patchset.json")} --name patch_channel1_signal_syst1' if output_file: command += f" --output-file {temp.strpath}" diff --git a/tests/test_utils.py b/tests/test_utils.py index cb376c4e58..a086fad3c8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,18 +2,6 @@ import pyhf -@pytest.mark.parametrize( - 'schema', ['defs.json', 'measurement.json', 'model.json', 'workspace.json'] -) -def test_get_schema(schema): - assert pyhf.utils.load_schema(schema) - - -def test_load_missing_schema(): - with pytest.raises(IOError): - pyhf.utils.load_schema('fake_schema.json') - - @pytest.mark.parametrize( 'opts,obj', [ diff --git a/tests/test_workspace.py b/tests/test_workspace.py index 70a10bbaed..4ef6eed51a 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -6,6 +6,7 @@ import logging import pyhf.workspace import pyhf.utils +import pyhf.schema import copy @@ -112,14 +113,14 @@ def test_get_workspace_measurement_priority(workspace_factory): def test_get_measurement_schema_validation(mocker, workspace_factory): - mocker.patch('pyhf.utils.validate', return_value=None) - assert pyhf.utils.validate.called is False + mocker.patch('pyhf.schema.validate', return_value=None) + assert pyhf.schema.validate.called is False w = 
workspace_factory() - assert pyhf.utils.validate.call_count == 1 - assert pyhf.utils.validate.call_args[0][1] == 'workspace.json' + assert pyhf.schema.validate.call_count == 1 + assert pyhf.schema.validate.call_args[0][1] == 'workspace.json' w.get_measurement() - assert pyhf.utils.validate.call_count == 2 - assert pyhf.utils.validate.call_args[0][1] == 'measurement.json' + assert pyhf.schema.validate.call_count == 2 + assert pyhf.schema.validate.call_args[0][1] == 'measurement.json' def test_get_workspace_repr(workspace_factory): @@ -883,7 +884,7 @@ def test_workspace_poiless(datadir): """ Test that a workspace with a measurement with empty POI string is treated as POI-less """ - spec = json.load(open(datadir.join("poiless.json"))) + spec = json.load(open(datadir.joinpath("poiless.json"))) ws = pyhf.Workspace(spec) model = ws.model() @@ -903,9 +904,9 @@ def test_wspace_unexpected_keyword_argument(simplemodels_model_data): def test_workspace_without_validation(mocker, simplemodels_model_data): model, data = simplemodels_model_data - mocker.patch('pyhf.utils.validate') + mocker.patch('pyhf.schema.validate') ws = pyhf.Workspace.build(model, data, validate=False) - assert pyhf.utils.validate.called is False + assert pyhf.schema.validate.called is False pyhf.Workspace(dict(ws), validate=False) - assert pyhf.utils.validate.called is False + assert pyhf.schema.validate.called is False