From b146429bb0266019812ddaf0033f7014fa383329 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 27 Jul 2023 17:31:54 -0400 Subject: [PATCH] feat: add a schema for serialization Signed-off-by: Henry Schreiner --- pyproject.toml | 4 + src/uhi/resources/histogram.json | 200 +++++++++++++++++++++++++++++++ src/uhi/schema.py | 28 +++++ tests/resources/reg.json | 31 +++++ 4 files changed, 263 insertions(+) create mode 100644 src/uhi/resources/histogram.json create mode 100644 src/uhi/schema.py create mode 100644 tests/resources/reg.json diff --git a/pyproject.toml b/pyproject.toml index c9dfd69..7278592 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,10 @@ Documentation = "https://uhi.readthedocs.io/en/latest/" Changelog = "https://github.com/scikit-hep/uhi/releases" [project.optional-dependencies] +schema = [ + "fastjsonschema", + "importlib-resources; python_version<'3.9'", +] docs = [ "sphinx>=4.0", "sphinx-book-theme>=0.0.40", diff --git a/src/uhi/resources/histogram.json b/src/uhi/resources/histogram.json new file mode 100644 index 0000000..1b8124e --- /dev/null +++ b/src/uhi/resources/histogram.json @@ -0,0 +1,200 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/scikit-hep/uhi/.json", + "type": "object", + "patternProperties": { + ".+": { + "type": "object", + "required": ["axes", "storage"], + "properties": { + "title": { "type": "string" }, + "name": { "type": "string" }, + "metadata": { "type": "object" }, + "axes": { + "type": "array", + "items": { + "oneOf": [ + { "$ref": "#/$defs/regular_axis" }, + { "$ref": "#/$defs/variable_axis" }, + { "$ref": "#/$defs/str_category_axis" }, + { "$ref": "#/$defs/int_category_axis" }, + { "$ref": "#/$defs/boolean_axis" } + ] + } + }, + "storage": { + "oneOf": [ + { "$ref": "#/$defs/int_storage" }, + { "$ref": "#/$defs/double_storage" }, + { "$ref": "#/$defs/weighted_storage" }, + { "$ref": "#/$defs/mean_storage" }, + { "$ref": 
"#/$defs/weighted_mean_storage" } + ] + } + } + } + }, + "$defs": { + "regular_axis": { + "type": "object", + "required": ["type", "lower", "upper", "bins", "underflow", "overflow"], + "properties": { + "type": { "type": "string", "pattern": "regular" }, + "lower": { "type": "number" }, + "upper": { "type": "number" }, + "bins": { "type": "number" }, + "underflow": { "type": "boolean" }, + "overflow": { "type": "boolean" }, + "circular": { "type": "boolean" }, + "name": { "type": "string" }, + "title": { "type": "string" }, + "metadata": { "type": "object" } + } + }, + "variable_axis": { + "type": "object", + "required": ["type", "edges", "underflow", "overflow"], + "properties": { + "type": { "type": "string", "pattern": "variable" }, + "edges": { + "oneOf": [ + { + "type": "array", + "items": { "type": "number" } + }, + { + "type": "string" + } + ] + }, + "underflow": { "type": "boolean" }, + "overflow": { "type": "boolean" }, + "circular": { "type": "boolean" }, + "name": { "type": "string" }, + "title": { "type": "string" }, + "metadata": { "type": "object" } + } + }, + "str_category_axis": { + "type": "object", + "required": ["type", "categories", "flow"], + "properties": { + "type": { "type": "string", "pattern": "str_category" }, + "categories": { "type": "array", "items": { "type": "string" } }, + "flow": { "type": "boolean" }, + "name": { "type": "string" }, + "title": { "type": "string" }, + "metadata": { "type": "object" } + } + }, + "int_category_axis": { + "type": "object", + "required": ["type", "categories", "flow"], + "properties": { + "type": { "type": "string", "pattern": "int_category" }, + "categories": { "type": "array", "items": { "type": "number" } }, + "flow": { "type": "boolean" }, + "name": { "type": "string" }, + "title": { "type": "string" }, + "metadata": { "type": "object" } + } + }, + "boolean_axis": { + "type": "object", + "required": ["type"], + "properties": { + "type": { "type": "string", "pattern": "boolean" }, + "name": { 
"type": "string" }, + "title": { "type": "string" }, + "metadata": { "type": "object" } + } + }, + "int_storage": { + "type": "object", + "properties": { + "type": { "type": "string", "pattern": "int" }, + "data": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "number" } } + ] + } + } + }, + "double_storage": { + "type": "object", + "properties": { + "type": { "type": "string", "pattern": "double" }, + "data": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "number" } } + ] + } + } + }, + "weighted_storage": { + "type": "object", + "properties": { + "type": { "type": "string", "pattern": "int" }, + "data": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "values": { "type": "array", "items": { "type": "number" } }, + "variances": { "type": "array", "items": { "type": "number" } } + } + } + ] + } + } + }, + "mean_storage": { + "type": "object", + "properties": { + "type": { "type": "string", "pattern": "int" }, + "data": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "counts": { "type": "array", "items": { "type": "number" } }, + "values": { "type": "array", "items": { "type": "number" } }, + "variances": { "type": "array", "items": { "type": "number" } } + } + } + ] + } + } + }, + "weighted_mean_storage": { + "type": "object", + "properties": { + "type": { "type": "string", "pattern": "int" }, + "data": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "sum_of_weights": { + "type": "array", + "items": { "type": "number" } + }, + "sum_of_weights_squared": { + "type": "array", + "items": { "type": "number" } + }, + "values": { "type": "array", "items": { "type": "number" } }, + "variances": { "type": "array", "items": { "type": "number" } } + } + } + ] + } + } + } + } +} diff --git a/src/uhi/schema.py b/src/uhi/schema.py new file mode 100644 index 0000000..6e729e8 --- /dev/null +++ b/src/uhi/schema.py @@ 
-0,0 +1,28 @@
+"""Validate JSON-serialized histograms against the bundled UHI schema."""
+
+import json
+import sys
+
+import fastjsonschema
+
+if sys.version_info < (3, 9):
+    import importlib_resources as resources
+else:
+    from importlib import resources
+
+histogram_file = resources.files("uhi") / "resources/histogram.json"
+
+with histogram_file.open(encoding="utf-8") as f:
+    histogram_schema = fastjsonschema.compile(json.load(f))
+
+
+def validate(path: str) -> None:
+    """Validate the JSON file at ``path`` against the histogram schema."""
+    with open(path, encoding="utf-8") as stream:
+        document = json.load(stream)
+
+    histogram_schema(document)
+
+
+if __name__ == "__main__":
+    validate(*sys.argv[1:])
diff --git a/tests/resources/reg.json b/tests/resources/reg.json
new file mode 100644
index 0000000..8b928d4
--- /dev/null
+++ b/tests/resources/reg.json
@@ -0,0 +1,31 @@
+{
+  "one": {
+    "title": "One",
+    "metadata": {},
+    "axes": [
+      {
+        "type": "regular",
+        "lower": 0,
+        "upper": 5,
+        "bins": 3,
+        "underflow": true,
+        "overflow": true,
+        "circular": false
+      }
+    ],
+    "storage": { "type": "int", "data": [1, 2, 3, 4, 5] }
+  },
+  "two": {
+    "axes": [
+      {
+        "type": "regular",
+        "lower": 0,
+        "upper": 5,
+        "bins": 5,
+        "underflow": true,
+        "overflow": true
+      }
+    ],
+    "storage": { "type": "double", "data": "something" }
+  }
+}