From da174fd63e21e5bdc180a0fd83ef3d8ef1246e0b Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 11:59:53 +0000 Subject: [PATCH 01/10] ENH: Raise error if JSON objects contain duplicate keys --- .pre-commit-config.yaml | 2 +- twined/utils/load_json.py | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f56d8df..9bc4e1e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -67,7 +67,7 @@ repos: - '^hotfix/([a-z][a-z0-9]*)(-[a-z0-9]+)*$' - '^refactor/([a-z][a-z0-9]*)(-[a-z0-9]+)*$' - '^review/([a-z][a-z0-9]*)(-[a-z0-9]+)*$' - - '^release/(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$' + - '^enhancement/([a-z][a-z0-9]*)(-[a-z0-9]+)*$' - repo: https://github.com/octue/pre-commit-hooks rev: 0.5.0 diff --git a/twined/utils/load_json.py b/twined/utils/load_json.py index 264708a..cb78544 100644 --- a/twined/utils/load_json.py +++ b/twined/utils/load_json.py @@ -33,7 +33,7 @@ def check(kind): if isinstance(source, io.IOBase): logger.debug("Detected source is a file-like object, loading contents...") check("file-like") - return json.load(source, *args, **kwargs) + return json.load(source, object_pairs_hook=raise_error_if_duplicate_keys, *args, **kwargs) elif not isinstance(source, str): logger.debug("Source is not a string, bypassing (returning raw data)") @@ -44,9 +44,27 @@ def check(kind): logger.debug("Detected source is name of a *.json file, loading from %s", source) check("filename") with open(source) as f: - return json.load(f, *args, **kwargs) + return json.load(f, object_pairs_hook=raise_error_if_duplicate_keys, *args, **kwargs) else: logger.debug("Detected source is string containing json data, parsing...") check("string") - return json.loads(source, *args, **kwargs) + return json.loads(source, object_pairs_hook=raise_error_if_duplicate_keys, *args, **kwargs) + + +def raise_error_if_duplicate_keys(pairs): + """Raise an error if any of the given key-value pairs have the same key. + + :param list(tuple) pairs: a JSON object converted to a list of key-value pairs + :raise KeyError: if any of the pairs have the same key + :return dict: + """ + result = {} + + for key, value in pairs: + if key in result: + raise KeyError(f"Duplicate key detected: {key!r}.") + + result[key] = value + + return result From 4908e94ab9731deb91581ee4271ecdd4dfa43c3c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 12:03:09 +0000 Subject: [PATCH 02/10] ENH: Expect datasets as a dictionary in manifests BREAKING CHANGE: This changes the schema for manifests and manifest.json files --- setup.py | 2 +- tests/test_manifest_strands.py | 54 ++++++------ twined/exceptions.py | 4 - twined/schema/manifest_schema.json | 127 +++++++++++++++++------------ twined/twine.py | 12 +-- 5 files changed, 103 insertions(+), 96 deletions(-) diff --git a/setup.py b/setup.py index 2fc16e7..439cb2b 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name="twined", - version="0.1.2", + version="0.1.3", py_modules=[], install_requires=["jsonschema ~= 3.2.0", "python-dotenv"], url="https://www.github.com/octue/twined", diff --git a/tests/test_manifest_strands.py b/tests/test_manifest_strands.py index a879e15..ed504d8 100644 --- a/tests/test_manifest_strands.py +++ b/tests/test_manifest_strands.py @@ -83,8 +83,8 @@ class TestManifestStrands(BaseTestCase): INPUT_MANIFEST_WITH_CORRECT_FILE_TAGS = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "met_mast_data": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", "name": "met_mast_data", "tags": {}, @@ -122,7 +122,7 @@ class TestManifestStrands(BaseTestCase): } ] } - ] + } } """ @@ -145,8 +145,8 @@ def test_valid_manifest_files(self): valid_configuration_manifest = """ { "id": "3ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "configuration_files_data": { "id": "34ad7669-8162-4f64-8cd5-4abe92509e17", "name": "configuration_files_data", "tags": {}, @@ -182,15 +182,15 @@ def test_valid_manifest_files(self): } ] } - ] + } } """ valid_input_manifest = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "met_mast_data": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", "name": "met_mast_data", "tags": {}, @@ -226,15 +226,15 @@ def test_valid_manifest_files(self): } ] } - ] + } } """ valid_output_manifest = """ { "id": "2ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "output_files_data": { "id": "1ead7669-8162-4f64-8cd5-4abe92509e17", "name": "output_files_data", "tags": {}, @@ -270,7 +270,7 @@ def test_valid_manifest_files(self): } ] } - ] + } } """ @@ -348,8 +348,8 @@ def test_error_raised_when_required_tags_missing_for_validate_input_manifest(sel input_manifest = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "met_mast_data": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", "name": "met_mast_data", "tags": {}, @@ -367,7 +367,7 @@ def test_error_raised_when_required_tags_missing_for_validate_input_manifest(sel } ] } - ] + } } """ @@ -383,8 +383,8 @@ def test_validate_input_manifest_raises_error_if_required_tags_are_not_of_requir input_manifest = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "met_mast_data": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", "name": "met_mast_data", "tags": {}, @@ -402,7 +402,7 @@ def test_validate_input_manifest_raises_error_if_required_tags_are_not_of_requir } ] } - ] + } } """ @@ -519,8 +519,8 @@ def test_validate_input_manifest_with_required_tags_in_several_datasets(self): input_manifest = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "first_dataset": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e19", "name": "first_dataset", "tags": {}, @@ -541,7 +541,7 @@ def test_validate_input_manifest_with_required_tags_in_several_datasets(self): } ] }, - { + "second_dataset": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e18", "name": "second_dataset", "tags": {}, @@ -562,7 +562,7 @@ def test_validate_input_manifest_with_required_tags_in_several_datasets(self): } ] } - ] + } } """ @@ -574,28 +574,28 @@ def test_error_raised_if_multiple_datasets_have_same_name(self): input_manifest = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", - "datasets": [ - { + "datasets": { + "met_mast_data": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e19", "name": "met_mast_data", "tags": {}, "labels": [], "files": [] }, - { + "met_mast_data": { "id": "7ead7669-8162-4f64-8cd5-4abe92509e18", "name": "met_mast_data", "tags": {}, "labels": [], "files": [] } - ] + } } """ twine = Twine(source=self.TWINE_WITH_INPUT_MANIFEST_WITH_TAG_TEMPLATE) - with self.assertRaises(exceptions.DatasetNameIsNotUnique): + with self.assertRaises(KeyError): twine.validate_input_manifest(source=input_manifest) diff --git a/twined/exceptions.py b/twined/exceptions.py index 03a01c9..fdcb25d 100644 --- a/twined/exceptions.py +++ b/twined/exceptions.py @@ -124,10 +124,6 @@ class InvalidManifestContents(InvalidManifest, ValidationError): """Raised when the manifest files are missing or do not match tags, sequences, clusters, extensions etc as required""" -class DatasetNameIsNotUnique(InvalidManifest): - """Raise when a dataset's name is not unique within its manifest.""" - - # --------------------- Exceptions relating to access of data using the Twine instance ------------------------ # TODO This is related to filtering files from a manifest. Determine whether this belongs here, diff --git a/twined/schema/manifest_schema.json b/twined/schema/manifest_schema.json index fdb2131..808ce94 100644 --- a/twined/schema/manifest_schema.json +++ b/twined/schema/manifest_schema.json @@ -21,62 +21,81 @@ "type": "string" }, "datasets": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "description": "ID of the dataset, typically a uuid", - "type": "string" - }, - "name": { - "description": "Name of the dataset", - "type": "string" - }, - "tags": {"$ref": "#/$defs/tags"}, - "labels": {"$ref": "#/$defs/labels"}, - "files": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "description": "A file id", - "type": "string" - }, - "path": { - "description": "Path at which the file can be found", - "type": "string" - }, - "extension": { - "description": "The file extension (not including a '.')", - "type": "string" + "type": "object", + "patternProperties": { + ".+": { + "type": "object", + "properties": { + "id": { + "description": "ID of the dataset, typically a uuid", + "type": "string" + }, + "name": { + "description": "Name of the dataset", + "type": "string" + }, + "tags": { + "$ref": "#/$defs/tags" + }, + "labels": { + "$ref": "#/$defs/labels" + }, + "files": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "description": "A file id", + "type": "string" + }, + "path": { + "description": "Path at which the file can be found", + "type": "string" + }, + "extension": { + "description": "The file extension (not including a '.')", + "type": "string" + }, + "sequence": { + "description": "The ordering on the file, if any, within its group/cluster", + "type": [ + "integer", + "null" + ] + }, + "cluster": { + "description": "The group, or cluster, to which the file belongs", + "type": "integer" + }, + "posix_timestamp": { + "description": "A posix based timestamp associated with the file. This may, but need not be, the created or modified time. ", + "type": "number" + }, + "tags": { + "$ref": "#/$defs/tags" + }, + "labels": { + "$ref": "#/$defs/labels" + } }, - "sequence": { - "description": "The ordering on the file, if any, within its group/cluster", - "type": ["integer", "null"] - }, - "cluster": { - "description": "The group, or cluster, to which the file belongs", - "type": "integer" - }, - "posix_timestamp": { - "description": "A posix based timestamp associated with the file. This may, but need not be, the created or modified time. ", - "type": "number" - }, - "tags": {"$ref": "#/$defs/tags"}, - "labels": {"$ref": "#/$defs/labels"} - }, - "required": [ - "id", - "path", - "tags", - "labels" - ] + "required": [ + "id", + "path", + "tags", + "labels" + ] + } } - } - }, - "required": ["id", "name", "tags", "labels", "files"] + }, + "required": [ + "id", + "name", + "tags", + "labels", + "files" + ] + } } } }, diff --git a/twined/twine.py b/twined/twine.py index 34e497d..100de88 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -190,19 +190,11 @@ def _validate_dataset_file_tags(self, manifest_kind, manifest): manifest_schema = getattr(self, manifest_kind) for dataset_schema in manifest_schema["datasets"]: - datasets = [dataset for dataset in manifest["datasets"] if dataset["name"] == dataset_schema["key"]] + dataset = manifest["datasets"].get(dataset_schema["key"]) - if not datasets: + if not dataset: continue - if len(datasets) > 1: - raise exceptions.DatasetNameIsNotUnique( - f"There is more than one dataset named {dataset_schema['key']!r} - ensure each dataset within a " - f"manifest is uniquely named." - ) - - dataset = datasets.pop(0) - file_tags_template = dataset_schema.get("file_tags_template") if not file_tags_template: From fa0cc93e2dabcd8a3a25a4cd50d1c50ab0e87ad1 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 12:04:11 +0000 Subject: [PATCH 03/10] FIX: Use correct method to transform manifests during validation --- twined/twine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twined/twine.py b/twined/twine.py index 100de88..3333dbf 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -165,9 +165,9 @@ def _validate_manifest(self, kind, source, cls=None, **kwargs): # TODO elegant way of cleaning up this nasty serialisation hack to manage conversion of outbound manifests to primitive inbound = True - if hasattr(data, "serialise"): + if hasattr(data, "to_primitive"): inbound = False - data = data.serialise() + data = data.to_primitive() self._validate_against_schema(kind, data) self._validate_dataset_file_tags(manifest_kind=kind, manifest=data) From c087be4cb023302ce13fb4f3afb1993c4039f4ba Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 12:08:07 +0000 Subject: [PATCH 04/10] TST: Test error is raised if datasets not given as dict in manifest --- tests/test_manifest_strands.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/test_manifest_strands.py b/tests/test_manifest_strands.py index ed504d8..9956c4d 100644 --- a/tests/test_manifest_strands.py +++ b/tests/test_manifest_strands.py @@ -1,6 +1,5 @@ import copy import os -import unittest from unittest.mock import patch from jsonschema.validators import RefResolver @@ -598,6 +597,24 @@ def test_error_raised_if_multiple_datasets_have_same_name(self): with self.assertRaises(KeyError): twine.validate_input_manifest(source=input_manifest) + def test_error_raised_if_datasets_not_given_as_dictionary(self): + """Test that an error is raised if datasets are not given as a dictionary.""" + input_manifest = """ + { + "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", + "datasets": [ + { + "id": "7ead7669-8162-4f64-8cd5-4abe92509e19", + "name": "met_mast_data", + "tags": {}, + "labels": [], + "files": [] + } + ] + } + """ -if __name__ == "__main__": - unittest.main() + twine = Twine(source=self.TWINE_WITH_INPUT_MANIFEST_WITH_TAG_TEMPLATE) + + with self.assertRaises(exceptions.InvalidManifestContents): + twine.validate_input_manifest(source=input_manifest) From f177507d1dfe5c6f43e2059c998fc2f3e9c27475 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 12:26:08 +0000 Subject: [PATCH 05/10] ENH: Expect datasets as dictionary in twine.json BREAKING CHANGE: This changes the schema for twine.json files --- tests/data/apps/empty_app/twine.json | 4 +-- tests/data/apps/example_app/twine.json | 17 ++++----- tests/data/apps/simple_app/twine.json | 2 +- tests/test_manifest_strands.py | 48 +++++++++++--------------- tests/test_twine.py | 24 +++++++++++-- twined/schema/twine_schema.json | 34 ++++++++---------- twined/twine.py | 4 +-- 7 files changed, 68 insertions(+), 65 deletions(-) diff --git a/tests/data/apps/empty_app/twine.json b/tests/data/apps/empty_app/twine.json index 8c410a5..ad44e51 100644 --- a/tests/data/apps/empty_app/twine.json +++ b/tests/data/apps/empty_app/twine.json @@ -12,7 +12,7 @@ "credentials": [ ], "input_manifest": { - "datasets": [] + "datasets": {} }, "input_values_schema": { "$schema": "http://json-schema.org/2019-09/schema#", @@ -23,7 +23,7 @@ } }, "output_manifest": { - "datasets": [] + "datasets": {} }, "output_values_schema": { "title": "Output Values", diff --git a/tests/data/apps/example_app/twine.json b/tests/data/apps/example_app/twine.json index 1a14fc8..1f93e78 100644 --- a/tests/data/apps/example_app/twine.json +++ b/tests/data/apps/example_app/twine.json @@ -32,16 +32,14 @@ } ], "input_manifest": { - "datasets": [ - { - "key": "met_mast_data", + "datasets": { + "met_mast_data": { "purpose": "A dataset containing meteorological mast data" }, - { - "key": "scada_data", + "scada_data": { "purpose": "A dataset containing scada data" } - ] + } }, "input_values_schema": { "$schema": "http://json-schema.org/2019-09/schema#", @@ -57,12 +55,11 @@ } }, "output_manifest": { - "datasets": [ - { - "key": "production_data", + "datasets": { + "production_data": { "purpose": "A dataset containing production data" } - ] + } }, "output_values_schema": { "title": "Output Values", diff --git a/tests/data/apps/simple_app/twine.json b/tests/data/apps/simple_app/twine.json index 31fd99c..c3ceffa 100644 --- a/tests/data/apps/simple_app/twine.json +++ b/tests/data/apps/simple_app/twine.json @@ -68,6 +68,6 @@ } }, "output_manifest": { - "datasets": [] + "datasets": {} } } diff --git a/tests/test_manifest_strands.py b/tests/test_manifest_strands.py index 9956c4d..ba7c1fa 100644 --- a/tests/test_manifest_strands.py +++ b/tests/test_manifest_strands.py @@ -13,32 +13,28 @@ class TestManifestStrands(BaseTestCase): VALID_MANIFEST_STRAND = """ { "configuration_manifest": { - "datasets": [ - { - "key": "configuration_files_data", + "datasets": { + "configuration_files_data": { "purpose": "A dataset containing files used in configuration" } - ] + } }, "input_manifest": { - "datasets": [ - { - "key": "met_mast_data", + "datasets": { + "met_mast_data": { "purpose": "A dataset containing meteorological mast data" }, - { - "key": "scada_data", + "scada_data": { "purpose": "A dataset containing scada data" } - ] + } }, "output_manifest": { - "datasets": [ - { - "key": "output_files_data", + "datasets": { + "output_files_data": { "purpose": "A dataset containing output results" } - ] + } } } """ @@ -46,9 +42,8 @@ class TestManifestStrands(BaseTestCase): TWINE_WITH_INPUT_MANIFEST_WITH_TAG_TEMPLATE = """ { "input_manifest": { - "datasets": [ - { - "key": "met_mast_data", + "datasets": { + "met_mast_data": { "purpose": "A dataset containing meteorological mast data", "file_tags_template": { "type": "object", @@ -74,7 +69,7 @@ class TestManifestStrands(BaseTestCase): ] } } - ] + } } } """ @@ -430,15 +425,14 @@ def test_validate_input_manifest_with_required_tags_for_remote_tag_template_sche """ { "input_manifest": { - "datasets": [ - { - "key": "met_mast_data", + "datasets": { + "met_mast_data": { "purpose": "A dataset containing meteorological mast data", "file_tags_template": { "$ref": "%s" } } - ] + } } } """ @@ -480,9 +474,8 @@ def test_validate_input_manifest_with_required_tags_in_several_datasets(self): twine_with_input_manifest_with_required_tags_for_multiple_datasets = """ { "input_manifest": { - "datasets": [ - { - "key": "first_dataset", + "datasets": { + "first_dataset": { "purpose": "A dataset containing meteorological mast data", "file_tags_template": { "type": "object", @@ -496,8 +489,7 @@ def test_validate_input_manifest_with_required_tags_in_several_datasets(self): } } }, - { - "key": "second_dataset", + "second_dataset": { "file_tags_template": { "type": "object", "properties": { @@ -510,7 +502,7 @@ def test_validate_input_manifest_with_required_tags_in_several_datasets(self): } } } - ] + } } } """ diff --git a/tests/test_twine.py b/tests/test_twine.py index 5389823..2e9824e 100644 --- a/tests/test_twine.py +++ b/tests/test_twine.py @@ -1,5 +1,4 @@ import os -import unittest from twined import Twine, exceptions from .base import BaseTestCase @@ -63,6 +62,25 @@ def test_broken_json_twine(self): with self.assertRaises(exceptions.InvalidTwineJson): Twine(source=invalid_json_twine) + def test_error_raised_if_datasets_not_given_as_dictionary_in_manifest_strands(self): + """Test that an error is raised if datasets are not given as a dictionary in the manifest strands.""" + for manifest_strand in ("configuration_manifest", "input_manifest", "output_manifest"): + with self.subTest(manifest_strand=manifest_strand): + invalid_twine = ( + """ + { + "%s": { + "datasets": [ + { + "key": "met_mast_data", + "purpose": "A dataset containing meteorological mast data" + } + ] + } + } + """ + % manifest_strand + ) -if __name__ == "__main__": - unittest.main() + with self.assertRaises(exceptions.InvalidTwineContents): + Twine(source=invalid_twine) diff --git a/twined/schema/twine_schema.json b/twined/schema/twine_schema.json index 6e94a2e..1a923b5 100644 --- a/twined/schema/twine_schema.json +++ b/twined/schema/twine_schema.json @@ -38,27 +38,23 @@ "type": "object", "properties": { "datasets": { - "type": "array", + "type": "object", "description": "A list of entries, each describing a dataset that should be attached to / made available to the digital twin", - "items": { - "type": "object", - "properties": { - "key": { - "description": "A textual key identifying this dataset within the application/twin", - "type": "string" - }, - "purpose": { - "description": "What data this dataset contains, eg 'the set of data files from the energy production calculation process'", - "type": "string", - "default": "" - }, - "file_tags_template": { - "$ref": "#/$defs/file_tags_template" + "patternProperties": { + ".+": { + "description": "A dataset description mapped to a textual key uniquely identifying the dataset within the application/twin", + "type": "object", + "properties": { + "purpose": { + "description": "What data this dataset contains, eg 'the set of data files from the energy production calculation process'", + "type": "string", + "default": "" + }, + "file_tags_template": { + "$ref": "#/$defs/file_tags_template" + } } - }, - "required": [ - "key" - ] + } } } }, diff --git a/twined/twine.py b/twined/twine.py index 3333dbf..7eec2e8 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -189,8 +189,8 @@ def _validate_dataset_file_tags(self, manifest_kind, manifest): # This is the manifest schema included in the twine.json file, not the schema for manifest.json files. manifest_schema = getattr(self, manifest_kind) - for dataset_schema in manifest_schema["datasets"]: - dataset = manifest["datasets"].get(dataset_schema["key"]) + for dataset_name, dataset_schema in manifest_schema["datasets"].items(): + dataset = manifest["datasets"].get(dataset_name) if not dataset: continue From c27ef2178d9fd93d48aad5f1f7703bde192ef79d Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 12:28:04 +0000 Subject: [PATCH 06/10] OPS: Increase version number --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 439cb2b..79c367d 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name="twined", - version="0.1.3", + version="0.2.0", py_modules=[], install_requires=["jsonschema ~= 3.2.0", "python-dotenv"], url="https://www.github.com/octue/twined", From 8e702590c523a7ce6dc35a59ed079ab8b093799c Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 12:40:27 +0000 Subject: [PATCH 07/10] ENH: Update description in manifest schema skipci --- twined/schema/manifest_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twined/schema/manifest_schema.json b/twined/schema/manifest_schema.json index 808ce94..4bf3df2 100644 --- a/twined/schema/manifest_schema.json +++ b/twined/schema/manifest_schema.json @@ -31,7 +31,7 @@ "type": "string" }, "name": { - "description": "Name of the dataset", + "description": "Name of the dataset (the same as its key in the 'datasets' field).", "type": "string" }, "tags": { From 063d284a09026cab6fd6851be500c8f037d3e870 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 13:51:15 +0000 Subject: [PATCH 08/10] ENH: Convert old manifest datasets format to new one if provided --- tests/test_manifest_strands.py | 23 +++++++++++++++++++---- twined/twine.py | 11 +++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tests/test_manifest_strands.py b/tests/test_manifest_strands.py index ba7c1fa..01a511e 100644 --- a/tests/test_manifest_strands.py +++ b/tests/test_manifest_strands.py @@ -589,8 +589,10 @@ def test_error_raised_if_multiple_datasets_have_same_name(self): with self.assertRaises(KeyError): twine.validate_input_manifest(source=input_manifest) - def test_error_raised_if_datasets_not_given_as_dictionary(self): - """Test that an error is raised if datasets are not given as a dictionary.""" + def test_deprecation_warning_issued_and_datasets_format_translated_if_datasets_given_as_list(self): + """Test that, if datasets are given as a list (the old format), a deprecation warning is issued and the list + is translated to a dictionary (the new format). + """ input_manifest = """ { "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", @@ -608,5 +610,18 @@ def test_error_raised_if_datasets_not_given_as_dictionary(self): twine = Twine(source=self.TWINE_WITH_INPUT_MANIFEST_WITH_TAG_TEMPLATE) - with self.assertRaises(exceptions.InvalidManifestContents): - twine.validate_input_manifest(source=input_manifest) + with self.assertWarns(DeprecationWarning): + manifest = twine.validate_input_manifest(source=input_manifest) + + self.assertEqual( + manifest["datasets"], + { + "met_mast_data": { + "id": "7ead7669-8162-4f64-8cd5-4abe92509e19", + "name": "met_mast_data", + "tags": {}, + "labels": [], + "files": [], + } + }, + ) diff --git a/twined/twine.py b/twined/twine.py index 7eec2e8..14a33d1 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -1,6 +1,7 @@ import json as jsonlib import logging import os +import warnings import pkg_resources from dotenv import load_dotenv from jsonschema import ValidationError, validate as jsonschema_validate @@ -169,6 +170,16 @@ def _validate_manifest(self, kind, source, cls=None, **kwargs): inbound = False data = data.to_primitive() + if isinstance(data["datasets"], list): + data["datasets"] = {dataset["name"]: dataset for dataset in data["datasets"]} + warnings.warn( + message=( + "Datasets belonging to a manifest should be provided as a dictionary mapping their name to" + "themselves. Support for providing a list of datasets will be phased out soon." + ), + category=DeprecationWarning, + ) + self._validate_against_schema(kind, data) self._validate_dataset_file_tags(manifest_kind=kind, manifest=data) From eba4c082cc85c93f22f8cbfa973080e8176f90b4 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 14:08:32 +0000 Subject: [PATCH 09/10] ENH: Convert old twine.json manifest strands to new format if given --- tests/test_twine.py | 22 +++++++++++++++++----- twined/twine.py | 23 ++++++++++++++++++----- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/tests/test_twine.py b/tests/test_twine.py index 2e9824e..0df9042 100644 --- a/tests/test_twine.py +++ b/tests/test_twine.py @@ -1,6 +1,6 @@ import os -from twined import Twine, exceptions +from twined import MANIFEST_STRANDS, Twine, exceptions from .base import BaseTestCase @@ -63,8 +63,10 @@ def test_broken_json_twine(self): Twine(source=invalid_json_twine) def test_error_raised_if_datasets_not_given_as_dictionary_in_manifest_strands(self): - """Test that an error is raised if datasets are not given as a dictionary in the manifest strands.""" - for manifest_strand in ("configuration_manifest", "input_manifest", "output_manifest"): + """Test that, if datasets are given as a list in the manifest strands, a deprecation warning is issued and the + list (the old form) is converted to a dictionary (the new form). + """ + for manifest_strand in MANIFEST_STRANDS: with self.subTest(manifest_strand=manifest_strand): invalid_twine = ( """ @@ -82,5 +84,15 @@ def test_error_raised_if_datasets_not_given_as_dictionary_in_manifest_strands(se % manifest_strand ) - with self.assertRaises(exceptions.InvalidTwineContents): - Twine(source=invalid_twine) + with self.assertWarns(DeprecationWarning): + twine = Twine(source=invalid_twine) + + self.assertEqual( + getattr(twine, manifest_strand)["datasets"], + { + "met_mast_data": { + "key": "met_mast_data", + "purpose": "A dataset containing meteorological mast data", + } + }, + ) diff --git a/twined/twine.py b/twined/twine.py index 14a33d1..5cb2255 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -53,14 +53,27 @@ def _load_twine(self, source=None): """Load twine from a *.json filename, file-like or a json string and validates twine contents.""" if source is None: # If loading an unspecified twine, return an empty one rather than raising error (like in _load_data()) - raw = {} + raw_twine = {} logger.warning("No twine source specified. Loading empty twine.") else: - raw = self._load_json("twine", source, allowed_kinds=("file-like", "filename", "string", "object")) + raw_twine = self._load_json("twine", source, allowed_kinds=("file-like", "filename", "string", "object")) + + for strand in set(MANIFEST_STRANDS) & raw_twine.keys(): + if isinstance(raw_twine[strand]["datasets"], list): + raw_twine[strand]["datasets"] = {dataset["key"]: dataset for dataset in raw_twine[strand]["datasets"]} + + warnings.warn( + message=( + f"Datasets in the {strand!r} strand of the `twine.json` file should be provided as a " + "dictionary mapping their name to themselves. Support for providing a list of datasets will be " + "phased out soon." + ), + category=DeprecationWarning, + ) - self._validate_against_schema("twine", raw) - self._validate_twine_version(twine_file_twined_version=raw.get("twined_version", None)) - return raw + self._validate_against_schema("twine", raw_twine) + self._validate_twine_version(twine_file_twined_version=raw_twine.get("twined_version", None)) + return raw_twine def _load_json(self, kind, source, **kwargs): """Load data from either a *.json file, an open file pointer or a json string. Directly returns any other data.""" From 38cb2dfeac6a09f2447aa2a4ecb5c5f46d326529 Mon Sep 17 00:00:00 2001 From: cortadocodes Date: Tue, 1 Mar 2022 14:56:42 +0000 Subject: [PATCH 10/10] ENH: Update JSON key description and test name skipci --- tests/test_twine.py | 2 +- twined/schema/twine_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_twine.py b/tests/test_twine.py index 0df9042..d638a0d 100644 --- a/tests/test_twine.py +++ b/tests/test_twine.py @@ -62,7 +62,7 @@ def test_broken_json_twine(self): with self.assertRaises(exceptions.InvalidTwineJson): Twine(source=invalid_json_twine) - def test_error_raised_if_datasets_not_given_as_dictionary_in_manifest_strands(self): + def test_deprecation_warning_issued_if_datasets_not_given_as_dictionary_in_manifest_strands(self): """Test that, if datasets are given as a list in the manifest strands, a deprecation warning is issued and the list (the old form) is converted to a dictionary (the new form). """ diff --git a/twined/schema/twine_schema.json b/twined/schema/twine_schema.json index 1a923b5..06877a9 100644 --- a/twined/schema/twine_schema.json +++ b/twined/schema/twine_schema.json @@ -42,7 +42,7 @@ "description": "A list of entries, each describing a dataset that should be attached to / made available to the digital twin", "patternProperties": { ".+": { - "description": "A dataset description mapped to a textual key uniquely identifying the dataset within the application/twin", + "description": "A dataset representation whose property name/key uniquely identifies the dataset to the service", "type": "object", "properties": { "purpose": {