diff --git a/bindings/python/dlite-entity.i b/bindings/python/dlite-entity.i index a01b06d51..4b26293a8 100644 --- a/bindings/python/dlite-entity.i +++ b/bindings/python/dlite-entity.i @@ -841,7 +841,7 @@ environment variable). It is an error message if the instance cannot be found. -Note: seting `check_storages` to false is normally a good idea if calling +Note: setting `check_storages` to false is normally a good idea if calling this function from a storage plugin. Otherwise you may easily end up in an infinite recursive loop that will exhaust the call stack. ") dlite_swig_get_instance; diff --git a/bindings/python/dlite-jstore-python.i b/bindings/python/dlite-jstore-python.i new file mode 100644 index 000000000..b2d8b26eb --- /dev/null +++ b/bindings/python/dlite-jstore-python.i @@ -0,0 +1,263 @@ +/* -*- Python -*- (not really, but good for syntax highlighting) */ + +/* Python-specific extensions to dlite-jstore.i */ + +%pythoncode %{ +import json +from typing import Mapping, Sequence + + +def format_dict( + d, id=None, soft7=True, single=None, with_uuid=None, with_meta=False, + with_parent=True, urikey=False, +): + """Return a copy of `d` formatted according to the given options. + + Arguments: + d: Input dict. This should be a dict-representation of a DLite + instance. + id: If given, return dict-representation of this id. + Otherwise, return dict-representation of the store. + soft7: Whether to use soft7 formatting. + single: Whether to return in single-instance format. + If None, single-instance format is used for metadata and + multi-instance format for data instances. + with_uuid: Whether to include UUID in the dict. The default + is true if `single=True` and URI is None, otherwise it + is false. + with_meta: Whether to always include "meta" (even for metadata) + with_parent: Whether to include parent info for transactions. + urikey: Whether the URI is the preferred key in multi-instance + format. 
+ + Notes: + This method works with the dict-representation and does not + access instances. The only exception is when `d` corresponds to + a data instance whose dimensions are a list of dimension lengths. + In this case the metadata is needed to get dimension names. + + """ + if not id and single and "properties" not in d and len(d) != 1: + raise _dlite.DLiteLookupError( + "`id` must be given for `single=True` unless there is only one item" + ) + + if id and "properties" not in d: + uuid = _dlite.get_uuid(id) + key = id if id in d else uuid if uuid in d else None + if not key: + raise _dlite.DLiteLookupError(f"no such key in store: {id}") + return format_dict( + d[key], id=id, soft7=soft7, single=single, with_uuid=with_uuid, + with_meta=with_meta, with_parent=with_parent, urikey=urikey) + + dct = {} + + if "properties" not in d: + if single: + if len(d) != 1: + raise _dlite.DLiteValueError( + "Not possible to return single-instance format, use `id`" + ) + else: + for k, v in d.items(): + vid = v.get("uri", v.get("identity", k)) + key = vid if urikey else v.get("uuid", _dlite.get_uuid(vid)) + dct[key] = format_dict( + v, id=k, soft7=soft7, single=True, with_uuid=with_uuid, + with_meta=with_meta, with_parent=with_parent + ) + return dct + + uri = d.get("uri", d.get("identity")) + uuid = d.get("uuid", _dlite.get_uuid(uri) if uri else None) + if id and not uuid: + if _dlite.get_uuid_version(id) == 5: + uri = id + uuid = _dlite.get_uuid(id) + metaid = d.get("meta", _dlite.ENTITY_SCHEMA) + ismeta = ( + "meta" not in d + or d["meta"] in (_dlite.ENTITY_SCHEMA, _dlite.BASIC_METADATA_SCHEMA) + or "properties" in d["properties"] + ) + + if single is None: + single = ismeta + if with_uuid is None: + with_uuid = single and not ("uri" in d or "identity" in d) + + if not uuid and (with_uuid or not single): + raise _dlite.DLiteTypeError("cannot infer UUID from dict") + + if with_uuid: + dct["uuid"] = uuid + if uri: + dct["uri"] = uri + if with_meta or metaid != 
_dlite.ENTITY_SCHEMA: + dct["meta"] = metaid + if ismeta and "description" in d: + dct["description"] = d["description"] + if with_parent and "parent" in d: + dct["parent"] = d["parent"].copy() + + dct["dimensions"] = {} if soft7 or not ismeta else [] + if "dimensions" in d: + if isinstance(d["dimensions"], Mapping): + if soft7 or not ismeta: + dct["dimensions"].update(d["dimensions"]) + else: + for k, v in d["dimensions"].items(): + dct["dimensions"].append({"name": k, "description": v}) + elif isinstance(d["dimensions"], Sequence): + if soft7 and ismeta: + for dim in d["dimensions"]: + dct["dimensions"][dim["name"]] = dim.get("description", "") + elif ismeta: + dct["dimensions"].extend(d["dimensions"]) + else: + meta = get_instance(metaid) + for name, value in zip(meta.dimnames(), d["dimensions"]): + dct["dimensions"][name] = value + else: + raise _dlite.DLiteValueError( + "'dimensions' must be a mapping or sequence, got: " + f"{type(d['dimensions'])}" + ) + + dct["properties"] = {} if soft7 or not ismeta else [] + if isinstance(d["properties"], Mapping): + if soft7 or not ismeta: + dct["properties"].update(d["properties"]) + else: + for k, v in d["properties"].items(): + prop = {"name": k} + prop.update(v) + dct["properties"].append(prop) + elif isinstance(d["properties"], Sequence): + if not ismeta: + raise _dlite.DLiteValueError( + "only metadata can have a sequence of properties" + ) + if soft7: + for prop in d["properties"]: + p = prop.copy() + name = p.pop("name") + dct["properties"][name] = p + else: + dct["properties"].extend(d["properties"]) + else: + raise _dlite.DLiteValueError( + "'properties' must be a mapping or sequence, got: " + f"{type(d['properties'])}" + ) + + if "relations" in d: + dct["relations"] = d["relations"].copy() + + if single: + return dct + return {uri if uri and urikey else uuid: dct} + + +%} + + +%extend _DLiteJStoreIter { + %pythoncode %{ + def __next__(self): + id = self.next() + if id: + return id + raise StopIteration() + + def 
__iter__(self): + return self + %} +} + + +%extend _JStore { + %pythoncode %{ + def get(self, id=None): + """Return instance with given `id` from store. + + If `id` is None and there is exactly one instance in the store, + return the instance. Otherwise raise an DLiteLookupError. + """ + inst = self._get(id=id) + return instance_cast(inst) + + def load_dict(self, d, id=None): + """Load dict representation of instance to the store.""" + if "properties" not in d: + if id: + self.load_dict(d[id], id=id) + else: + for id, val in d.items(): + self.load_dict(val, id=id) + return + + d = d.copy() + uuid = None + if "uuid" in d: + uuid = d["uuid"] + elif "uri" in d or "identity" in d: + uuid = _dlite.get_uuid(d.get("uri", d.get("identity"))) + + if id and uuid and _dlite.get_uuid(id) != uuid: + raise _dlite.DLiteInconsistentDataError( + f"id '{id}' is not consistent with existing uuid: {uuid}" + ) + elif not id and not uuid: + raise _dlite.DLiteValueError( + "`id` argument is required when dict has no 'uuid', 'uri' " + "or 'identity' key" + ) + elif not uuid: + assert id + uuid = _dlite.get_uuid(id) + + assert uuid + d.setdefault("uuid", uuid) + if id and id != uuid: + d.setdefault("uri", id) + + self.load_json(json.dumps(d)) + + def get_dict(self, id=None, soft7=True, single=None, with_uuid=None, + with_meta=False, with_parent=True, urikey=False): + """Return dict representation of the store or item with given id. + + Arguments: + id: If given, return dict-representation of this id. + Otherwise, return dict-representation of the store. + soft7: Whether to use soft7 formatting. + single: Whether to return in single-instance format. + If None, single-instance format is used for metadata and + multi-instance format for data instances. + with_uuid: Whether to include UUID in the dict. The default + is true if `single=True` and URI is None, otherwise it + is false. 
+ with_meta: Whether to always include "meta" (even for metadata) + with_parent: Whether to include parent info for transactions. + urikey: Whether the URI is the preferred keys in multi-instance + format. + + """ + d = {} + if id: + d[id] = json.loads(self.get_json(id)) + else: + if single is None: + single = False + for _id in self.get_ids(): + d[_id] = json.loads(self.get_json(_id)) + + return format_dict( + d, id=id, soft7=soft7, single=single, with_uuid=with_uuid, + with_meta=with_meta, with_parent=with_parent, urikey=urikey + ) + + %} +} diff --git a/bindings/python/dlite-jstore.i b/bindings/python/dlite-jstore.i new file mode 100644 index 000000000..ac462e9a2 --- /dev/null +++ b/bindings/python/dlite-jstore.i @@ -0,0 +1,132 @@ +/* -*- C -*- (not really, but good for syntax highlighting) */ + +%{ +#include "dlite.h" +#include "dlite-errors.h" +#include "dlite-json.h" + + /* If store `js` only has one instance, return its id, otherwise raise a + DLiteLookupError. */ + static const char *_single_id(JStore *js) + { + const char *key = jstore_get_single_key(js); + if (key) return key; + return dlite_err(dliteLookupError, + "get_single() expect exactly 1 item in the storage. Got %d", + jstore_count(js)), NULL; + } +%} + + +/* JStore iterator */ +struct _DLiteJStoreIter {}; +%feature("docstring", "\ +Iterates over instances in JSON store `js`. If `pattern` is given, only +instances whos metadata URI matches `pattern` are returned. 
+") _DLiteJStoreIter; +%extend _DLiteJStoreIter { + _DLiteJStoreIter(struct _JStore *js, const char *pattern=NULL) { + return dlite_jstore_iter_create(js, pattern); + } + ~_DLiteJStoreIter(void) { + dlite_jstore_iter_free($self); + } + + const char *next(void) { + return dlite_jstore_iter_next($self); + } +} + + +/* JStore */ +struct _JStore {}; +%feature("docstring", "Store for JSON data.") _JStore; +%rename(JStore) _JStore; +%extend _JStore { + _JStore(void) { + return jstore_open(); + } + ~_JStore(void) { + jstore_close($self); + } + + %feature("docstring", "Remove instance with given `id`.") remove; + void remove(const char *id) { + dlite_jstore_remove($self, id); + } + + %feature("docstring", "Load JSON content from file into the store.") loadf; + void load_file(const char *INPUT, size_t LEN) { + dlite_jstore_loadf($self, INPUT); + } + + %feature("docstring", + "Load JSON string into the store. " + "Beware that this function has no validation of the input.") loads; + void load_json(const char *INPUT, size_t LEN) { + dlite_jstore_loads($self, INPUT, LEN); + } + + %feature("docstring", "Add json representation of `inst` to store.") add; + void add(const struct _DLiteInstance *inst) { + dlite_jstore_add($self, inst, 0); + } + + %feature("docstring", + "Return instance with given `id` from store. " + "If `id` is None and there is exactly one instance in the store, " + "return the instance. 
Otherwise raise a DLiteLookupError.") _get; + %newobject _get; + struct _DLiteInstance *_get(const char *id=NULL) { + if (!id && !(id = _single_id($self))) return NULL; + return dlite_jstore_get($self, id); + } + + %feature("docstring", "Return JSON string for given id from store.") get_json; + const char *get_json(const char *id) { + const char *s; + if (!id && !(id = _single_id($self))) return NULL; + if (!(s = jstore_get($self, id))) { + char uuid[DLITE_UUID_LENGTH+1]; + if (dlite_get_uuid(uuid, id) < 0) return NULL; + s = jstore_get($self, uuid); + } + return s; + } + + %feature("docstring", + "If there is one instance in storage, return its id. " + "Otherwise, raise a DLiteLookupError exception.") get_single_id; + /* The returned id is a key owned by the store; it must not be freed. */ + const char *get_single_id(void) { + return _single_id($self); + } + + %feature("docstring", "Iterate over all id's matching pattern.") get_ids; + struct _DLiteJStoreIter *get_ids(const char *pattern=NULL) { + return dlite_jstore_iter_create($self, pattern); + } + + int __len__(void) { + return jstore_count($self); + } + + bool __bool__(void) { + return jstore_count($self) > 0; + } + + struct _JStore *__iadd__(struct _JStore *other) { + if (jstore_update($self, other)) + return dlite_err(dliteTypeError, "Cannot update store"), NULL; + return $self; + } + +} + + +/* ----------------------------------- + * Target language-specific extensions + * ----------------------------------- */ +#ifdef SWIGPYTHON +%include "dlite-jstore-python.i" +#endif diff --git a/bindings/python/dlite-python.i b/bindings/python/dlite-python.i index 38870a21c..043621b75 100644 --- a/bindings/python/dlite-python.i +++ b/bindings/python/dlite-python.i @@ -1175,8 +1175,11 @@ PyObject *dlite_run_file(const char *path, PyObject *globals, PyObject *locals) %typemap("doc") (const char *INPUT, size_t LEN) "string" %typemap(in, numinputs=1) (const char *INPUT, size_t LEN) (Py_ssize_t tmp) { - $1 = (char 
*)PyUnicode_AsUTF8AndSize($input, &tmp); + PyObject *str 
= PyObject_Str($input); + if (!str) SWIG_exception(SWIG_TypeError, "Cannot get string representation"); + $1 = (char *)PyUnicode_AsUTF8AndSize(str, &tmp); $2 = tmp; + Py_DECREF(str); } /* Array of input dimensions */ diff --git a/bindings/python/dlite.i b/bindings/python/dlite.i index 1043d99a0..d679c6bb7 100644 --- a/bindings/python/dlite.i +++ b/bindings/python/dlite.i @@ -113,3 +113,4 @@ %include "dlite-path.i" %include "dlite-mapping.i" %include "dlite-behavior.i" +%include "dlite-jstore.i" diff --git a/bindings/python/scripts/CMakeLists.txt b/bindings/python/scripts/CMakeLists.txt index 95dc9d7ea..0f2a7dad8 100644 --- a/bindings/python/scripts/CMakeLists.txt +++ b/bindings/python/scripts/CMakeLists.txt @@ -44,6 +44,7 @@ test_success( dlite-validate-Person dlite-validate ../tests/entities/Person.json ) +# Storage with multiple instances test_success( dlite-validate-persons dlite-validate @@ -58,14 +59,17 @@ test_success( --id=Ada --show ../tests/input/persons.json - ) - -# No --id for storage with multiple instances -test_failure( - dlite-validate-persons-fail2 +) +test_success( + dlite-validate-persons3 dlite-validate - --storage-path ../tests/entities/Person.json - --show ../tests/input/persons.json + --storage-path ../tests/entities + --show + ../tests/input/persons.json +) +test_success( + dlite-validate-multiple-instances + dlite-validate ../tests/input/test_ref_type.json ) # The "dimensions" keyword is required diff --git a/bindings/python/scripts/dlite-validate b/bindings/python/scripts/dlite-validate index b063cc750..b9ddeb302 100644 --- a/bindings/python/scripts/dlite-validate +++ b/bindings/python/scripts/dlite-validate @@ -16,9 +16,10 @@ def parse(url, driver=None, options="mode=r", id=None): driver: The name of storage plugin to use. options: Options passed to the storage plugin. id: ID of instance to load if the storage contains multiple instances. + The default is to load all instances. Returns: - A new instance. + List of new instances. 
""" import dlite @@ -26,13 +27,26 @@ def parse(url, driver=None, options="mode=r", id=None): if driver is None: driver = Path(loc).suffix.lstrip('.').lower() + # Validate all instances if `id` is None + if id is None: + try: + uuids = dlite.Instance.get_uuids(driver, url, options=options) + #except dlite.DLiteError: + except dlite.DLiteUnsupportedError: + pass + else: + insts = [] + for uuid in uuids: + insts.extend(parse(url, driver=driver, options=options, id=uuid)) + return insts + match = re.match(r'^([a-zA-Z][a-zA-Z0-9+.-]*)://', url) if match and match.groups()[0].lower() != 'file': if options: url += f"?{options}" if id: url += f"#{id}" - return dlite.Instance.from_url(url) + return [dlite.Instance.from_url(url)] else: path = Path(loc.split(":", 1)[1] if match else loc).resolve() @@ -42,9 +56,9 @@ def parse(url, driver=None, options="mode=r", id=None): with open(path, 'rt') as f: json.load(f) - return dlite.Instance.from_location( + return [dlite.Instance.from_location( driver, path, options=options, id=id, - ) + )] def check_dimensions(url, meta): @@ -115,7 +129,7 @@ def main(): dlite.storage_path.append(Path(path).resolve()) try: - inst = parse(args.url, args.driver, args.options, args.id) + insts = parse(args.url, args.driver, args.options, args.id) except Exception as exc: if args.debug: raise @@ -123,13 +137,17 @@ def main(): print(f'{args.url}: {exc.__class__.__name__}: {exc}') sys.exit(1) - if inst.is_meta: - check_dimensions(args.url, inst) + for inst in insts: + if inst.is_meta: + check_dimensions(args.url, inst) print(f'{args.url}: validated') if (args.show): - print(inst) + for inst in insts: + if len(insts) > 1: + print("="*79) + print(inst) if __name__ == '__main__': diff --git a/bindings/python/tests/CMakeLists.txt b/bindings/python/tests/CMakeLists.txt index 7166eb388..9bdf0e631 100644 --- a/bindings/python/tests/CMakeLists.txt +++ b/bindings/python/tests/CMakeLists.txt @@ -31,6 +31,7 @@ set(tests test_isolated_plugins test_options test_plugin 
+ test_jstore ) foreach(test ${tests}) diff --git a/bindings/python/tests/test_jstore.py b/bindings/python/tests/test_jstore.py new file mode 100644 index 000000000..2f543f4bf --- /dev/null +++ b/bindings/python/tests/test_jstore.py @@ -0,0 +1,208 @@ +"""Test JStore.""" +from pathlib import Path + +import dlite +from dlite.testutils import raises + + +thisdir = Path(__file__).resolve().parent +outdir = thisdir / "output" +indir = thisdir / "input" +entitydir = thisdir / "entities" + +dlite.storage_path.append(entitydir) + + +# Test format_dict(), arg: soft7 +D1 = { # soft7 representation + "uri": "http://onto-ns.com/meta/ex/0.2/Test", + "dimensions": {"n": "number of something"}, + "properties": { + "a": {"type": "string"}, + "b": {"type": "float64", "shape": ["n"]}, + }, +} +D2 = { # old (array) representation + "uri": "http://onto-ns.com/meta/ex/0.2/Test", + "dimensions": [{"name": "n", "description": "number of something"}], + "properties": [ + {"name": "a", "type": "string"}, + {"name": "b", "type": "float64", "shape": ["n"]}, + ], +} +assert dlite.format_dict(D1, soft7=True) == D1 +assert dlite.format_dict(D1, soft7=False) == D2 +assert dlite.format_dict(D2, soft7=True) == D1 +assert dlite.format_dict(D2, soft7=False) == D2 + +# soft7 representation. 
This is identical to the old representation for +# data instances +d1 = { + "uuid": "d6a1c1db-44b6-5b87-b815-83f1127395b6", + "meta": "http://onto-ns.com/meta/ex/0.2/Test", + "dimensions": {"n": 3}, + "properties": { + "a": "hello", + "b": [1.1, 2.2, 3.3], + }, +} +d2 = { # old representation + "uuid": "d6a1c1db-44b6-5b87-b815-83f1127395b6", + "meta": "http://onto-ns.com/meta/ex/0.2/Test", + "dimensions": {"n": 3}, + "properties": { + "a": "hello", + "b": [1.1, 2.2, 3.3], + }, +} +d3 = { + "uuid": "d6a1c1db-44b6-5b87-b815-83f1127395b6", + "meta": "http://onto-ns.com/meta/ex/0.2/Test", + "dimensions": [3], + "properties": { + "a": "hello", + "b": [1.1, 2.2, 3.3], + }, +} +assert dlite.format_dict(d1, soft7=True, single=True) == d1 +assert dlite.format_dict(d1, soft7=False, single=True) == d2 +assert dlite.format_dict(d2, soft7=True, single=True) == d1 +assert dlite.format_dict(d2, soft7=False, single=True) == d2 + +# Test format_dict(), dimension as list of numbers - need metadata +js = dlite.JStore() +js.load_dict(D1) +meta = js.get() +assert dlite.format_dict(d3, soft7=True, single=True) == d1 +assert dlite.format_dict(d3, soft7=False, single=True) == d2 + +# Test format_dict(), arg: single +uuid_D1 = dlite.get_uuid(D1["uri"]) +assert dlite.format_dict(D1) == D1 +assert dlite.format_dict(D1, single=True) == D1 +assert dlite.format_dict(D1, single=False) == {uuid_D1: D1} + +d1_nouuid = d1.copy() +del d1_nouuid["uuid"] +d1_multi = {d1["uuid"]: d1_nouuid} +assert dlite.format_dict(d1, single=None) == d1_multi +assert dlite.format_dict(d1, single=True) == d1 +assert dlite.format_dict(d1, single=False) == d1_multi + +# Test format_dict(), arg: with_uuid +assert dlite.format_dict(d1, single=None, with_uuid=True) == {d1["uuid"]: d1} +assert dlite.format_dict(d1, single=True, with_uuid=True) == d1 +assert dlite.format_dict(d1, single=False, with_uuid=True) == {d1["uuid"]: d1} + +assert dlite.format_dict(d1, single=None, with_uuid=False) == d1_multi +assert 
dlite.format_dict(d1, single=True, with_uuid=False) == d1_nouuid +assert dlite.format_dict(d1, single=False, with_uuid=False) == d1_multi + +# Test format_dict(), arg: with_meta +D1_meta = D1.copy() +D1_meta["meta"] = dlite.ENTITY_SCHEMA +assert dlite.format_dict(D1, single=True, with_meta=True) == D1_meta +assert dlite.format_dict(D1, single=True, with_meta=False) == D1 +assert dlite.format_dict(D1, single=False, with_meta=True) == {uuid_D1: D1_meta} +assert dlite.format_dict(D1, single=False, with_meta=False) == {uuid_D1: D1} + +d1_nometa = d1.copy() +del d1_nometa["meta"] +assert dlite.format_dict(d1, single=True, with_meta=True) == d1 +assert dlite.format_dict(d1, single=True, with_meta=False) == d1 +assert dlite.format_dict(d1, single=False, with_meta=True) == d1_multi +assert dlite.format_dict(d1, single=False, with_meta=False) == d1_multi + +# Test format_dict(), arg: urikey +assert dlite.format_dict(D1, single=True, urikey=True) == D1 +assert dlite.format_dict(D1, single=False, urikey=True) == {D1["uri"]: D1} +assert dlite.format_dict(D1, single=False, urikey=False) == {uuid_D1: D1} +assert dlite.format_dict(d1, single=False, urikey=True) == d1_multi +assert dlite.format_dict(d1, single=False, urikey=False) == d1_multi + +# Test format_dict(), arg: id +d4 = {"inst1": d1_nouuid} +d5 = { + "inst1": d1, + "inst2": d1_nouuid, + "inst3": d1_nouuid, +} +d1_uri = d1.copy() +d1_uri["uri"] = "inst1" +d1_uri_nouuid = d1_uri.copy() +del d1_uri_nouuid["uuid"] +d1_uri_multi = {d1["uuid"]: d1_uri_nouuid} +assert dlite.format_dict(d4, id="inst1", single=True) == d1_uri +assert dlite.format_dict(d4, id="inst1", single=None) == d1_uri_multi +assert dlite.format_dict(d4, id="inst1", single=False) == d1_uri_multi +assert dlite.format_dict(d4) == {d1["uuid"]: d1_uri} +with raises(dlite.DLiteLookupError): + dlite.format_dict(d4, id="noexisting") + +# FIXME - make sure that the uri is included +assert dlite.format_dict(d5, id="inst1", single=True) == d1 +assert 
dlite.format_dict(d5, id="inst1", single=None) == d1_multi +assert dlite.format_dict(d5, id="inst1", single=False) == d1_multi +# assert dlite.format_dict(d5, id="inst1", single=True) == d1_uri +# assert dlite.format_dict(d5, id="inst1", single=None) == d1_uri_multi +# assert dlite.format_dict(d5, id="inst1", single=False) == d1_uri_multi +# assert len(dlite.format_dict(d5)) == 3 +with raises(dlite.DLiteLookupError): + dlite.format_dict(d5, id="noexisting") +with raises(dlite.DLiteLookupError): + dlite.format_dict(d5, single=True) + + +# Test JStore +js = dlite.JStore() + +with raises(dlite.DLiteLookupError): + js.get() + +js.load_file(indir / "blob.json") +assert len(js) == 1 +inst = js.get() +key = next(js.get_ids()) + +js.load_file(indir / "persons.json") +assert len(js) == 6 + +js.remove(key) +assert len(js) == 5 + +with raises(dlite.DLiteLookupError): + js.get() + +metaid = "http://onto-ns.com/meta/0.1/Person" +jon = js.get_dict(id="028217b9-2f64-581d-9712-a5b67251bfec", single=None) +assert "028217b9-2f64-581d-9712-a5b67251bfec" in jon +assert jon["028217b9-2f64-581d-9712-a5b67251bfec"]["meta"] == metaid + +cleo = js.get_dict(id="Cleopatra", single=True) +assert cleo["uuid"] == dlite.get_uuid("Cleopatra") +assert cleo["meta"] == "http://onto-ns.com/meta/0.1/SimplePerson" + +js.load_dict(D1) +js.load_dict(d1) +assert len(js) == 7 + +# Reloading existing will just replace +js.load_dict(d1) +assert len(js) == 7 + + +js2 = dlite.JStore() +js2.load_dict(D1) +js2.load_dict(d1) +assert len(js2) == 2 + +dct = js2.get_dict() +assert len(dct) == 2 + + + + +key1 = next(js.get_ids()) +# d1 = js.get_dict(key1) +s1 = js.get_json(key1) +inst1 = js.get(key1) diff --git a/bindings/python/tests/test_python_storage.py b/bindings/python/tests/test_python_storage.py index 189600dce..f7f877e62 100644 --- a/bindings/python/tests/test_python_storage.py +++ b/bindings/python/tests/test_python_storage.py @@ -145,7 +145,9 @@ def equal_rdf_files(path1, path2): print("Test saving 
metadata...") with dlite.Storage( - "yaml", meta_outfile, "mode=w;uuid=false;single=true" + "yaml", + meta_outfile, + "mode=w;uuid=false;single=false;with_uuid=false;with_meta=true" ) as s: s.save(meta) with open(meta_infile, "r") as f: @@ -163,7 +165,7 @@ def equal_rdf_files(path1, path2): print("...Loading data ok!") print("Test saving data...") - with dlite.Storage("yaml", data_outfile, "mode=w;single=true") as s: + with dlite.Storage("yaml", data_outfile, "mode=w;with_uuid=false") as s: s.save(inst1) s.save(inst2) with open(data_infile, "r") as f: diff --git a/bindings/python/tests/test_storage.py b/bindings/python/tests/test_storage.py index 681bb02fd..30efc99d2 100755 --- a/bindings/python/tests/test_storage.py +++ b/bindings/python/tests/test_storage.py @@ -131,9 +131,12 @@ - `r`: Open existing `location` for reading. - `w`: Open for writing. If `location` exists, it is truncated. - `soft7`: Whether to save using SOFT7 format. - - `single`: Whether the input is assumed to be in single-entity form. - If "auto" (default) the form will be inferred automatically. + - `single`: Whether to save in single-instance form. - `with_uuid`: Whether to include UUID when saving. + - with_meta: Whether to always include "meta" (even for metadata) + - with_parent: Whether to include parent info for transactions. + - urikey: Whether the URI is the preferred keys in multi-instance + format. 
""" s = dlite.Storage( "yaml", outdir / "test_storage_inst.yaml", options="mode=a" diff --git a/bindings/python/tests/test_utils.py b/bindings/python/tests/test_utils.py index 12affe4bd..fcdc89519 100644 --- a/bindings/python/tests/test_utils.py +++ b/bindings/python/tests/test_utils.py @@ -5,6 +5,7 @@ import dlite from dlite.utils import ( + DictStore, instance_from_dict, to_metadata, infer_dimensions, @@ -190,3 +191,12 @@ ref.refs = [ref] dims = infer_dimensions(meta=Ref, values=ref.asdict(single=True)["properties"]) assert dims == {"nitems": 2, "nrefs": 1} + + +# Test DictStore +ds = DictStore() +ds.add({ + "meta": "http://onto-ns.com/meta/0.1/Collection", + "dimensions": [0], + "properties": {"relations": []}, +}) diff --git a/bindings/python/utils.py b/bindings/python/utils.py index 87eebc359..fe4bd2872 100644 --- a/bindings/python/utils.py +++ b/bindings/python/utils.py @@ -60,6 +60,103 @@ def uncaught_exception_hook(exetype, value, trace): sys.excepthook, oldhook = uncaught_exception_hook, sys.excepthook +class DictStore(dict): + """A dict subclass providing a simple store for dict-representations + of instances (including metadata). + + The internal layout follows the multi-instance representation, + where each top-level key-value pair is an UUID mapped to a + dict-representation of an instance. Metadata is represented in + soft7 format. + + Arguments: + args: One or more to initialise the store with. + + """ + def __init__(self, *args): + for arg in args: + self.add(arg) + + def add(self, d, id=None): + """Add dict-representation `d` of an instance to the store. + + Arguments: + d: A dict to add to the store. + id: Optional `id` of `d`. The `id` must be consistent with + any 'uuid', 'uri' or 'identity' key. This option is + useful in the case `d` doesn't have an 'uuid', 'uri' or + 'identity' key. 
+ + Notes: + The layout of `d` may be any of the supported representations + for data instances or metadata, including any combination of + single/multi-instance and soft7 representations. The internal + layout is always multi-instance soft7. + """ + if "properties" not in d: + # `d` is in multi-instance format, add values separately + for v in d.values(): + self.add(v) + return + + if "uuid" in d: + uuid = d["uuid"] + else: + uuid = dlite.get_uuid(d.get("uri", d.get("identity"))) + + if id and uuid and dlite.get_uuid(id) != uuid: + raise dlite.DLiteInconsistentDataError( + f"id '{id}' is not consistent with existing uuid: {uuid}" + ) + elif not id and not uuid: + raise dlite.DLiteValueError( + "`id` argument is required when dict has no 'uuid', 'uri' " + "or 'identity' key" + ) + elif not uuid: + uuid = dlite.get_uuid(id) + + ismeta = ( + "meta" not in d + or d["meta"] in (dlite.ENTITY_SCHEMA, dlite.BASIC_METADATA_SCHEMA) + or "properties" in d.get("properties", ()) + ) + + dct = {} + dct["dimensions"] = {} + if "dimensions" in d: + if isinstance(d["dimensions"], Mapping): + dct["dimensions"].update(d["dimensions"]) + elif isinstance(d["dimensions"], Sequence): + meta = dlite.get_instance(d.get("meta", dlite.ENTITY_SCHEMA)) + for name, v in zip(meta.dimnames(), d["dimensions"]): + dct["dimensions"][name] = v + else: + raise dlite.DLiteValueError( + "'dimensions' must be a mapping or sequence, got: " + f"{type(d['dimensions'])}" + ) + + dct["properties"] = {} + if isinstance(d["properties"], Mapping): + dct["properties"].update(d["properties"]) + elif isinstance(d["properties"], Sequence): + if not ismeta: + raise dlite.DLiteValueError("only metadata can have a sequence of properties") + for p in d["properties"]: + dct["properties"][p["name"]] = {k: v for k, v in p.items() if k != "name"} + else: + raise dlite.DLiteValueError( + "'properties' must be a mapping or sequence, got: " + f"{type(d['properties'])}" + ) + + if "relations" in d: + dct["relations"] = 
d["relations"].copy() + + self[uuid] = dct + + def instance_from_dict(d, 
id=None, single=None, check_storages=True): """Returns a new DLite instance created from dict. diff --git a/src/dlite-json.c b/src/dlite-json.c index f906524ae..612a9ebfd 100644 --- a/src/dlite-json.c +++ b/src/dlite-json.c @@ -10,6 +10,7 @@ #include "utils/compat.h" #include "utils/strutils.h" +#include "getuuid.h" #include "dlite.h" #include "dlite-macros.h" #include "dlite-json.h" @@ -489,6 +490,8 @@ static char *get_uri(const char *src, const jsmntok_t *obj) const jsmntok_t *t, *t1, *t2, *t3; if ((t = jsmn_item(src, obj, "uri"))) return strndup(src + t->start, t->end - t->start); + if ((t = jsmn_item(src, obj, "identity"))) + return strndup(src + t->start, t->end - t->start); if ((t1 = jsmn_item(src, obj, "name")) && (t2 = jsmn_item(src, obj, "version")) && (t3 = jsmn_item(src, obj, "namespace"))) { @@ -526,6 +529,21 @@ static char *get_meta_uri(const char *src, const jsmntok_t *obj) return buf; } +/* Writes the UUID of the instance represented by `obj` to `uuid`. + Returns 0 on success, 1 if no "uuid" item is found and a negative error code on error. */ +static int get_uuid(char uuid[DLITE_UUID_LENGTH+1], const char *src, + const jsmntok_t *obj) +{ + const jsmntok_t *item; + if (!(item = jsmn_item(src, (jsmntok_t *)obj, "uuid"))) return 1; + if (item->end - item->start != DLITE_UUID_LENGTH) + return err(dliteParseError, "UUID should have length %d, got %d", + DLITE_UUID_LENGTH, item->end - item->start); + if (dlite_get_uuidn(uuid, src+item->start, item->end-item->start) < 0) + return -1; + return 0; +} + /* Writes the UUID of the metadata of the instance represented by `obj` to `uuid`. Returns non-zero on error. 
*/ static int get_meta_uuid(char uuid[DLITE_UUID_LENGTH+1], const char *src, @@ -1193,10 +1211,20 @@ DLiteJsonFormat dlite_jstore_loads(JStore *js, const char *src, int len) if ((format = dlite_json_check(src, tokens, NULL, &flags)) < 0) goto fail; if (flags & dliteJsonSingle) { - if (!(uri = get_uri(src, tokens))) - FAIL2("missing uri in single-entity formatted json data: \"%.30s%s\"", - src, dots); - if (dlite_get_uuid(uuid, uri) < 0) goto fail; + uuid[0] = '\0'; + if (get_uuid(uuid, src, tokens) < 0) goto fail; + if (!(uri = get_uri(src, tokens)) && !uuid[0]) + FAILCODE2(dliteParseError, + "missing UUID and URI in json data: \"%.30s%s\"", src, dots); + if (uri) { + char uuid2[DLITE_UUID_LENGTH+1]; + if (dlite_get_uuid(uuid2, uri) < 0) goto fail; + if (uuid[0] && strcmp(uuid, uuid2)) + FAILCODE2(dliteParseError, + "inconsistent URI and UUID in json data: uri=%s, uuid=%s", + uri, uuid); + if (!uuid[0]) strncpy(uuid, uuid2, sizeof(uuid)); + } jstore_addn(js, uuid, DLITE_UUID_LENGTH, src, len); } else { jsmntok_t *t = tokens + 1; @@ -1266,6 +1294,27 @@ int dlite_jstore_remove(JStore *js, const char *id) return jstore_remove(js, id); } +/* + Returns instance with given id from json store `js` or NULL on error. +*/ +DLiteInstance *dlite_jstore_get(JStore *js, const char *id) +{ + char uuid[DLITE_UUID_LENGTH+1]; + const char *buf=NULL, *scanid=id; + int uuidver = dlite_get_uuid(uuid, id); + if (uuidver < 0 || uuidver == UUID_RANDOM) + return errx(dliteKeyError, "cannot derive UUID from id: '%s'", id), NULL; + if (!(buf = jstore_get(js, uuid)) && + !(buf = jstore_get(js, id))) + return errx(dliteKeyError, "no such id in store: '%s'", id), NULL; + + /* If `id` is an UUID, check if `id` has been associated with a label */ + if ((uuidver == UUID_COPY || uuidver == UUID_EXTRACT) && + !(scanid = jstore_get_label(js, id))) scanid = id; + + return dlite_json_sscan(buf, scanid, NULL); +} + /* Initiate iterator `init` from json store `js`. 
If `metaid` is provided, the iterator will only iterate over instances diff --git a/src/dlite-json.h b/src/dlite-json.h index ff3310cc0..6467ba091 100644 --- a/src/dlite-json.h +++ b/src/dlite-json.h @@ -294,6 +294,11 @@ int dlite_jstore_add(JStore *js, const DLiteInstance *inst, */ int dlite_jstore_remove(JStore *js, const char *id); +/** + Returns instance with given id from json store `js` or NULL on error. +*/ +DLiteInstance *dlite_jstore_get(JStore *js, const char *id); + /** Initiate iterator `init` from json store `js`. If `metaid` is provided, the iterator will only iterate over instances diff --git a/src/dlite.h b/src/dlite.h index 41944f6e8..e21e614c1 100644 --- a/src/dlite.h +++ b/src/dlite.h @@ -6,6 +6,11 @@ @brief Main header file for dlite which import main headers */ +#ifdef HAVE_CONFIG +#include "config.h" +#endif + + #ifndef HAVE_DLITE #define HAVE_DLITE #endif diff --git a/src/utils/jstore.c b/src/utils/jstore.c index fc83b14c5..8f0aa9710 100644 --- a/src/utils/jstore.c +++ b/src/utils/jstore.c @@ -318,6 +318,24 @@ int jstore_to_file(JStore *js, const char *filename) return (n == 1) ? 0 : 1; } +/* Return number of elements in the store. */ +int jstore_count(JStore *js) +{ + int n=0; + map_iter_t iter = map_iter(&js->store); + while (map_next(&js->store, &iter)) n++; + return n; +} + +/* If there is one item in the store, return its key. Otherwise return NULL. */ +const char *jstore_get_single_key(JStore *js) +{ + map_iter_t iter = map_iter(&js->store); + const char *key = map_next(&js->store, &iter); + if (key && !map_next(&js->store, &iter)) return key; + return NULL; +} + /* Initialise iterator. Return non-zero on error. */ int jstore_iter_init(JStore *js, JStoreIter *iter) { diff --git a/src/utils/jstore.h b/src/utils/jstore.h index 5b51a6aa5..0209f7446 100644 --- a/src/utils/jstore.h +++ b/src/utils/jstore.h @@ -123,6 +123,12 @@ char *jstore_to_string(JStore *js); Returns non-zero on error. 
*/ int jstore_to_file(JStore *js, const char *filename); +/** Return number of elements in the store. */ +int jstore_count(JStore *js); + +/** If there is one item in the store, return its key. Otherwise return NULL. */ +const char *jstore_get_single_key(JStore *js); + /** Initialise iterator. Return non-zero on error. */ int jstore_iter_init(JStore *js, JStoreIter *iter); diff --git a/storages/python/dlite-plugins-python.c b/storages/python/dlite-plugins-python.c index cf940c355..d9b99a31b 100644 --- a/storages/python/dlite-plugins-python.c +++ b/storages/python/dlite-plugins-python.c @@ -502,7 +502,8 @@ int iterNext(void *iter, char *buf) if (!PyUnicode_Check(next)) FAIL1("generator method %s.query() should return a string", i->classname); if (!(uuid = PyUnicode_AsUTF8(next)) || strlen(uuid) != DLITE_UUID_LENGTH) - FAIL1("generator method %s.query() should return a uuid", i->classname); + FAIL2("generator method %s.query() should return an UUID, got: '%s'", + i->classname, uuid ? uuid : "(null)"); memcpy(buf, uuid, DLITE_UUID_LENGTH+1); retval = 0; } else { diff --git a/storages/python/python-storage-plugins/yaml.py b/storages/python/python-storage-plugins/yaml.py index 22e8cb6b6..e7b87442e 100644 --- a/storages/python/python-storage-plugins/yaml.py +++ b/storages/python/python-storage-plugins/yaml.py @@ -28,38 +28,48 @@ def open(self, location: str, options=None): - `r`: Open existing `location` for reading. - `w`: Open for writing. If `location` exists, it is truncated. - `soft7`: Whether to save using SOFT7 format. - - `single`: Whether the input is assumed to be in single-entity form. - If "auto" (default) the form will be inferred automatically. + - `single`: Whether to save in single-instance form. - `with_uuid`: Whether to include UUID when saving. + - `with_meta`: Whether to always include "meta" (even for metadata). + - `with_parent`: Whether to include parent info for transactions. + - `urikey`: Whether URIs are the preferred keys in multi-instance + format. 
""" - self.options = Options( - options, defaults="mode=a;soft7=true;single=auto;with_uuid=false" - ) + df = "mode=a;soft7=true;with_meta=false;with_parent=true;urikey=false" + self.options = Options(options, defaults=df) mode = self.options.mode self.writable = "w" in mode or "a" in mode self.generic = True self.location = location self.flushed = True # whether buffered data has been written to file - self._data = {} # data buffer + self._store = dlite.JStore() # data buffer if "r" in mode or "a" in mode: with open(location, "r") as f: data = pyyaml.safe_load(f) if data: - self._data = data + self._store.load_dict(data) - self.single = ( - "properties" in self._data - if self.options.single == "auto" - else dlite.asbool(self.options.single) - ) - self.with_uuid = dlite.asbool(self.options.with_uuid) + self.with_uuid = None + if "with_uuid" in self.options: + self.with_uuid = dlite.asbool(self.options.with_uuid) + + self.single = None + if "single" in self.options: + self.single = dlite.asbool(self.options.single) def flush(self): """Flush cached data to storage.""" if self.writable and not self.flushed: with open(self.location, "w") as f: self._pyyaml.safe_dump( - self._data, + self._store.get_dict( + soft7=dlite.asbool(self.options.soft7), + single=self.single, + with_uuid=self.with_uuid, + with_meta=dlite.asbool(self.options.with_meta), + with_parent=dlite.asbool(self.options.with_parent), + urikey=dlite.asbool(self.options.urikey), + ), f, default_flow_style=False, sort_keys=False, @@ -70,22 +80,15 @@ def load(self, id: str): """Loads `uuid` from current storage and return it as a new instance. Arguments: - id: A UUID representing a DLite Instance to return from the - storage. + id: UUID or URI of DLite Instance to return from the storage. Returns: - A DLite Instance corresponding to the given `id` (UUID). + A DLite Instance corresponding to the given `id`. 
""" - inst = instance_from_dict( - self._data, - id, - single=self.options.single, - check_storages=False, - ) - # Ensure metadata in single-entity form is always read-only + inst = self._store.get(id) + # Ensure metadata in single-instance form is always read-only if inst.is_meta and self.single: self.writable = False - return inst def save(self, inst: dlite.Instance): @@ -95,20 +98,16 @@ def save(self, inst: dlite.Instance): inst: A DLite Instance to store in the storage. """ - self._data[inst.uuid] = inst.asdict( - soft7=dlite.asbool(self.options.soft7), - single=True, - uuid=self.with_uuid, - ) + self._store.add(inst) self.flushed = False - def delete(self, uuid): + def delete(self, id): """Delete instance with given `uuid` from storage. Arguments: - uuid: UUID of instance to delete. + id: UUID or URI of instance to delete. """ - del self._data[uuid] + self._store.remove(id) self.flushed = False def query(self, pattern=None): @@ -124,10 +123,8 @@ def query(self, pattern=None): storage. """ - for uuid, inst_as_dict in self._data.items(): - if pattern and dlite.globmatch(pattern, inst_as_dict["meta"]): - continue - yield uuid + for id in self._store.get_ids(pattern): + yield id @classmethod def from_bytes(cls, buffer, id=None, options=None): @@ -137,6 +134,7 @@ def from_bytes(cls, buffer, id=None, options=None): buffer: Bytes or bytearray object to load the instance from. id: ID of instance to load. May be omitted if `buffer` only holds one instance. + options: Unused. Returns: New instance. diff --git a/storages/python/tests-python/input/test_data.yaml b/storages/python/tests-python/input/test_data.yaml index 6cd7842bd..8ffa025ee 100644 --- a/storages/python/tests-python/input/test_data.yaml +++ b/storages/python/tests-python/input/test_data.yaml @@ -23,6 +23,7 @@ myfixstring: Si myshort: 13 mystring: '...' 
+ 2f8ba28c-add6-5718-a03c-ea46961d6ca7: uri: my_test_instance_1 meta: http://onto-ns.com/meta/0.1/TestEntity diff --git a/storages/python/tests-python/input/test_meta.yaml b/storages/python/tests-python/input/test_meta.yaml index 70aa85c93..fe16f2efa 100644 --- a/storages/python/tests-python/input/test_meta.yaml +++ b/storages/python/tests-python/input/test_meta.yaml @@ -28,5 +28,5 @@ d9910bde-6028-524c-9e0f-e8f0db734bc8: description: An unsigned short integer. - name: myarray type: int32 - dims: [L, M, N] + shape: [L, M, N] description: An array string pointer. diff --git a/storages/python/tests-python/test_yaml_storage_python.py b/storages/python/tests-python/test_yaml_storage_python.py index f9bc34187..97f5b118a 100644 --- a/storages/python/tests-python/test_yaml_storage_python.py +++ b/storages/python/tests-python/test_yaml_storage_python.py @@ -34,7 +34,7 @@ # Test saving YAML metadata yaml_inst2 = yaml_mod.yaml() - yaml_inst2.open('yaml_test_save.yaml', 'mode=w;soft7=false') + yaml_inst2.open('yaml_test_save.yaml', 'mode=w;soft7=false;with_meta=true') yaml_inst2.save(inst) yaml_inst2.flush() with open(input_path / 'test_meta.yaml', "r") as f: @@ -53,7 +53,7 @@ # Test saving YAML data yaml_inst4 = yaml_mod.yaml() - yaml_inst4.open('yaml_test_save2.yaml', 'mode=w') + yaml_inst4.open('yaml_test_save2.yaml', 'mode=w;with_uuid=false') yaml_inst4.save(inst1) yaml_inst4.save(inst2) yaml_inst4.flush()