From f2f09e340ddf88875cc70aa275c1963b79095263 Mon Sep 17 00:00:00 2001 From: Stuart Chalk Date: Mon, 18 Dec 2023 14:53:25 -0500 Subject: [PATCH] SciDataLib-79 Code does not recognize ontological value reworked the code to implement this (but not yet at all levels) - updated __addid - added __chkont function to update the key of name/value pair with trailing '#' if not already present - updated examples --- examples/example.py | 12 +++++--- examples/example3.py | 58 ++++++++++++++++++------------------- examples/testnum.py | 20 +++++++++++++ scidatalib/scidata.py | 66 ++++++++++++++++++++++++++++++++++--------- 4 files changed, 110 insertions(+), 46 deletions(-) create mode 100644 examples/testnum.py diff --git a/examples/example.py b/examples/example.py index 448ee5d..0603512 100644 --- a/examples/example.py +++ b/examples/example.py @@ -41,10 +41,14 @@ def create_scidata(): link = 'https://stuchalk.github.io/scidata/examples/ph_min.jsonld' example.permalink(link) - # add discipline and subdiscipline (plus namespace) + # add namespaces (needed for values that point to ontological definitions of things) example.namespaces({'w3i': 'https://w3id.org/skgo/modsci#'}) - example.discipline('w3i:Chemistry') - example.subdiscipline('w3i:ChemicalInformatics') + example.namespaces({'qudt': 'https://qudt.org/vocab/unit/'}) + example.namespaces({'obo': 'http://purl.obolibrary.org/obo/'}) + + # add discipline and subdiscipline + example.discipline('Chemistry') + example.subdiscipline('w3i:Cheminformatics') """ METHODOLOGY SECTION """ # methodology data goes into the aspects array in the JSON-LD file @@ -363,7 +367,7 @@ def create_scidata(): "type": "IC50", "value": "12.300000000000000000000000000000", - "units": "uM"}]}]}] + "units": "qudt:MicroM"}]}]}] example.scidatapackage(packet) diff --git a/examples/example3.py b/examples/example3.py index c89c813..651897c 100644 --- a/examples/example3.py +++ b/examples/example3.py @@ -38,54 +38,54 @@ 'chebi': 'obo:CHEBI_15377'} con1 = { 
- '@id': 'constituent', + '@id': 'constituent/1/', 'source': 'substance/1/', 'role': 'chm:analyte', - 'properties': [{ - '@id': 'property', - 'quantity': 'mass of substance per volume', - 'property': 'Concentration (w/v)', + 'quantities': [{ + '@id': 'quantity/1/', + 'quantitykind': 'mass of substance per volume', + 'quantity': 'Concentration (w/v)', 'value': { '@id': 'value', 'number': 2.99, 'unitref': 'qudt:PPM'} }, { - '@id': 'property', - 'quantity': 'volume', - 'property': 'Volume of solution', + '@id': 'quantity/2/', + 'quantitykind': 'volume', + 'quantity': 'Volume of solution', 'value': { '@id': 'value', 'number': 100.0, 'unitref': 'qudt:MilliL'} }] } con2 = { - '@id': 'constituent', + '@id': 'constituent/2/', 'source': 'substance/2/', 'role': 'chm:reagent', 'properties': [{ - '@id': 'property', - 'quantity': 'mass of substance per volume', - 'property': 'Concentration (w/v)', + '@id': 'quantity/1/', + 'quantitykind': 'mass of substance per volume', + 'quantity': 'Concentration (w/v)', 'value': { '@id': 'value', 'number': 2.99, 'unitref': 'qudt:PPM'} }, { - '@id': 'property', - 'quantity': 'volume', - 'property': 'Volume of solution', + '@id': 'quantity/2/', + 'quantitykind': 'volume', + 'quantity': 'Volume of solution', 'value': { '@id': 'value', 'number': 100.0, 'unitref': 'qudt:MilliL'} }]} con3 = { - '@id': 'constituent', + '@id': 'constituent/3/', 'source': 'substance/3/', 'role': 'chm:reagent'} con4 = { - '@id': 'constituent', + '@id': 'constituent/4/', 'source': 'substance/4/', 'role': 'chm:buffer'} con5 = { - '@id': 'constituent', + '@id': 'constituent/5/', 'source': 'substance/5/', 'role': 'chm:solvent'} con6 = { - '@id': 'constituent', + '@id': 'constituent/6/', 'source': 'substance/6/', 'role': 'chm:solvent'} cons = [con1, con2, con3, con4, con5, con6] @@ -99,25 +99,25 @@ 'phase#': 'sub:liquid', 'constituents': cons, 'properties': [{ - '@id': 'property', - 'quantity': 'mass of substance per volume', - 'property': 'Concentration (w/v)', + '@id': 
def scidata_nums():
    """
    Build a SciData object holding one source entry and print it as JSON.

    The object is created with the unique id 'ids', a single source
    dictionary is passed to ``sources()`` and the object's ``output``
    attribute is then printed as indented JSON.
    """
    doc = SciData('ids')
    # single source record handed to sources(); the 'otype#' key already
    # ends in '#'
    source_entry = {
        'citation': 'Example SciData JSON-LD file, Chalk Research Group',
        'url': 'https://stuchalk.github.io/scidata/examples/ph_min.jsonld',
        'stype': 'dataset',
        'otype#': 'sdo:dataset',
    }
    doc.sources([source_entry])
    rendered = json.dumps(doc.output, indent=4, ensure_ascii=False)
    print(rendered)
"evaluation#": "", # def evaluation (ont entry) "aspects": []}, # def aspects OR def scidatapacket "system": { "@id": "system/", @@ -490,9 +493,15 @@ def discipline(self, disc: str) -> str: """ if isinstance(disc, str): if ":" in disc: - self.__addid(disc) - self.meta['@graph']['scidata']['discipline'] = disc - return self.meta['@graph']['scidata']['discipline'] + if self.__addid(disc): + self.meta['@graph']['scidata']['discipline#'] = disc + return self.meta['@graph']['scidata']['discipline#'] + else: + self.meta['@graph']['scidata']['discipline'] = disc + return self.meta['@graph']['scidata']['discipline'] + else: + self.meta['@graph']['scidata']['discipline'] = disc + return self.meta['@graph']['scidata']['discipline'] def subdiscipline(self, subdisc: str) -> str: """ @@ -511,9 +520,15 @@ def subdiscipline(self, subdisc: str) -> str: """ if isinstance(subdisc, str): if ":" in subdisc: - self.__addid(subdisc) - self.meta['@graph']['scidata']['subdiscipline'] = subdisc - return self.meta['@graph']['scidata']['subdiscipline'] + if self.__addid(subdisc): + self.meta['@graph']['scidata']['subdiscipline#'] = subdisc + return self.meta['@graph']['scidata']['subdiscipline#'] + else: + self.meta['@graph']['scidata']['subdiscipline'] = subdisc + return self.meta['@graph']['scidata']['subdiscipline'] + else: + self.meta['@graph']['scidata']['subdiscipline'] = subdisc + return self.meta['@graph']['scidata']['subdiscipline'] def evaluation(self, evaln: str) -> str: """ @@ -532,9 +547,15 @@ def evaluation(self, evaln: str) -> str: """ if isinstance(evaln, str): if ":" in evaln: - self.__addid(evaln) - self.meta['@graph']['scidata']['methodology']['evaluation'] = evaln - return self.meta['@graph']['scidata']['methodology']['evaluation'] + if self.__addid(evaln): + self.meta['@graph']['scidata']['methodology']['evaluation#'] = evaln + return self.meta['@graph']['scidata']['methodology']['evaluation#'] + else: + self.meta['@graph']['scidata']['methodology']['evaluation'] = evaln 
def __chkont(self, data: dict) -> dict:
    """
    Mark ontological values in a dictionary by suffixing their keys with '#'.

    Every key/value pair of ``data`` is copied into a new dictionary.  When
    a value is a string containing ':' for which ``__addid`` returns True,
    a trailing '#' is appended to its key unless the key already ends with
    one.  All other pairs are copied unchanged.  The dictionary passed in
    is not modified.

    :param data: dictionary of name/value pairs to check
    :returns: new dictionary with the keys of ontological values updated
    """
    checked = {}
    # iterate over items() so every value stays paired with its own key,
    # even when the same value occurs under more than one key
    for key, val in data.items():
        # only strings can be ontology references; numbers, lists, etc.
        # are copied through untouched
        if isinstance(val, str) and ':' in val and self.__addid(val):
            # add the '#' marker only once, and only to string keys
            if isinstance(key, str) and not key.endswith('#'):
                key = key + '#'
        checked[key] = val
    return checked