diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..36362f5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# python file cache +__pycache__/ + +# python build products +*egg-info/ diff --git a/script/NeXusOntology_V1.1.ipynb b/code/jupyterNotebook/NeXusOntology_V1.1.ipynb similarity index 100% rename from script/NeXusOntology_V1.1.ipynb rename to code/jupyterNotebook/NeXusOntology_V1.1.ipynb diff --git a/code/nxsOnto/README.md b/code/nxsOnto/README.md new file mode 100644 index 0000000..8323544 --- /dev/null +++ b/code/nxsOnto/README.md @@ -0,0 +1,28 @@ +**NeXusOntology creation script** + +Ensure that packages `owlready2` and `pygithub` are installed by running: `pip install -r requirements.txt` + +Alternative: If you have [conda](https://docs.conda.io/) available, a custom +conda environment can be created and this package installed (by `pip`) with +these steps: + +```bash + conda env create -f environment. + conda activate NeXusOntology + pip install -e . +``` + +Add four parameters when running the code: + +``` +python3 -m nxsOnto.generator [github access token] [out path for ontology] [temporary file path] + ``` + +To get a Github access token: +Github/settings/developer settings/personal access tokens/create new token + +Some deprecation warnings are likely to be displayed before the `.owl` file is created. + +The `.owl` file (RDF/XML syntax) can be opened by a text editor or ontology tool such as Protege (https://protege.stanford.edu/) + +See ontology metadata for more information. diff --git a/code/nxsOnto/environment.yml b/code/nxsOnto/environment.yml new file mode 100644 index 0000000..11a8629 --- /dev/null +++ b/code/nxsOnto/environment.yml @@ -0,0 +1,14 @@ +# Define conda environment for use with Python code + +name: NeXusOntology + +# conda env create -f environment.yml +# conda activate NeXusOntology + +channels: + - defaults + - conda-forge + +dependencies: + - owlready2 + - pygithub diff --git a/code/nxsOnto/nxsOnto/__init__.py b/code/nxsOnto/nxsOnto/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/code/nxsOnto/nxsOnto/generator.py b/code/nxsOnto/nxsOnto/generator.py new file mode 100644 index 0000000..ee08e54 --- /dev/null +++ b/code/nxsOnto/nxsOnto/generator.py @@ -0,0 +1,458 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # Python code to parse nexus base class nxdl file contents to python dict and create owl ontology +# # When updating this file change script version in line 54 + + +from github import Github +import json +import xml.dom.minidom +import pickle +from owlready2 import sys,urllib, onto_path, get_ontology, DataProperty, FunctionalProperty, AnnotationProperty, ObjectProperty, Thing, time +import types +import datetime + +# Github token and file path for created owl file +token = sys.argv[1] +out_path = sys.argv[2] +tmp_file_path = sys.argv[3] + +# Pickle files used to avoid uneccesary parsing of NeXus files, mainly for development +defn_pickle_file = tmp_file_path + '/defn.p' +baseclass_pickle_file = tmp_file_path + '/baseclass.p' +types_pickle_file = tmp_file_path + '/types.p' +tags_pickle_file = tmp_file_path + '/tags.p' + +# Parameters for retrieving the definitions +nexus_repository = 'https://github.com/nexusformat' +types_url = 'https://raw.githubusercontent.com/nexusformat/definitions/main/nxdlTypes.xsd' +nexus_repo = 'nexusformat/definitions' # for github api + +# Operational parameters +join_string = '-' #string added between joined base class and field names for identifiers +join_string_label = ' ' #string added between joined base class and fieldnames for rdfs:label +default_units = 'NX_UNITLESS' #use this if units not specified + +# Parameters for linking definitions in seeAlso field to nexusformat.org +base_class_web_page_prefix = 'https://manual.nexusformat.org/classes/base_classes/' +application_definition_web_page_prefix = 'https://manual.nexusformat.org/classes/applications/' + +# Create ontology IRI +base_iri = 'http://purl.org/nexusformat/definitions/' +onto_name = 'NeXusOntology' +onto_iri = base_iri + onto_name + +# Ontology metadata section +_script_version = '1.1' # script version - update after edit +nexus_website = 'https://www.nexusformat.org/' +_creator = 'NeXus International Advisory Committee (NIAC)' +_licence = 'https://creativecommons.org/licenses/by/4.0/' +_publication = 'https://doi.org/10.5281/zenodo.4806026' +onto_comment = ''' + + This ontology extracts information about NeXus classes and fields from + NeXus nxdl definition files on the NeXus GitHub site. + See 'seeAlso' for links to the NeXus project, including licencing information. + This project was undertaken under ExPaNDS WP3.2 (https://expands.eu/) + + Purpose + The ontology is designed to fulfil several purposes. First, it creates unique identifiers + for each of the NeXus fields which would normally exist only within the namespaces of the + defining NeXus classes. This is the primary goal and provides PIDs for annotation and tagging. + The second purpose is to allow, via separate ontologies, NeXus fields and classes to be mapped + onto equivalent or related terms defined elsewhere. + Finally, we hope that this ontology, when used with a tool such as Protege, will provide a + useful 'NeXus Explorer' tool to gain a quick overview of NeXus with links to official NeXus + documentation. + + Design Philosophy + The terms in the ontology are extracted almost entirely from NeXus nxdl definition file and converted to + an OWL ontology using the owlready2 python module (https://pypi.org/project/Owlready2/) + NeXus classes - Base Classes and Application Definitions - are expressed as OWL classes. + NeXus fields, which contain the NeXus metadata, are expressed as Owl data properties. + While NeXus provides a subclassing method ('extends') for NeXus classes, this is not currently reflected + in the corresponding OWL classes. + One can think of the main purpose of the ontology as being to 'flatten' the NeXus fields into a single + namespace, rather than existing in the multiple namespaces of the NeXus classes. This requires longer and + more explicit names for the NeXus fields, which are created by prepending the NeXus base class name to the + NeXus field name. It is very important to note that a NeXus application definition can extend a base class + adding new fields, and that it is understood (see NeXus documentation) that the new fields then reserve names + within the class dictionary in order to avoid later duplication. These new fields are therefore shown as data + properties of the NeXus Base Classes, even though they are defined outside the original class definition. + + NeXus classes are reviewed periodically by the NeXus NIAC. It is anticipated that this ontology can be updated + quickly and automatically to reflect the updated definitions. + + Caveats + Some NeXus classes (e.g. NXtransformations) are related specifically to the class that they are contained in. + This relationship is not preserves. + NeXus allows multiple instances of metadata fields within a dataset. Relating multiple field values to a + single identifier will require a selection algorithm. + + Version + The version string is the NeXus version followed by the ontology version. + +''' + + +def dictionary_from_types(): + """Create a dictionary of NeXus simple types (unit categories) and dumps them to the types_pickle_file""" + global typesDict + types_dom = xml.dom.minidom.parse(urllib.request.urlopen(types_url)) + typesDict = {} + for nxtype in types_dom.getElementsByTagName('xs:simpleType'): + name = nxtype.getAttribute('name') + doc = nxtype.getElementsByTagName('xs:documentation') + docstr = doc[0].firstChild.nodeValue + docstr = docstr.replace('\n', '').replace('\t', '') + typesDict[name] = {'doc': docstr} + pickle.dump(typesDict, open(types_pickle_file, "wb")) + + +def dictionary_from_base_class_files(): + """Parse nexus base class files via url to python dictionary""" + global repo, tagsDict, base_class_url, file + g = Github(token) + repo = g.get_repo(nexus_repo) + with urllib.request.urlopen(repo.tags_url) as url: + tags = json.loads(url.read().decode()) + tagsDict = tags[0] # get version tags from master branch + base_class_url = [] + for file in repo.get_contents("base_classes"): + if str(file.path).endswith('nxdl.xml'): + base_class_url.append(file.download_url) + + +def addFieldToDict(field, defn_name, className): + """ defn_name is used to add application definition to field dict if the field is defined in an app deff. """ + field_name = field.getAttribute('name') + deprecationAttribute = field.getAttribute('deprecated') + if deprecationAttribute: + notice = ['Deprecation warning ', field_name, ' in ', className, ': ', deprecationAttribute] + print(''.join(notice)) + + long_name = className + join_string + field_name + label = className + join_string_label + field_name + + classname_fields = classDict[className]['fields'] + if not long_name in classDict[className]['fields'].keys(): + classname_fields[long_name] = {} # create dictionary for field if doesn't exist + + classname_fields[long_name]['fieldName'] = field_name + classname_fields[long_name]['units'] = field.getAttribute('units') + if classname_fields[long_name]['units'] == '': + classname_fields[long_name]['units'] = default_units + + classname_fields[long_name]['xml_file'] = file #xml file where field is defined + classname_fields[long_name]['defn_name'] = defn_name # application defn name is passed in if field is defined in a defn, else None is used + classname_fields[long_name]['label'] = label # compound name for label + + _type = field.getAttribute('type') + if _type == '': + _type = 'NX_CHAR' # default if not specified + + classname_fields[long_name]['type'] = _type + + try: + field_doc = field.getElementsByTagName('doc')[0].firstChild.nodeValue.replace('\n','') + except: + field_doc = '' + classname_fields[long_name]['fieldDoc'] = field_doc + + +def parse_base_classes(): + global classDict + #global classDict, file, dom1, defn, className, docstr, docelement, flds, field, group + classDict = {} # create empty classDict dictionary + _maxTries = 10 # try to parse file this many times before giving up + for file in base_class_url: + + for i in range(_maxTries): + try: + dom1 = xml.dom.minidom.parse(urllib.request.urlopen(file)) + break + except: + print('=== Problem parsing %s; try %i times then give up' % (file, _maxTries)) + time.sleep(1) + + defn = dom1.getElementsByTagName('definition')[0] + + className = defn.getAttribute('name') # class name from name attribute in definition + + if not className in classDict.keys(): + classDict[className] = {} # each class is a dictionary - create a new one if doesn't exist + + classDict[className]['xml_file'] = file + + classDict[className]['extends'] = defn.getAttribute('extends') + + docstr = '' + for docelement in dom1.getElementsByTagName('doc'): + if docelement.parentNode.tagName == 'definition': + docstr = docelement.firstChild.nodeValue.replace('\n', '') + break + classDict[className]['classDoc'] = docstr + + if not 'fields' in classDict[className].keys(): + classDict[className]['fields'] = {} # create fields dictionary for class if doesn't exist + + # look for fields in group but not recursive + flds = (field for field in defn.getElementsByTagName('field') if field.parentNode == defn) + + for field in flds: + addFieldToDict(field, None, className) + + classDict[className]['groups_cited'] = [] + for group in defn.getElementsByTagName('group'): + groupName = group.getAttribute('type') + classDict[className]['groups_cited'] += [groupName] + pickle.dump(classDict, open(baseclass_pickle_file, "wb")) + pickle.dump(tagsDict, open(tags_pickle_file, "wb")) + + +def parse_application_definitions(): + """ + parse nexus application definitions + extract extra base class fields and add to base class dictionary + get a list of NeXus base application definition urls from github + """ + global applicationDict + #global file, applicationDict, dom1, defn_name, group, defn, className, field, flds, docstr, docelement + + application_url = [] + for file in repo.get_contents("applications"): + try: + if str(file).split('.')[-2] == 'nxdl': + application_url += [file.download_url] + except: + pass + applicationDict = {} + for file in application_url: + dom1 = xml.dom.minidom.parse(urllib.request.urlopen(file)) # pick one at random + + appdefn = dom1.getElementsByTagName('definition')[0] + defn_name = appdefn.getAttribute('name') + + group = dom1.getElementsByTagName('group') + classList, classNameList = [], [] + for defn in group: + className = defn.getAttribute('type') + classNameList += [className] + + # look for fields in group but not recursive + flds = (field for field in defn.getElementsByTagName('field') if field.parentNode == defn) + for field in flds: + # print('=== Added field %s from class %s in application definition %s' % (field.getAttribute('name'), className, defn_name)) + addFieldToDict(field, defn_name, className) + + docstr = '' + for docelement in dom1.getElementsByTagName('doc'): + if docelement.parentNode.tagName == 'definition': + docstr = docelement.firstChild.nodeValue.replace('\n', '') + break + + # get information about application definition (name, xml_file, extends, doc) and add to dict + + applicationDict[defn_name] = {} # new entry with definition nae as key + applicationDict[defn_name]['extends'] = appdefn.getAttribute('extends') + applicationDict[defn_name]['doc'] = docstr + applicationDict[defn_name]['xml_file'] = file + applicationDict[defn_name]['groups_cited'] = classNameList + pickle.dump(classDict, open(baseclass_pickle_file, "wb")) # re-save class dict (now has new fields) + pickle.dump(applicationDict, open(defn_pickle_file, "wb")) + + +def write_ontology(): + """create owl ontology from previously created dicts using owlready2 module""" + global onto, dataset + #global onto, created, dataset, unitCategory, hasValue, hasUnit + + onto_path.append(out_path) + onto = get_ontology(onto_iri) + # get properties from cdterms + with get_ontology("http://purl.org/dc/terms/"): + class creator(AnnotationProperty): pass + + class licence(AnnotationProperty): pass + + class created(AnnotationProperty): pass + with onto: + version = '%s-%s' % (tagsDict['name'], _script_version) # from NeXus tag and script version + + onto.metadata.versionInfo.append(version) + onto.metadata.creator = _creator + onto.metadata.licence = _licence + onto.metadata.seeAlso.append(nexus_website) + onto.metadata.seeAlso.append(nexus_repository) + onto.metadata.seeAlso.append(_publication) + onto.metadata.comment.append(onto_comment) + onto.metadata.created.append(datetime.date.today().strftime("%b-%d-%Y")) + + class NeXus(Thing): + comment = 'NeXus concept' + + class dataset(Thing): + comment = 'Dummy data set' + + class NeXusField(ObjectProperty): + domain = [dataset] + comment = 'NeXus field (ObjectProperty). Unique names are created by prepending the NeXus class name to the NeXus field name' + + class NXobject(NeXus): + comment = classDict['NXobject']['classDoc'].replace('\t', '') # NeXus documentation string + seeAlso = base_class_web_page_prefix + 'NXobject' + '.html' + + NXobject.set_iri(NXobject, base_iri + 'NXobject') # set iri using agree pattern for Nexus + + class NeXusBaseClass(NXobject): + comment = 'NeXus Base Class' + seeAlso = 'https://manual.nexusformat.org/classes/index.html' + + class NeXusApplicationDefinition(NXobject): + comment = 'NeXus Application Definition' + seeAlso = 'https://manual.nexusformat.org/classes/index.html' + + class citesGroup(NXobject >> NeXusBaseClass): + comment = 'NXobject cites base class relationship' + + class extends(AnnotationProperty): + pass + + class NeXusType(AnnotationProperty): + pass + + class unit(AnnotationProperty): + pass + + class NeXusClass(AnnotationProperty): + pass + + class unitCategory(NeXus): + comment = 'NeXus unit category. Can be considered instances of a measure. Assign data properties hasValue(' \ + 'any), hasMinValue(any), hasMaxValue(any), hasUnits(str) ' + + class hasValue(DataProperty, FunctionalProperty): + domain = [unitCategory] + comment = 'NeXus field value' + + class hasMinValue(DataProperty, FunctionalProperty): + domain = [unitCategory] + comment = 'Minimum of NeXus field value' + + class hasMaxValue(DataProperty, FunctionalProperty): + domain = [unitCategory] + comment = 'Maximum of NeXus field value' + + class hasUnit(DataProperty, FunctionalProperty): + domain = [unitCategory] + range = [str] + comment = 'NeXus unit (string). Should be consistent with unit category.' + + for unit in typesDict.keys(): + if unit == 'anyUnitsAttr': # general description, not specific unit category + unitCategory.comment.append(typesDict[unit]['doc']) # use to document unitCategory class + elif unit == 'primitiveType': # do nothing with this entry + pass + else: + typesDict[unit]['class'] = types.new_class(unit, (unitCategory,)) # create new unit category subclass + typesDict[unit]['class'].comment.append(typesDict[unit]['doc']) # document it + + for nxBaseClass in classDict.keys(): + + if not nxBaseClass == 'NXobject': # NXobject can't be subclass of NXobject + _nx_class = types.new_class(nxBaseClass, (NeXusBaseClass,)) + _nx_class.set_iri(_nx_class, base_iri + nxBaseClass) # use agreed term iri + classDict[nxBaseClass]['onto_class'] = _nx_class # add class to dict + _nx_class.comment.append(classDict[nxBaseClass]['classDoc']) + _nx_class.extends.append(classDict[nxBaseClass]['extends']) + web_page = base_class_web_page_prefix + nxBaseClass + '.html' + + _nx_class.seeAlso.append(web_page) + + for nxField in classDict[nxBaseClass]['fields'].keys(): # loop through each field in base class + + _nx_field = types.new_class(nxField, (NeXusField,)) + + _nx_field.set_iri( + base_iri + nxField) # use agreed term iri (seem to need only single parameter for properties) + + classDict[nxBaseClass]['fields'][nxField]['class'] = _nx_field + + _nx_field.comment.append(classDict[nxBaseClass]['fields'][nxField]['fieldDoc']) + _nx_field.label.append(classDict[nxBaseClass]['fields'][nxField]['label']) + + defn_name = classDict[nxBaseClass]['fields'][nxField]['defn_name'] + + if defn_name != None: + # Field is defined by an application definition; give app defn web page (no anchor - might add later) + web_page = application_definition_web_page_prefix + defn_name + '.html' + _nx_field.seeAlso.append(web_page) + else: + # Field is defined by base class file; give base class web page with arhchor + + anchor = '#%s-%s-field' % (nxBaseClass.lower(), + classDict[nxBaseClass]['fields'][nxField]['fieldName'].lower()) + anchor = anchor.replace('_', '-') # replace symbols for anchors + + web_page = base_class_web_page_prefix + nxBaseClass + '.html' + anchor + _nx_field.seeAlso.append(web_page) + + unit_string = classDict[nxBaseClass]['fields'][nxField]['units'] + unit_class = typesDict[unit_string]['class'] + _nx_class.is_a.append(_nx_field.some(unit_class)) + + _nx_field.NeXusClass.append(_nx_class) + + _nx_field.range = [unit_class] + + # second loop required to ensure all classes defined before trying to cite them + for nxBaseClass in classDict.keys(): + if not nxBaseClass == 'NXobject': # NXobject can't be subclass of NXobject + for cited in classDict[nxBaseClass]['groups_cited']: + classDict[nxBaseClass]['onto_class'].is_a.append(citesGroup.some(classDict[cited]['onto_class'])) + + for application in applicationDict.keys(): + _nx_app = types.new_class(application, (NeXusApplicationDefinition,)) + _nx_app.set_iri(_nx_app, base_iri + application) # use agreed term iri + + _nx_app.comment.append(applicationDict[application]['doc']) + _nx_app.extends.append(applicationDict[application]['extends']) + + web_page = application_definition_web_page_prefix + application + '.html' + _nx_app.seeAlso.append(web_page) + + for base_class in applicationDict[application]['groups_cited']: + _nx_app.is_a.append(citesGroup.some(classDict[base_class]['onto_class'])) + onto.save() + + +def create_test_individuals(): + # create individuals - these are just for testing + with onto: + sample_temp_1 = typesDict['NX_TEMPERATURE']['class']('sample_temp_1') + sample_temp_1.hasUnit = 'Kelvin' + sample_temp_1.hasValue = 10 + + dataset_1 = dataset('dataset1') + setattr(dataset_1, 'NXsample%stemperature' % join_string, [sample_temp_1]) + + beam_energy_1 = typesDict['NX_ENERGY']['class']('beam_energy_1') + beam_energy_1.hasUnit = 'keV' + beam_energy_1.hasValue = 12.4 + + dataset_2 = dataset('dataset2') + setattr(dataset_2, 'NXbeam%sfinal_energy' % join_string, [beam_energy_1]) + onto.save() + + +def main(): + dictionary_from_types() + dictionary_from_base_class_files() + parse_base_classes() + parse_application_definitions() + write_ontology() + create_test_individuals() + +if __name__ == "__main__": + main() diff --git a/code/nxsOnto/requirements.txt b/code/nxsOnto/requirements.txt new file mode 100644 index 0000000..c020231 --- /dev/null +++ b/code/nxsOnto/requirements.txt @@ -0,0 +1,2 @@ +owlready2 +pygithub \ No newline at end of file diff --git a/code/nxsOnto/setup.py b/code/nxsOnto/setup.py new file mode 100644 index 0000000..df573fa --- /dev/null +++ b/code/nxsOnto/setup.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# +# To use this file type: +# +# python setup.py install +# +from setuptools import setup + +from setuptools import setup + +__entry_points__ = { + "console_scripts": [ + "nexusontology = nxsOnto.generator:main", + ], + # 'gui_scripts': [], +} + +setup(name='NeXusOntologyGenerator', + version='1.1', + description='Generates an ontology from nxdl files', + author='Steve Collins', + url="https://github.com/nexusformat/NeXusOntology/code/nxsOnto", + packages=['nxsOnto'], + license='Apache 2', + entry_points=__entry_points__, + ) diff --git a/script/NeXusOntology_V1.1.py b/script/NeXusOntology_V1.1.py deleted file mode 100644 index adfa538..0000000 --- a/script/NeXusOntology_V1.1.py +++ /dev/null @@ -1,565 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Python code to parse nexus base class nxdl file contents to python dict and create owl ontology -# -# ## To create the NeXus ontology edit cell 2 and execute the whole notebook. - -# In[1]: - - -#modules to install -#pip install owlready2 -#pip install pygithub - - -# In[2]: - - -################################################################# -#github token and file path for created owl file - edit this cell -_script_version = '1.1' # script version - update after edit -token = "" # insert your github token -out_path = '/home/spc93/ontology' -tmp_file_path = '/home/spc93/tmp' -################################################################# - - -# In[3]: - - -#get a list of NeXus base class urls from github - -base_iri = 'http://purl.org/nexusformat/definitions/' -nexus_repo = 'nexusformat/definitions' # for github api -onto_name = 'NeXusOntology' -_creator = 'NeXus International Advisory Committee (NIAC)' -_licence = 'https://creativecommons.org/licenses/by/4.0/' -_publication = 'https://doi.org/10.5281/zenodo.4806026' - -onto_iri = base_iri + onto_name - -# pickle files used to avoid uneccesary parsing of NeXus files, mainly for development -defn_pickle_file = tmp_file_path + '/defn.p' -baseclass_pickle_file = tmp_file_path + '/baseclass.p' -types_pickle_file = tmp_file_path + '/types.p' -tags_pickle_file = tmp_file_path + '/tags.p' - -base_class_web_page_prefix = 'https://manual.nexusformat.org/classes/base_classes/' -application_definition_web_page_prefix = 'https://manual.nexusformat.org/classes/applications/' -types_url = 'https://raw.githubusercontent.com/nexusformat/definitions/main/nxdlTypes.xsd' - -join_string = '-' #string added between joined base class and field names for identifiers -join_string_label = ' ' #string added between joined base class and fieldnames for rdfs:label - -nexus_website = 'https://www.nexusformat.org/' -nexus_repository = 'https://github.com/nexusformat' - -default_units = 'NX_UNITLESS' #use this if units not specified - -import datetime -import pickle - -# Ontology metadata comment -onto_comment = ''' - - This ontology extracts information about NeXus classes and fields from - NeXus nxdl definition files on the NeXus GitHub site. - See 'seeAlso' for links to the NeXus project, including licencing information. - This project was undertaken under ExPaNDS WP3.2 (https://expands.eu/) - - Purpose - The ontology is designed to fulfil several purposes. First, it creates unique identifiers - for each of the NeXus fields which would normally exist only within the namespaces of the - defining NeXus classes. This is the primary goal and provides PIDs for annotation and tagging. - The second purpose is to allow, via separate ontologies, NeXus fields and classes to be mapped - onto equivalent or related terms defined elsewhere. - Finally, we hope that this ontology, when used with a tool such as Protege, will provide a - useful 'NeXus Explorer' tool to gain a quick overview of NeXus with links to official NeXus - documentation. - - Design Philosophy - The terms in the ontology are extracted almost entirely from NeXus nxdl definition file and converted to - an OWL ontology using the owlready2 python module (https://pypi.org/project/Owlready2/) - NeXus classes - Base Classes and Application Definitions - are expressed as OWL classes. - NeXus fields, which contain the NeXus metadata, are expressed as Owl data properties. - While NeXus provides a subclassing method ('extends') for NeXus classes, this is not currently reflected - in the corresponding OWL classes. - One can think of the main purpose of the ontology as being to 'flatten' the NeXus fields into a single - namespace, rather than existing in the multiple namespaces of the NeXus classes. This requires longer and - more explicit names for the NeXus fields, which are created by prepending the NeXus base class name to the - NeXus field name. It is very important to note that a NeXus application definition can extend a base class - adding new fields, and that it is understood (see NeXus documentation) that the new fields then reserve names - within the class dictionary in order to avoid later duplication. These new fields are therefore shown as data - properties of the NeXus Base Classes, even though they are defined outside the original class definition. - - NeXus classes are reviewed periodically by the NeXus NIAC. It is anticipated that this ontology can be updated - quickly and automatically to reflect the updated definitions. - - Caveats - Some NeXus classes (e.g. NXtransformations) are related specifically to the class that they are contained in. - This relationship is not preserves. - NeXus allows multiple instances of metadata fields within a dataset. Relating multiple field values to a - single identifier will require a selection algorithm. - - Version - The version string is the NeXus version followed by the ontology version. - -''' - -# To avoid re-parsing NeXus files after initial run, execute this -# cell instead of the next three - -classDict = pickle.load( open(baseclass_pickle_file , "rb" ) ) -applicationDict = pickle.load( open(defn_pickle_file , "rb" ) ) -typesDict = pickle.load( open(types_pickle_file , "rb" ) ) -tagsDict = pickle.load( open(tags_pickle_file , "rb" ) ) - -# In[4]: - - -# Create a dictionary of NeXus simple types (unit categories) - -from github import Github -import xml.dom.minidom -import urllib -import pickle - -types_dom = xml.dom.minidom.parse(urllib.request.urlopen(types_url)) - -typesDict = {} -for nxtype in types_dom.getElementsByTagName('xs:simpleType'): - name = nxtype.getAttribute('name') - doc = nxtype.getElementsByTagName('xs:documentation') - docstr = doc[0].firstChild.nodeValue - docstr = docstr.replace('\n','').replace('\t','') - typesDict[name] = {'doc': docstr} - - -pickle.dump(typesDict, open(types_pickle_file, "wb" ) ) - - -# In[5]: - - -# parse nexus base class files via url to python dictionary - - -from github import Github -import xml.dom.minidom -import os -import urllib -import time -import pickle -import json - -g = Github(token) -repo = g.get_repo(nexus_repo) - -with urllib.request.urlopen(repo.tags_url) as url: - tags = json.loads(url.read().decode()) - tagsDict = tags[0] # get version tags from master branch - - -base_class_url = [] -for file in repo.get_contents("base_classes"): - if str(file).split('.')[-2] == 'nxdl': - base_class_url += [file.download_url] - - -_maxTries = 10 # try to parse file this many times before giving up - -def addFieldToDict(classDict, field, defn_name): # make a function to be reused later - #defn_name is used to add application definition to field dict if the field is defined in an app deff. - field_name = field.getAttribute('name') - - deprecationAttribute = field.getAttribute('deprecated') - if not deprecationAttribute == '': - print("=== Deprecation warning %s in %s: %s" % (field_name, className, deprecationAttribute)) - - long_name = className + join_string + field_name - label = className + join_string_label + field_name - - if not long_name in classDict[className]['fields'].keys(): - #print('~~~ field did not exist: %s' % long_name) - classDict[className]['fields'][long_name] = {} # create dictionary for field if doesn't exist - - - classDict[className]['fields'][long_name]['fieldName'] = field_name - classDict[className]['fields'][long_name]['units'] = field.getAttribute('units') - if classDict[className]['fields'][long_name]['units'] == '': - classDict[className]['fields'][long_name]['units'] = default_units - - classDict[className]['fields'][long_name]['xml_file'] = file #xml file where field is defined - classDict[className]['fields'][long_name]['defn_name'] = defn_name # application defn name is passed in if field is defined in a defn, else None is used - classDict[className]['fields'][long_name]['label'] = label # compound name for label - - _type = field.getAttribute('type') - if _type == '': - _type = 'NX_CHAR' # default if not specified - - classDict[className]['fields'][long_name]['type'] = _type - - try: - field_doc = field.getElementsByTagName('doc')[0].firstChild.nodeValue.replace('\n','') - except: - field_doc = '' - classDict[className]['fields'][long_name]['fieldDoc'] = field_doc - - -classDict = {} # create empty classDict dictionary - - -for file in base_class_url: - #print(file) - - for i in range(_maxTries): - try: - dom1 = xml.dom.minidom.parse(urllib.request.urlopen(file)) - break - except: - print('=== Problem parsing %s; try %i times then give up' % (file, _maxTries)) - time.sleep(1) - - defn = dom1.getElementsByTagName('definition')[0] - - className = defn.getAttribute('name') #class name from name attribute in definition - - if not className in classDict.keys(): - classDict[className] = {} # each class is a dictionary - create a new one if doesn't exist - - classDict[className]['xml_file'] = file - - classDict[className]['extends'] = defn.getAttribute('extends') - - - docstr = '' - for docelement in dom1.getElementsByTagName('doc'): - if docelement.parentNode.tagName == 'definition': - docstr = docelement.firstChild.nodeValue.replace('\n','') - break - classDict[className]['classDoc'] = docstr - - - - if not 'fields' in classDict[className].keys(): - classDict[className]['fields'] = {} # create fields dictionary for class if doesn't exist - - - # look for fields in group but not recursive - flds = (field for field in defn.getElementsByTagName('field') if field.parentNode == defn) - - for field in flds: - addFieldToDict(classDict, field, None) - - classDict[className]['groups_cited'] = [] - for group in defn.getElementsByTagName('group'): - groupName = group.getAttribute('type') - classDict[className]['groups_cited'] += [groupName] - - -pickle.dump(classDict, open(baseclass_pickle_file, "wb" ) ) -pickle.dump(tagsDict, open(tags_pickle_file, "wb" ) ) - - - -# In[6]: - - -# parse nexus application definitions -# extract extra base class fields and add to base class dictionary - -import xml.dom.minidom -import os -#import yaml -import urllib -import time -import pickle - - -#get a list of NeXus base application definition urls from github -application_url = [] -for file in repo.get_contents("applications"): - try: - if str(file).split('.')[-2] == 'nxdl': - application_url += [file.download_url] - except: - pass - - -applicationDict = {} - -for file in application_url: - - - dom1 = xml.dom.minidom.parse(urllib.request.urlopen(file)) # pick one at random - - appdefn = dom1.getElementsByTagName('definition')[0] - defn_name = appdefn.getAttribute('name') - - group = dom1.getElementsByTagName('group') - classList, classNameList = [], [] - for defn in group: - className = defn.getAttribute('type') - classNameList += [className] - - # look for fields in group but not recursive - flds = (field for field in defn.getElementsByTagName('field') if field.parentNode == defn) - for field in flds: - #print('=== Added field %s from class %s in application definition %s' % (field.getAttribute('name'), className, defn_name)) - addFieldToDict(classDict, field, defn_name) - - - docstr = '' - for docelement in dom1.getElementsByTagName('doc'): - if docelement.parentNode.tagName == 'definition': - docstr = docelement.firstChild.nodeValue.replace('\n','') - break - - - # get information about application definition (name, xml_file, extends, doc) and add to dict - - applicationDict[defn_name] = {} # new entry with definition nae as key - applicationDict[defn_name]['extends'] = appdefn.getAttribute('extends') - applicationDict[defn_name]['doc'] = docstr - applicationDict[defn_name]['xml_file'] = file - applicationDict[defn_name]['groups_cited'] = classNameList - - -pickle.dump(classDict, open(baseclass_pickle_file, "wb" ) ) # re-save class dict (now has new fields) -pickle.dump(applicationDict, open(defn_pickle_file, "wb" ) ) -#pprint(applicationDict) - - - - - -# In[7]: - - -# create owl ontology from previously created dicts using owlready2 module - -from owlready2 import * -import types -import datetime - -onto_path.append(out_path) -onto = get_ontology(onto_iri) - -# get properties from cdterms -with get_ontology("http://purl.org/dc/terms/"): - class creator(AnnotationProperty): pass - class licence(AnnotationProperty): pass - class created(AnnotationProperty): pass - -with onto: - - #xxxx delete ############################################### - #try: - # nexus_version = tagsDict['name'] - #except: - # nexus_version = 'Unknown' - # print('=== Problem getting version from github name tag') - # - #version = 'Creation date: %s\nNeXus version: %s' % (datetime.date.today().strftime("%b-%d-%Y"), nexus_version) - - - version = '%s-%s' % (tagsDict['name'], _script_version) # from NeXus tag and script version - - - onto.metadata.versionInfo.append(version) - onto.metadata.creator = _creator - onto.metadata.licence = _licence - onto.metadata.seeAlso.append(nexus_website) - onto.metadata.seeAlso.append(nexus_repository) - onto.metadata.seeAlso.append(_publication) - onto.metadata.comment.append(onto_comment) - onto.metadata.created.append(datetime.date.today().strftime("%b-%d-%Y")) - - - class NeXus(Thing): - comment = 'NeXus concept' - - class dataset(Thing): - comment = 'Dummy data set' - - class NeXusField(ObjectProperty): - domain = [dataset] - comment = 'NeXus field (ObjectProperty). Unique names are created by prepending the NeXus class name to the NeXus field name' - - class NXobject(NeXus): - comment = classDict['NXobject']['classDoc'].replace('\t','') # NeXus documentation string - seeAlso = base_class_web_page_prefix + 'NXobject' + '.html' - NXobject.set_iri(NXobject, base_iri + 'NXobject') #set iri using agree pattern for Nexus - - class NeXusBaseClass(NXobject): - comment = 'NeXus Base Class' - seeAlso = 'https://manual.nexusformat.org/classes/index.html' - - class NeXusApplicationDefinition(NXobject): - comment = 'NeXus Application Definition' - seeAlso = 'https://manual.nexusformat.org/classes/index.html' - - class citesGroup(NXobject >> NeXusBaseClass): - comment = 'NXobject cites base class relationship' - - class extends(AnnotationProperty): - pass - - class NeXusType(AnnotationProperty): - pass - - class unit(AnnotationProperty): - pass - - class NeXusClass(AnnotationProperty): - pass - - class unitCategory(NeXus): - comment = 'NeXus unit category. Can be considered instances of a measure. Assign data properties ' 'hasValue(any), hasMinValue(any), hasMaxValue(any), hasUnits(str)' - - class hasValue(DataProperty, FunctionalProperty): - domain = [unitCategory] - comment = 'NeXus field value' - - class hasMinValue(DataProperty, FunctionalProperty): - domain = [unitCategory] - comment = 'Minimum of NeXus field value' - - class hasMaxValue(DataProperty, FunctionalProperty): - domain = [unitCategory] - comment = 'Maximum of NeXus field value' - - class hasUnit(DataProperty, FunctionalProperty): - domain = [unitCategory] - range = [str] - comment = 'NeXus unit (string). Should be consistent with unit category.' - - - for unit in typesDict.keys(): - if unit == 'anyUnitsAttr': # general description, not specific unit category - unitCategory.comment.append(typesDict[unit]['doc']) # use to document unitCategory class - elif unit == 'primitiveType': # do nothing with this entry - pass - else: - typesDict[unit]['class'] = types.new_class(unit, (unitCategory,)) # create new unit category subclass - typesDict[unit]['class'].comment.append(typesDict[unit]['doc']) # document it - - - - for nxBaseClass in classDict.keys(): - - if not nxBaseClass == 'NXobject': # NXobject can't be subclass of NXobject - _nx_class = types.new_class(nxBaseClass, (NeXusBaseClass,)) - _nx_class.set_iri(_nx_class, base_iri + nxBaseClass) # use agreed term iri - classDict[nxBaseClass]['onto_class'] = _nx_class # add class to dict - _nx_class.comment.append(classDict[nxBaseClass]['classDoc']) - _nx_class.extends.append(classDict[nxBaseClass]['extends']) - web_page = base_class_web_page_prefix + nxBaseClass + '.html' - - _nx_class.seeAlso.append(web_page) - - for nxField in classDict[nxBaseClass]['fields'].keys(): #loop through each field in base class - - _nx_field = types.new_class(nxField, (NeXusField, )) - - _nx_field.set_iri(base_iri + nxField) # use agreed term iri (seem to need only single parameter for properties) - - classDict[nxBaseClass]['fields'][nxField]['class'] = _nx_field - - - _nx_field.comment.append(classDict[nxBaseClass]['fields'][nxField]['fieldDoc']) - _nx_field.label.append(classDict[nxBaseClass]['fields'][nxField]['label']) - - - defn_name = classDict[nxBaseClass]['fields'][nxField]['defn_name'] - - - if defn_name != None: - #Field is defined by an application definition; give app defn web page (no anchor - might add later) - web_page = application_definition_web_page_prefix + defn_name + '.html' - _nx_field.seeAlso.append(web_page) - else: - #Field is defined by base class file; give base class web page with arhchor - - anchor = '#%s-%s-field' % (nxBaseClass.lower(), - classDict[nxBaseClass]['fields'][nxField]['fieldName'].lower()) - anchor = anchor.replace('_', '-') # replace symbols for anchors - - web_page = base_class_web_page_prefix + nxBaseClass + '.html' + anchor - _nx_field.seeAlso.append(web_page) - - - unit_string = classDict[nxBaseClass]['fields'][nxField]['units'] - unit_class = typesDict[unit_string]['class'] - _nx_class.is_a.append(_nx_field.some(unit_class)) - - _nx_field.NeXusClass.append(_nx_class) - - _nx_field.range = [unit_class] - - - # second loop required to ensure all classes defined before trying to cite them - for nxBaseClass in classDict.keys(): - if not nxBaseClass == 'NXobject': # NXobject can't be subclass of NXobject - for cited in classDict[nxBaseClass]['groups_cited']: - classDict[nxBaseClass]['onto_class'].is_a.append(citesGroup.some(classDict[cited]['onto_class'])) - - - for application in applicationDict.keys(): - _nx_app = types.new_class(application, (NeXusApplicationDefinition,)) - _nx_app.set_iri(_nx_app, base_iri + application) # use agreed term iri - - _nx_app.comment.append(applicationDict[application]['doc']) - _nx_app.extends.append(applicationDict[application]['extends']) - - web_page = application_definition_web_page_prefix + application + '.html' - _nx_app.seeAlso.append(web_page) - - for base_class in applicationDict[application]['groups_cited']: - _nx_app.is_a.append(citesGroup.some(classDict[base_class]['onto_class'])) - - -onto.save() - - - - -# In[8]: - - -# create individuals - these are just for testing - - -with onto: - - sample_temp_1 = typesDict['NX_TEMPERATURE']['class']('sample_temp_1') - sample_temp_1.hasUnit = 'Kelvin' - sample_temp_1.hasValue = 10 - - dataset_1 = dataset('dataset1') - setattr(dataset_1,'NXsample%stemperature' % join_string, [sample_temp_1]) - - - beam_energy_1 = typesDict['NX_ENERGY']['class']('beam_energy_1') - beam_energy_1.hasUnit = 'keV' - beam_energy_1.hasValue = 12.4 - - dataset_2 = dataset('dataset2') - setattr(dataset_2,'NXbeam%sfinal_energy' % join_string, [beam_energy_1]) - - -onto.save() - - -# In[ ]: - - - - - -# In[ ]: - - - - diff --git a/script/README.md b/script/README.md deleted file mode 100644 index 5302415..0000000 --- a/script/README.md +++ /dev/null @@ -1,23 +0,0 @@ -**NeXusOntology creation script** - -Run either the Jupyter notebook or exported Python script - -Ensure that owlready2 and pygithub are installed (pip install) - -Edit four lines near the top of the script: - -_script_version (change version if the ontology has been modified by changes to the script) -token (your github token - see below) -out_path (path for created .owl file) -tmp_file_path (temporary file path) - -To get a Github access token: -Github/settings/developer settings/personal access tokens/create new token - -Run the script (using Python 3) - -Some deprecation warnings are likely to be displayed before the .owl file is created. - -The .owl file (RDF/XML syntax) can be opened by a text editor or ontology tool such as Protege (https://protege.stanford.edu/) - -See ontology metadata for more information.