Skip to content

Commit

Permalink
Merge pull request #825 from IanCa/develop
Browse files Browse the repository at this point in the history
Add support for saving/loading in various RDF formats
  • Loading branch information
VisLab authored Jan 8, 2024
2 parents 14c0b6c + d2b0539 commit 639397d
Show file tree
Hide file tree
Showing 18 changed files with 944 additions and 202 deletions.
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ myst-parser>=1.0.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
wordcloud==1.9.3
rdflib>=6
2 changes: 2 additions & 0 deletions hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class HedExceptions:

SCHEMA_DUPLICATE_NAMES = "SCHEMA_DUPLICATE_NAMES"

CANNOT_PARSE_RDF = "CANNOT_PARSE_RDF"


class HedFileError(Exception):
"""Exception raised when a file cannot be parsed due to being malformed, file IO, etc."""
Expand Down
84 changes: 71 additions & 13 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import json
import os

from hed.schema.hed_schema_constants import HedKey, HedSectionKey
from hed.schema import hed_schema_constants as constants
from hed.schema.schema_io import schema_util
from hed.schema.schema_io.schema2xml import Schema2XML
from hed.schema.schema_io.schema2wiki import Schema2Wiki
from hed.schema.schema_io.schema2owl import Schema2Owl
from hed.schema.schema_io.owl_constants import ext_to_format
from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection
from hed.errors import ErrorHandler
from hed.errors.error_types import ValidationErrors
Expand Down Expand Up @@ -208,6 +211,11 @@ def valid_prefixes(self):
# ===============================================
# Creation and saving functions
# ===============================================

# todo: we may want to collapse these 6 functions into one like this
# def serialize(self, filename=None, save_merged=False, file_format=whatever is default):
# pass

def get_as_mediawiki_string(self, save_merged=False):
""" Return the schema to a mediawiki string.
Expand All @@ -222,6 +230,26 @@ def get_as_mediawiki_string(self, save_merged=False):
output_strings = Schema2Wiki.process_schema(self, save_merged)
return '\n'.join(output_strings)

def get_as_owl_string(self, save_merged=False, file_format="owl"):
    """ Return the schema as a string in an RDF/OWL serialization.

    Parameters:
        save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.
        file_format (str or None): The RDF serialization format to produce.
            "owl" is translated to "xml"; any other value is handed to the serializer unchanged.
            "turtle", "xml" and "json-ld" are fully supported; other values should work,
            but aren't as fully supported.

    Returns:
        str: The schema serialized in the requested format.

    :raises rdflib.plugin.PluginException:
        - Invalid format of file_format.  Make sure you use a supported RDF format.
    """
    # "owl" is only an alias; the serializer itself is asked for "xml".
    rdf_format = "xml" if file_format == "owl" else file_format
    graph = Schema2Owl.process_schema(self, save_merged)
    return graph.serialize(format=rdf_format)

def get_as_xml_string(self, save_merged=True):
""" Return the schema to an XML string.
Expand All @@ -234,39 +262,69 @@ def get_as_xml_string(self, save_merged=True):
"""
xml_tree = Schema2XML.process_schema(self, save_merged)
return schema_util._xml_element_2_str(xml_tree)
return schema_util.xml_element_2_str(xml_tree)

def save_as_mediawiki(self, filename=None, save_merged=False):
def save_as_mediawiki(self, filename, save_merged=False):
""" Save as mediawiki to a file.
filename: str
If present, move the resulting file to this location.
save location
save_merged: bool
If true, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
Returns:
str: The newly created schema filename.
:raises OSError:
- File cannot be saved for some reason
"""
output_strings = Schema2Wiki.process_schema(self, save_merged)
local_wiki_file = schema_util.write_strings_to_file(output_strings, ".mediawiki")
return schema_util.move_file(local_wiki_file, filename)
with open(filename, mode='w', encoding='utf-8') as opened_file:
for string in output_strings:
opened_file.write(string)
opened_file.write('\n')

def save_as_xml(self, filename=None, save_merged=True):
def save_as_owl(self, filename, save_merged=False, file_format=None):
    """ Save the schema to a file in an RDF/OWL serialization.

    Parameters:
        filename (str): Save the file here.
        save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
                            If it is not a "withStandard" schema, this setting has no effect.
        file_format (str or None): Required for owl formatted files other than the following:
            .ttl: turtle
            .owl: xml
            .json-ld: json-ld
            An explicitly passed value is used instead of the extension; "owl" is treated as "xml".

    :raises OSError:
        - File cannot be saved for some reason
    :raises rdflib.plugin.PluginException:
        - Invalid format of file_format.  Make sure you use a supported RDF format.
    """
    extension = os.path.splitext(filename.lower())[1]
    # The extension is consulted only when the caller did not name a format.
    if file_format is None and extension in ext_to_format:
        file_format = ext_to_format[extension]
    # "owl" is only an alias; the serializer itself is asked for "xml".
    rdf_format = "xml" if file_format == "owl" else file_format
    graph = Schema2Owl.process_schema(self, save_merged)
    graph.serialize(filename, format=rdf_format)

def save_as_xml(self, filename, save_merged=True):
""" Save as XML to a file.
filename: str
If present, move the resulting file to this location.
save location
save_merged: bool
If true, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
Returns:
str: The name of the newly created schema file.
:raises OSError:
- File cannot be saved for some reason
"""
xml_tree = Schema2XML.process_schema(self, save_merged)
local_xml_file = schema_util.write_xml_tree_2_xml_file(xml_tree, ".xml")
return schema_util.move_file(local_xml_file, filename)
with open(filename, mode='w', encoding='utf-8') as opened_file:
xml_string = schema_util.xml_element_2_str(xml_tree)
opened_file.write(xml_string)

def set_schema_prefix(self, schema_namespace):
""" Set library namespace associated for this schema.
Expand Down
21 changes: 12 additions & 9 deletions hed/schema/hed_schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,7 @@ class HedKey:
Rooted = "rooted"
DeprecatedFrom = "deprecatedFrom"
ConversionFactor = "conversionFactor"

# All known properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
IsInheritedProperty = 'isInheritedProperty'
Reserved = "reserved"

SIUnit = 'SIUnit'
UnitSymbol = 'unitSymbol'
Expand All @@ -68,6 +60,17 @@ class HedKey:
# Node attributes
InLibrary = "inLibrary"

# All known properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
NodeProperty = 'nodeProperty'
IsInheritedProperty = 'isInheritedProperty'



VERSION_ATTRIBUTE = 'version'
LIBRARY_ATTRIBUTE = 'library'
Expand Down
10 changes: 9 additions & 1 deletion hed/schema/hed_schema_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def __eq__(self, other):
# We only want to compare known attributes
self_attr = self.get_known_attributes()
other_attr = other.get_known_attributes()
if self_attr != other_attr:
# We can no longer be sure of the order of attribute values, since owl formatting has no order
if self_attr != other_attr and not self._compare_attributes_no_order(self_attr, other_attr):
return False
if self.description != other.description:
return False
Expand All @@ -135,6 +136,13 @@ def get_known_attributes(self):
return {key: value for key, value in self.attributes.items()
if not self._unknown_attributes or key not in self._unknown_attributes}

@staticmethod
def _compare_attributes_no_order(left, right):
    """ Compare two attribute dictionaries, ignoring the order of comma separated values.

    Parameters:
        left (dict): Attribute names mapped to their values.
        right (dict): Attribute names mapped to their values.

    Returns:
        bool: True if both dictionaries hold the same names with equivalent values.

    Notes:
        String values are split on "," and compared as sets, so ordering and repeats are ignored.
        Values that are not strings are compared unchanged.
    """
    def as_unordered(attributes):
        unordered = {}
        for name, value in attributes.items():
            unordered[name] = set(value.split(",")) if isinstance(value, str) else value
        return unordered

    return as_unordered(left) == as_unordered(right)


class UnitClassEntry(HedSchemaEntry):
""" A single unit class entry in the HedSchema. """
Expand Down
22 changes: 18 additions & 4 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import functools
from hed.schema.schema_io.xml2schema import SchemaLoaderXML
from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki
from hed.schema.schema_io.owl2schema import SchemaLoaderOWL
from hed.schema import hed_cache

from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema.schema_io import schema_util
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.schema_validation_util import validate_version_string
from collections import defaultdict
from hed.schema.schema_io.owl_constants import ext_to_format


MAX_MEMORY_CACHE = 40
Expand All @@ -20,8 +22,10 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
""" Create a schema from the given string.
Parameters:
schema_string (str): An XML or mediawiki file as a single long string.
schema_string (str): An XML, mediawiki, or OWL file as a single long string.
schema_format (str): The schema format of the source schema string.
Allowed normal values: .mediawiki, .xml
Allowed owl values: xml, owl, pretty-xml, turtle (or any other value rdflib supports)
schema_namespace (str, None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
Expand All @@ -45,6 +49,8 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema)
elif schema_format.endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema)
elif schema_format:
hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)

Expand All @@ -54,14 +60,18 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
return hed_schema


def load_schema(hed_path=None, schema_namespace=None, schema=None):
def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None):
""" Load a schema from the given file or URL path.
Parameters:
hed_path (str or None): A filepath or url to open a schema from.
hed_path (str): A filepath or url to open a schema from.
schema_namespace (str or None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format(str or None): Required for owl formatted files other than the following:
.ttl: turtle
.owl: xml
.json-ld: json-ld
Returns:
HedSchema: The loaded schema.
Expand All @@ -76,11 +86,15 @@ def load_schema(hed_path=None, schema_namespace=None, schema=None):
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file path passed to HedSchema.load_file",
filename=hed_path)

ext = os.path.splitext(hed_path.lower())[1]
is_url = hed_cache._check_if_url(hed_path)

if is_url:
file_as_string = schema_util.url_to_string(hed_path)
hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
elif ext in ext_to_format:
hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext])
elif file_format:
hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format)
elif hed_path.lower().endswith(".xml"):
hed_schema = SchemaLoaderXML.load(hed_path, schema=schema)
elif hed_path.lower().endswith(".mediawiki"):
Expand Down
1 change: 1 addition & 0 deletions hed/schema/hed_schema_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def _finalize_section(self, hed_schema):

class HedSchemaUnitClassSection(HedSchemaSection):
def _check_if_duplicate(self, name_key, new_entry):
"""Allow adding units to existing unit classes, using a placeholder one with no attributes."""
if name_key in self and len(new_entry.attributes) == 1\
and HedKey.InLibrary in new_entry.attributes:
return self.all_names[name_key]
Expand Down
10 changes: 7 additions & 3 deletions hed/schema/schema_io/base2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@ class SchemaLoader(ABC):
SchemaLoaderXML(filename) will load just the header_attributes
"""
def __init__(self, filename, schema_as_string=None, schema=None):
def __init__(self, filename, schema_as_string=None, schema=None, file_format=None):
"""Loads the given schema from one of the two parameters.
Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format(str or None): The format of this file if needed(only for owl currently)
"""
if schema_as_string and filename:
raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.",
filename)
self.file_format = file_format
self.filename = filename
self.schema_as_string = schema_as_string
self.appending_to_schema = False
Expand Down Expand Up @@ -68,19 +70,21 @@ def schema(self):
return self._schema

@classmethod
def load(cls, filename=None, schema_as_string=None, schema=None):
def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None):
""" Loads and returns the schema, including partnered schema if applicable.
Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format(str or None): If this is an owl file being loaded, this is the format.
Allowed values include: turtle, json-ld, and owl(xml)
Returns:
schema(HedSchema): The new schema
"""
loader = cls(filename, schema_as_string, schema)
loader = cls(filename, schema_as_string, schema, file_format)
return loader._load()

def _load(self):
Expand Down
Loading

0 comments on commit 639397d

Please sign in to comment.