Skip to content

Commit

Permalink
Merge pull request #798 from IanCa/develop
Browse files Browse the repository at this point in the history
Add support for loading multiple library schemas with the same prefix
  • Loading branch information
VisLab authored Nov 15, 2023
2 parents 6454dd9 + 4f3dfad commit 65fad2b
Show file tree
Hide file tree
Showing 10 changed files with 351 additions and 63 deletions.
1 change: 1 addition & 0 deletions hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class HedExceptions:
HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID'

SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix'
SCHEMA_DUPLICATE_LIBRARY = "SCHEMA_LIBRARY_INVALID"
BAD_COLUMN_NAMES = 'BAD_COLUMN_NAMES'


Expand Down
23 changes: 18 additions & 5 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,19 @@ def library(self):
Returns:
str: Library name if any.
"""
return self.header_attributes.get(constants.LIBRARY_ATTRIBUTE, "")

def can_save(self):
    """ Report whether this schema may legally be saved.

        Schemas loaded as a merge of multiple library schemas cannot be saved;
        this is detected by a comma in the library name.

    Returns:
        bool: True if this can be saved
    """
    library_name = self.library
    # No library name at all means there is nothing merged to object to.
    if not library_name:
        return True
    return "," not in library_name

@property
def with_standard(self):
""" The version of the base schema this is extended from, if it exists..
Expand Down Expand Up @@ -738,10 +747,14 @@ def _get_attributes_for_section(self, key_class):
def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# Add the InLibrary attribute to any library schemas as they are loaded
# These are later removed when they are saved out, if saving unmerged
if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
# only add it if not already present - This is a rare case
if not new_entry.has_attribute(HedKey.InLibrary):
new_entry._set_attribute_value(HedKey.InLibrary, self.library)
# if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
# # only add it if not already present - This is a rare case
# Todo ian: I think this should be moved up one level for parity with the other loading changes
# .library will be updated to potentially be a list
# Cannot save schema if .library is a list
#
# if not new_entry.has_attribute(HedKey.InLibrary):
# new_entry._set_attribute_value(HedKey.InLibrary, self.library)

section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)
Expand Down
102 changes: 88 additions & 14 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@
from hed.schema.schema_io import schema_util
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.schema_validation_util import validate_version_string
from collections import defaultdict


MAX_MEMORY_CACHE = 20
MAX_MEMORY_CACHE = 40


def from_string(schema_string, schema_format=".xml", schema_namespace=None):
def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None):
""" Create a schema from the given string.
Parameters:
schema_string (str): An XML or mediawiki file as a single long string.
schema_format (str): The schema format of the source schema string.
schema_namespace (str, None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value.
Returns:
(HedSchema): The loaded schema.
Expand All @@ -39,9 +42,9 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None):
filename=schema_string)

if schema_format.endswith(".xml"):
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string)
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema)
elif schema_format.endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string)
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)

Expand All @@ -51,12 +54,14 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None):
return hed_schema


def load_schema(hed_path=None, schema_namespace=None):
def load_schema(hed_path=None, schema_namespace=None, schema=None):
""" Load a schema from the given file or URL path.
Parameters:
hed_path (str or None): A filepath or url to open a schema from.
schema_namespace (str or None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value.
Returns:
HedSchema: The loaded schema.
Expand All @@ -77,9 +82,9 @@ def load_schema(hed_path=None, schema_namespace=None):
file_as_string = schema_util.url_to_string(hed_path)
hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
elif hed_path.lower().endswith(".xml"):
hed_schema = SchemaLoaderXML.load(hed_path)
hed_schema = SchemaLoaderXML.load(hed_path, schema=schema)
elif hed_path.lower().endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(hed_path)
hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path)

Expand Down Expand Up @@ -111,7 +116,11 @@ def _load_schema_version(xml_version=None, xml_folder=None):
""" Return specified version or latest if not specified.
Parameters:
xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z'.
xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_][X.Y.Z]'
Further versions can be added comma separated after the version number/library name.
e.g. "lib:library,otherlibrary" will load "library" and "otherlibrary" into "lib:"
The schema namespace must be the same and not repeated if loading multiple merged schemas.
xml_folder (str): Path to a folder containing schema.
Returns:
Expand All @@ -124,10 +133,44 @@ def _load_schema_version(xml_version=None, xml_folder=None):
- The prefix is invalid
"""
schema_namespace = ""
library_name = None
if xml_version:
if ":" in xml_version:
schema_namespace, _, xml_version = xml_version.partition(":")

if xml_version:
xml_versions = xml_version.split(",")
# Add a blank entry if we have no xml version
else:
xml_versions = [""]

first_schema = _load_schema_version_sub(schema_namespace, xml_versions[0], xml_folder=xml_folder)
for version in xml_versions[1:]:
_load_schema_version_sub(schema_namespace, version, xml_folder=xml_folder, schema=first_schema)
return first_schema


def _load_schema_version_sub(schema_namespace="", xml_version=None, xml_folder=None, schema=None):
""" Return specified version or latest if not specified.
Parameters:
xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_][X.Y.Z]'
xml_folder (str): Path to a folder containing schema.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value.
Returns:
HedSchema: The requested HedSchema object.
:raises HedFileError:
- The xml_version is not valid.
- The specified version cannot be found or loaded
- Other fatal errors loading the schema (These are unlikely if you are not editing them locally)
- The prefix is invalid
"""
library_name = None

if xml_version:
if "_" in xml_version:
library_name, _, xml_version = xml_version.rpartition("_")
elif validate_version_string(xml_version):
Expand All @@ -138,7 +181,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
if not final_hed_xml_file:
hed_cache.cache_local_versions(xml_folder)
final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder)
hed_schema = load_schema(final_hed_xml_file)
hed_schema = load_schema(final_hed_xml_file, schema=schema)
except HedFileError as e:
if e.code == HedExceptions.FILE_NOT_FOUND:
hed_cache.cache_xml_versions(cache_folder=xml_folder)
Expand All @@ -147,7 +190,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
raise HedFileError(HedExceptions.FILE_NOT_FOUND,
f"HED version '{xml_version}' not found in cache: {hed_cache.get_cache_directory()}",
filename=xml_folder)
hed_schema = load_schema(final_hed_xml_file)
hed_schema = load_schema(final_hed_xml_file, schema=schema)
else:
raise e

Expand All @@ -158,14 +201,14 @@ def _load_schema_version(xml_version=None, xml_folder=None):


def load_schema_version(xml_version=None, xml_folder=None):
""" Return a HedSchema or HedSchemaGroup extracted from xml_version field.
""" Return a HedSchema or HedSchemaGroup extracted from xml_version
Parameters:
xml_version (str or list or None): List or str specifying which official HED schemas to use.
An empty string returns the latest version
A json str format is also supported,
based on the output of HedSchema.get_formatted_version
Basic format: '[schema_namespace:][library_name_]X.Y.Z'.
Basic format: '[schema_namespace:][library_name_][X.Y.Z]'.
xml_folder (str): Path to a folder containing schema.
Returns:
Expand All @@ -185,10 +228,41 @@ def load_schema_version(xml_version=None, xml_folder=None):
except json.decoder.JSONDecodeError as e:
raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e
if xml_version and isinstance(xml_version, list):
schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_version]
xml_versions = parse_version_list(xml_version)
schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_versions.values()]
if len(schemas) == 1:
return schemas[0]

return HedSchemaGroup(schemas)
else:
return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder)


def parse_version_list(xml_version_list):
"""Takes a list of xml versions and returns a dictionary split by prefix
e.g. ["score", "testlib"] will return {"": "score, testlib"}
e.g. ["score", "testlib", "ol:otherlib"] will return {"": "score, testlib", "ol:": "otherlib"}
Parameters:
xml_version_list (list): List of str specifying which hed schemas to use
Returns:
HedSchema or HedSchemaGroup: The schema or schema group extracted.
"""
out_versions = defaultdict(list)
for version in xml_version_list:
schema_namespace = ""
if version and ":" in version:
schema_namespace, _, version = version.partition(":")

if version is None:
version = ""
if version in out_versions[schema_namespace]:
raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_LIBRARY, f"Attempting to load the same library '{version}' twice: {out_versions[schema_namespace]}",
filename=None)
out_versions[schema_namespace].append(version)

out_versions = {key: ",".join(value) if not key else f"{key}:" + ",".join(value) for key, value in out_versions.items()}

return out_versions
2 changes: 1 addition & 1 deletion hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name):
issues = []

library = tag_entry.attributes.get(attribute_name, "")
if hed_schema.library != library:
if library not in hed_schema.library.split(","):
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
tag_entry.name,
library)
Expand Down
52 changes: 43 additions & 9 deletions hed/schema/schema_io/base2schema.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import copy
from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema import HedSchema
from hed.schema.hed_schema_constants import HedKey
from abc import abstractmethod, ABC
from hed.schema import schema_validation_util
from hed.schema import hed_schema_constants


class SchemaLoader(ABC):
Expand All @@ -12,20 +14,21 @@ class SchemaLoader(ABC):
SchemaLoaderXML(filename) will load just the header_attributes
"""
def __init__(self, filename, schema_as_string=None):
def __init__(self, filename, schema_as_string=None, schema=None):
"""Loads the given schema from one of the two parameters.
Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value.
"""
if schema_as_string and filename:
raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.",
filename)

self.filename = filename
self.schema_as_string = schema_as_string

self.appending_to_schema = False
try:
self.input_data = self._open_file()
except OSError as e:
Expand All @@ -34,11 +37,28 @@ def __init__(self, filename, schema_as_string=None):
raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename)
except ValueError as e:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename)

self._schema = HedSchema()
self._schema.filename = filename

# self._schema.filename = filename
hed_attributes = self._get_header_attributes(self.input_data)
schema_validation_util.validate_attributes(hed_attributes, filename=self.filename)

withStandard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "")
self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "")
if not schema:
self._schema = HedSchema()
else:
self._schema = schema
self.appending_to_schema = True
if not self._schema.with_standard:
raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX,
"Trying to load multiple normal schemas as a merged one with the same namespace. "
"Ensure schemas have the withStandard header attribute set",
self.filename)
elif withStandard != self._schema.with_standard:
raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION,
"When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.filename)
hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}"
self._schema.filename = filename
self._schema.header_attributes = hed_attributes
self._loading_merged = False

Expand All @@ -48,16 +68,19 @@ def schema(self):
return self._schema

@classmethod
def load(cls, filename=None, schema_as_string=None):
def load(cls, filename=None, schema_as_string=None, schema=None):
""" Loads and returns the schema, including partnered schema if applicable.
Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value.
Returns:
schema(HedSchema): The new schema
"""
loader = cls(filename, schema_as_string)
loader = cls(filename, schema_as_string, schema)
return loader._load()

def _load(self):
Expand All @@ -68,7 +91,7 @@ def _load(self):
"""
self._loading_merged = True
# Do a full load of the standard schema if this is a partnered schema
if self._schema.with_standard and not self._schema.merged:
if not self.appending_to_schema and self._schema.with_standard and not self._schema.merged:
from hed.schema.hed_schema_io import load_schema_version
saved_attr = self._schema.header_attributes
try:
Expand Down Expand Up @@ -102,3 +125,14 @@ def _get_header_attributes(self, input_data):
def _parse_data(self):
"""Puts the input data into the new schema"""
pass

def _add_to_dict_base(self, entry, key_class):
    """ Add an entry to the schema being loaded, tagging it with InLibrary when needed.

    Parameters:
        entry: The schema entry to add; its name is used as the key.
        key_class: The section key passed through to the schema's _add_tag_to_dict.

    Returns:
        The result of the schema's _add_tag_to_dict, or None if the entry was skipped.
    """
    schema = self._schema
    if not entry.has_attribute(HedKey.InLibrary):
        # Entries without InLibrary are skipped when appending to a merged schema
        # (presumably they already came in with the existing schema - confirm).
        if self.appending_to_schema and schema.merged:
            return None

        # Tag library entries unless this is a merged with-standard schema.
        # Only added when not already present - this is a rare case.
        if self.library and (not schema.with_standard or not schema.merged):
            entry._set_attribute_value(HedKey.InLibrary, self.library)

    return schema._add_tag_to_dict(entry.name, entry, key_class)
5 changes: 5 additions & 0 deletions hed/schema/schema_io/schema2base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Baseclass for mediawiki/xml writers"""
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from hed.errors.exceptions import HedFileError, HedExceptions


class Schema2Base:
Expand Down Expand Up @@ -29,6 +30,10 @@ def process_schema(cls, hed_schema, save_merged=False):
Varies based on inherited class
"""
if not hed_schema.can_save():
raise HedFileError(HedExceptions.SCHEMA_LIBRARY_INVALID,
"Cannot save a schema merged from multiple library schemas",
hed_schema.filename)
saver = cls()
saver._save_lib = False
saver._save_base = False
Expand Down
Loading

0 comments on commit 65fad2b

Please sign in to comment.