Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation for Onthology #1

Merged
merged 13 commits into from
Oct 20, 2023
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ classifiers = [
dependencies = [
"jsonschema>=4.4.0",
"fastjsonschema>=2.16.2",
"raillabel>=3.1.0"
"raillabel>=3.1.0",
"pyyaml>=6.0.0"
]

[project.urls]
Expand Down
1 change: 1 addition & 0 deletions raillabel_providerkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import format
from .convert import loader_classes
from .convert.convert import convert
from .validation.validate import validate

try:
__version__ = metadata.version("raillabel-providerkit")
Expand Down
6 changes: 6 additions & 0 deletions raillabel_providerkit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ class SchemaError(Exception):
"""Raised when the data does not validate against a given schema."""

__module__ = "raillabel_providerkit"


class OnthologySchemaError(Exception):
    """Signal that a supplied onthology .yaml-file does not satisfy the onthology schema."""

    # Present the exception under the top-level package name instead of the
    # module it is actually defined in.
    __module__ = "raillabel_providerkit"
2 changes: 1 addition & 1 deletion raillabel_providerkit/format/understand_ai/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def to_raillabel(self) -> dict:

def _get_subschema_version(self) -> str:
RAILLABEL_SCHEMA_PATH = (
Path(__file__).parent.parent.parent / "validate" / "raillabel_schema.json"
Path(__file__).parent.parent.parent / "format" / "raillabel_schema.json"
)

with RAILLABEL_SCHEMA_PATH.open() as schema_file:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Package for validating raillabel data regarding the format requirements."""

from .validate_onthology.validate_onthology import validate_onthology
35 changes: 35 additions & 0 deletions raillabel_providerkit/validation/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from pathlib import Path

import raillabel

from . import validate_onthology


def validate(scene: raillabel.Scene, onthology: t.Union[dict, Path]) -> t.List[str]:
    """Check a scene against the Deutsche Bahn requirements.

    Parameters
    ----------
    scene : raillabel.Scene
        Scene whose annotations are checked.
    onthology : dict or Path
        Onthology YAML-data, or the file holding it, with information about all classes and
        their attributes. The onthology must adhere to the onthology_schema. If a path is
        provided, the file is loaded as a YAML.

    Returns
    -------
    list[str]
        All requirement errors found in the scene. An empty list means that no errors are
        present and the scene is valid.
    """
    onthology_errors = validate_onthology(scene, onthology)

    # Hand back a new list rather than the object produced by the onthology check.
    return list(onthology_errors)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Package for validating a scene via an onthology."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import abc
import typing as t
from dataclasses import dataclass
from importlib import import_module
from inspect import isclass
from pathlib import Path
from pkgutil import iter_modules


@dataclass
class _Attribute(abc.ABC):
    """Abstract base for the attribute types that can be declared in an onthology.

    Concrete subclasses decide via `supports()` whether they are responsible for an attribute
    definition, are built from that definition via `fromdict()` and validate annotation values
    via `check()`.
    """

    @classmethod
    @abc.abstractmethod
    def supports(cls, data_dict: t.Union[str, dict]) -> bool:
        """Return whether this class is responsible for the given attribute definition.

        Parameters
        ----------
        data_dict : str or dict
            Attribute definition as found in the onthology. Simple types are given as a plain
            string (e.g. "boolean"), more complex ones as a dict with a "type" key.
        """
        raise NotImplementedError

    @classmethod
    @abc.abstractmethod
    def fromdict(cls, data_dict: t.Union[str, dict]) -> "_Attribute":
        """Build an instance of this attribute type from its onthology definition.

        Parameters
        ----------
        data_dict : str or dict
            Attribute definition for which `supports()` returned True.

        Returns
        -------
        _Attribute
            Instance (not the class) describing the attribute.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def check(
        self, attribute_name: str, attribute_value: t.Any, annotation_id: str
    ) -> t.List[str]:
        """Validate a single attribute value of an annotation.

        Parameters
        ----------
        attribute_name : str
            Name of the attribute, used in the error messages.
        attribute_value
            Value of the attribute in the annotation.
        annotation_id : str
            Identifier of the annotation, used in the error messages.

        Returns
        -------
        list[str]
            Human readable errors. Empty if the value is valid.
        """
        raise NotImplementedError


def attribute_classes() -> t.List[t.Type[_Attribute]]:
    """Return the list of all collected _Attribute child classes.

    The returned object is the module-level registry itself, not a copy.
    """
    return ATTRIBUTE_CLASSES


def _collect_attribute_classes():
    """Import all modules next to this file and register their _Attribute child classes.

    Every class found in those modules that is a subclass of `_Attribute` (but not
    `_Attribute` itself) is appended to `ATTRIBUTE_CLASSES`. A class is registered only once,
    even if it is visible in several modules or if this function is called repeatedly.
    """
    package_dir = str(Path(__file__).resolve().parent)

    for _, module_name, _ in iter_modules([package_dir]):
        module = import_module(
            f"raillabel_providerkit.validation.validate_onthology._onthology_classes._attributes.{module_name}"
        )

        for class_name in dir(module):
            class_ = getattr(module, class_name)

            if not (isclass(class_) and issubclass(class_, _Attribute)):
                continue

            # The abstract base is importable from every attribute module, and a concrete
            # class may be too. Neither must end up in the registry more than intended.
            if class_ is _Attribute or class_ in ATTRIBUTE_CLASSES:
                continue

            ATTRIBUTE_CLASSES.append(class_)


# Registry of the concrete _Attribute child classes. It is filled right below, i.e. once when
# this module is imported.
ATTRIBUTE_CLASSES: t.List[t.Type[_Attribute]] = []
_collect_attribute_classes()
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _BooleanAttribute(_Attribute):
    """Onthology attribute whose value must be exactly of type bool."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the string "boolean"."""
        return data_dict == "boolean"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further information."""
        return _BooleanAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not a bool, otherwise an empty list."""
        if type(attribute_value) is bool:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'bool')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _IntegerAttribute(_Attribute):
    """Onthology attribute whose value must be exactly of type int."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the string "integer"."""
        return data_dict == "integer"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further information."""
        return _IntegerAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not an int, otherwise an empty list."""
        # Exact type comparison on purpose: subclasses of int such as bool are rejected.
        if type(attribute_value) is int:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'int')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _MultiSelectAttribute(_Attribute):
    """Onthology attribute whose value is a list of entries taken from a fixed set of options."""

    # All values an entry of the attribute is allowed to have.
    options: t.Set[str]

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is a dict with the "type" set to "multi-select"."""
        return type(data_dict) is dict and data_dict.get("type") == "multi-select"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute from the "options" listed in its definition."""
        return _MultiSelectAttribute(options=set(data_dict["options"]))

    def check(self, attribute_name: str, attribute_values, annotation_id: str) -> t.List[str]:
        """Validate that the value is a list containing only defined options.

        Parameters
        ----------
        attribute_name : str
            Name of the attribute, used in the error messages.
        attribute_values
            Value of the attribute in the annotation.
        annotation_id : str
            Identifier of the annotation, used in the error messages.

        Returns
        -------
        list[str]
            Empty if the value is valid. Otherwise a single error: either for the wrong type
            or for the first entry that is not a defined option.
        """
        if type(attribute_values) is not list:
            return [
                f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
                f"'{attribute_values.__class__.__name__}' (should be 'list')."
            ]

        for attribute_value in attribute_values:
            if not self._is_defined_option(attribute_value):
                return [
                    f"Attribute '{attribute_name}' of annotation {annotation_id} has an undefined "
                    f"value '{attribute_value}' (defined options: {self._stringify_options()})."
                ]

        return []

    def _is_defined_option(self, attribute_value) -> bool:
        """Return whether the value is one of the options, tolerating unhashable values."""
        try:
            return attribute_value in self.options
        except TypeError:
            # Unhashable entries (e.g. nested lists or dicts) can not be looked up in a set.
            # They are reported as undefined instead of aborting the validation.
            return False

    def _stringify_options(self) -> str:
        """Return the options sorted, quoted and comma separated, e.g. "'a', 'b'"."""
        return ", ".join(f"'{option}'" for option in sorted(self.options))
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _SingleSelectAttribute(_Attribute):
    """Onthology attribute whose value is one string taken from a fixed set of options."""

    # All values the attribute is allowed to have.
    options: t.Set[str]

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is a dict with the "type" set to "single-select"."""
        if type(data_dict) is not dict:
            return False
        return data_dict.get("type") == "single-select"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute from the "options" listed in its definition."""
        allowed = set(data_dict["options"])
        return _SingleSelectAttribute(options=allowed)

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error if the value is not a str or not a defined option, else nothing."""
        prefix = f"Attribute '{attribute_name}' of annotation {annotation_id} "

        if type(attribute_value) is not str:
            actual_type = attribute_value.__class__.__name__
            return [prefix + f"is of type '{actual_type}' (should be 'str')."]

        if attribute_value in self.options:
            return []

        return [
            prefix
            + f"has an undefined value '{attribute_value}' "
            + f"(defined options: {self._stringify_options()})."
        ]

    def _stringify_options(self) -> str:
        """Return the options sorted, quoted and comma separated, e.g. "'a', 'b'"."""
        quoted = [f"'{option}'" for option in sorted(self.options)]
        return ", ".join(quoted)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _StringAttribute(_Attribute):
    """Onthology attribute whose value must be exactly of type str."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the string "string"."""
        return data_dict == "string"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further information."""
        return _StringAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not a str, otherwise an empty list."""
        if type(attribute_value) is str:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'str')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _VectorAttribute(_Attribute):
    """Onthology attribute whose value must be exactly of type list."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the string "vector"."""
        return data_dict == "vector"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further information."""
        return _VectorAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not a list, otherwise an empty list."""
        # Only the container type is inspected; the entries of the list are not validated.
        if type(attribute_value) is list:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'list')."
        ]
Loading