From 817663f25d8cab97310b861f81eaaee2849ea9b5 Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 13 Jan 2025 08:36:51 +0100 Subject: [PATCH] MAINT: Relocate all schema classes together --- src/fmu/dataio/_definitions.py | 85 +------- src/fmu/dataio/_models/_schema_base.py | 189 ++++++++++++++++++ .../dataio/_models/fmu_results/fmu_results.py | 139 ++++++------- .../_models/products/inplace_volumes.py | 50 +---- tests/test_schema/test_schemabase.py | 2 +- tests/test_schema/test_schemas_up_to_date.py | 2 +- tests/test_units/test_metadata_class.py | 3 +- tools/update-schema | 2 +- 8 files changed, 258 insertions(+), 214 deletions(-) create mode 100644 src/fmu/dataio/_models/_schema_base.py diff --git a/src/fmu/dataio/_definitions.py b/src/fmu/dataio/_definitions.py index bc3931bb7..edaaea3d1 100644 --- a/src/fmu/dataio/_definitions.py +++ b/src/fmu/dataio/_definitions.py @@ -2,13 +2,8 @@ from __future__ import annotations -import os -from abc import ABC, abstractmethod from enum import Enum -from pathlib import Path -from typing import Any, Final - -SOURCE: Final = "fmu" +from typing import Final class ValidationError(ValueError, KeyError): @@ -19,84 +14,6 @@ class ConfigurationError(ValueError): pass -class FmuSchemas: - """These URLs can be constructed programmatically from radixconfig.yaml if need be: - - {cfg.components[].name}-{cfg.metadata.name}-{spec.environments[].name} - - As they are unlikely to change they are hardcoded here. - """ - - DEV_URL: Final[str] = "https://main-fmu-schemas-dev.radix.equinor.com" - PROD_URL: Final[str] = "https://main-fmu-schemas-prod.radix.equinor.com" - PATH: Final[Path] = Path("schemas") - - -class SchemaBase(ABC): - VERSION: str - """The current version of the schema.""" - - FILENAME: str - """The filename, i.e. schema.json.""" - - PATH: Path - """The on-disk _and_ URL path following the domain, i.e: - - schemas/0.1.0/schema.json - - This path should _always_ have `FmuSchemas.PATH` as its first parent. 
- This determines the on-disk and URL location of this schema file. A - trivial example is: - - PATH: Path = FmuSchemas.PATH / VERSION / FILENAME - - """ - - @classmethod - def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: - super().__init_subclass__(**kwargs) - for attr in ("VERSION", "FILENAME", "PATH"): - if not hasattr(cls, attr): - raise TypeError(f"Subclass {cls.__name__} must define '{attr}'") - - if not cls.PATH.parts[0].startswith(str(FmuSchemas.PATH)): - raise ValueError( - f"PATH must start with `FmuSchemas.PATH`: {FmuSchemas.PATH}. " - f"Got {cls.PATH}" - ) - - @classmethod - def url(cls) -> str: - """Returns the URL this file will reside at, based upon class variables set here - and in FmuSchemas.""" - DEV_URL = f"{FmuSchemas.DEV_URL}/{cls.PATH}" - PROD_URL = f"{FmuSchemas.PROD_URL}/{cls.PATH}" - - if os.environ.get("SCHEMA_RELEASE", False): - return PROD_URL - return DEV_URL - - @staticmethod - @abstractmethod - def dump() -> dict[str, Any]: - """ - Dumps the export root model to JSON format for schema validation and - usage in FMU data structures. - - To update the schema: - 1. Run the following CLI command to dump the updated schema: - `./tools/update_schema`. - 2. Check the diff for changes. Adding fields usually indicates non-breaking - changes and is generally safe. However, if fields are removed, it could - indicate breaking changes that may affect dependent systems. Perform a - quality control (QC) check to ensure these changes do not break existing - implementations. - If changes are satisfactory and do not introduce issues, commit - them to maintain schema consistency. 
- """ - raise NotImplementedError - - class ValidFormats(Enum): surface = { "irap_binary": ".gri", diff --git a/src/fmu/dataio/_models/_schema_base.py b/src/fmu/dataio/_models/_schema_base.py new file mode 100644 index 000000000..d199401c2 --- /dev/null +++ b/src/fmu/dataio/_models/_schema_base.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import os +from abc import ABC, abstractmethod +from pathlib import Path +from typing import ( + Any, + Dict, + Final, + List, + Literal, + Mapping, + TypeVar, +) + +from pydantic.json_schema import GenerateJsonSchema + +T = TypeVar("T", Dict, List, object) + + +class FmuSchemas: + """These URLs can be constructed programmatically from radixconfig.yaml if need be: + + {cfg.components[].name}-{cfg.metadata.name}-{spec.environments[].name} + + As they are unlikely to change they are hardcoded here. + """ + + DEV_URL: Final[str] = "https://main-fmu-schemas-dev.radix.equinor.com" + PROD_URL: Final[str] = "https://main-fmu-schemas-prod.radix.equinor.com" + PATH: Final[Path] = Path("schemas") + + +class BaseGenerateJsonSchema(GenerateJsonSchema): + """Implements a schema generator so that some additional fields may be + added. + + This class also collects static methods used to transform the default OpenAPI + schemas generated by Pydantic into schemas compatible with JSON Schema specs.""" + + @staticmethod + def remove_discriminator_mapping(data: T) -> T: + """ + Removes entries with key ["discriminator"]["mapping"] from the schema. This + adjustment is necessary because JSON Schema does not recognize this value + while OpenAPI does. 
+ """ + + if isinstance(data, dict): + if "discriminator" in data and isinstance(data["discriminator"], dict): + data["discriminator"].pop("mapping", None) + + for key, value in data.items(): + data[key] = BaseGenerateJsonSchema.remove_discriminator_mapping(value) + + elif isinstance(data, list): + for index, element in enumerate(data): + data[index] = BaseGenerateJsonSchema.remove_discriminator_mapping( + element + ) + + return data + + @staticmethod + def remove_format_path(data: T) -> T: + """ + Removes entries with key ["format"] = "path" from the schema. This + adjustment is necessary because JSON Schema does not recognize the "format": + "path", while OpenAPI does. This function is used in contexts where OpenAPI + specifications are not applicable. + """ + + if isinstance(data, dict): + return { + k: BaseGenerateJsonSchema.remove_format_path(v) + for k, v in data.items() + if not (k == "format" and v == "path") + } + + if isinstance(data, list): + return [ + BaseGenerateJsonSchema.remove_format_path(element) for element in data + ] + + return data + + def generate( + self, + schema: Mapping[str, Any], + mode: Literal["validation", "serialization"] = "validation", + ) -> dict[str, Any]: + json_schema = super().generate(schema, mode=mode) + + json_schema = BaseGenerateJsonSchema.remove_discriminator_mapping(json_schema) + json_schema = BaseGenerateJsonSchema.remove_format_path(json_schema) + json_schema["$schema"] = self.schema_dialect + + return json_schema + + +class SchemaBase(ABC): + VERSION: str + """The current version of the schema.""" + + FILENAME: str + """The filename, i.e. schema.json.""" + + PATH: Path + """The on-disk _and_ URL path following the domain, i.e: + + schemas/0.1.0/schema.json + + This path should _always_ have `FmuSchemas.PATH` as its first parent. + This determines the on-disk and URL location of this schema file. 
A + trivial example is: + + PATH: Path = FmuSchemas.PATH / VERSION / FILENAME + + """ + + @classmethod + def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: + super().__init_subclass__(**kwargs) + for attr in ("VERSION", "FILENAME", "PATH"): + if not hasattr(cls, attr): + raise TypeError(f"Subclass {cls.__name__} must define '{attr}'") + + if not cls.PATH.parts[0].startswith(str(FmuSchemas.PATH)): + raise ValueError( + f"PATH must start with `FmuSchemas.PATH`: {FmuSchemas.PATH}. " + f"Got {cls.PATH}" + ) + + @classmethod + def url(cls) -> str: + """Returns the URL this file will reside at, based upon class variables set here + and in FmuSchemas.""" + DEV_URL = f"{FmuSchemas.DEV_URL}/{cls.PATH}" + PROD_URL = f"{FmuSchemas.PROD_URL}/{cls.PATH}" + + if os.environ.get("SCHEMA_RELEASE", False): + return PROD_URL + return DEV_URL + + @classmethod + def default_generator(cls) -> type[GenerateJsonSchema]: + """Provides a default schema generator that should be adequate for most simple + schemas. + + When more customization is required a separate schema generator may be + warranted. See the 'FmuResults' model for how this can be done.""" + + class DefaultGenerateJsonSchema(BaseGenerateJsonSchema): + """Implements a schema generator so that some additional fields may be + added.""" + + def generate( + self, + schema: Mapping[str, Any], + mode: Literal["validation", "serialization"] = "validation", + ) -> dict[str, Any]: + json_schema = super().generate(schema, mode=mode) + + json_schema["$id"] = cls.url() + json_schema["version"] = cls.VERSION + + return json_schema + + return DefaultGenerateJsonSchema + + @classmethod + @abstractmethod + def dump(cls) -> dict[str, Any]: + """ + Dumps the export root model to JSON format for schema validation and + usage in FMU data structures. + + To update the schema: + 1. Run the following CLI command to dump the updated schema: + `./tools/update-schema --diff`. + 2. Check the diff for changes. 
Adding fields usually indicates non-breaking + changes and is generally safe. However, if fields are removed, it could + indicate breaking changes that may affect dependent systems. Perform a + quality control (QC) check to ensure these changes do not break existing + implementations. + If changes are satisfactory and do not introduce issues, commit + them to maintain schema consistency. + """ + raise NotImplementedError diff --git a/src/fmu/dataio/_models/fmu_results/fmu_results.py b/src/fmu/dataio/_models/fmu_results/fmu_results.py index a28c51b02..27db5312b 100644 --- a/src/fmu/dataio/_models/fmu_results/fmu_results.py +++ b/src/fmu/dataio/_models/fmu_results/fmu_results.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Literal, TypeVar, Union +from typing import TYPE_CHECKING, Literal, Union from pydantic import ( BaseModel, @@ -10,10 +10,13 @@ RootModel, model_validator, ) -from pydantic.json_schema import GenerateJsonSchema from typing_extensions import Annotated -from fmu.dataio._definitions import SOURCE, FmuSchemas, SchemaBase +from fmu.dataio._models._schema_base import ( + BaseGenerateJsonSchema, + FmuSchemas, + SchemaBase, +) from .data import AnyData from .enums import FMUClass @@ -35,8 +38,6 @@ from pydantic_core import CoreSchema -T = TypeVar("T", Dict, List, object) - class FmuResultsSchema(SchemaBase): """The main metadata export describing the results.""" @@ -45,92 +46,68 @@ class FmuResultsSchema(SchemaBase): FILENAME: str = "fmu_results.json" PATH: Path = FmuSchemas.PATH / VERSION / FILENAME - class FmuResultsGenerateJsonSchema(GenerateJsonSchema): - contractual: Final[list[str]] = [ - "access", - "class", - "data.alias", - "data.bbox", - "data.content", - "data.format", - "data.geometry", - "data.grid_model", - "data.is_observation", - "data.is_prediction", - "data.name", - "data.offset", - "data.product.name", - "data.seismic.attribute", - "data.spec.columns", - 
"data.stratigraphic", - "data.stratigraphic_alias", - "data.tagname", - "data.time", - "data.vertical_domain", - "file.checksum_md5", - "file.relative_path", - "file.size_bytes", - "fmu.aggregation.operation", - "fmu.aggregation.realization_ids", - "fmu.case", - "fmu.context.stage", - "fmu.iteration.name", - "fmu.iteration.uuid", - "fmu.model", - "fmu.realization.id", - "fmu.realization.is_reference", - "fmu.realization.name", - "fmu.realization.uuid", - "fmu.workflow", - "masterdata", - "source", - "tracklog.datetime", - "tracklog.event", - "tracklog.user.id", - "version", - ] - - def _remove_format_path(self, obj: T) -> T: - """ - Removes entries with key "format" and value "path" from dictionaries. This - adjustment is necessary because JSON Schema does not recognize the "format": - "path", while OpenAPI does. This function is used in contexts where OpenAPI - specifications are not applicable. - """ - - if isinstance(obj, dict): - return { - k: self._remove_format_path(v) - for k, v in obj.items() - if not (k == "format" and v == "path") - } - - if isinstance(obj, list): - return [self._remove_format_path(element) for element in obj] - - return obj + SOURCE: str = "fmu" + CONTRACTUAL: Final[list[str]] = [ + "access", + "class", + "data.alias", + "data.bbox", + "data.content", + "data.format", + "data.geometry", + "data.grid_model", + "data.is_observation", + "data.is_prediction", + "data.name", + "data.offset", + "data.product.name", + "data.seismic.attribute", + "data.spec.columns", + "data.stratigraphic", + "data.stratigraphic_alias", + "data.tagname", + "data.time", + "data.vertical_domain", + "file.checksum_md5", + "file.relative_path", + "file.size_bytes", + "fmu.aggregation.operation", + "fmu.aggregation.realization_ids", + "fmu.case", + "fmu.context.stage", + "fmu.iteration.name", + "fmu.iteration.uuid", + "fmu.model", + "fmu.realization.id", + "fmu.realization.is_reference", + "fmu.realization.name", + "fmu.realization.uuid", + "fmu.workflow", + 
"masterdata", + "source", + "tracklog.datetime", + "tracklog.event", + "tracklog.user.id", + "version", + ] + class FmuResultsGenerateJsonSchema(BaseGenerateJsonSchema): def generate( self, schema: Mapping[str, Any], mode: Literal["validation", "serialization"] = "validation", ) -> dict[str, Any]: json_schema = super().generate(schema, mode=mode) - json_schema["$schema"] = self.schema_dialect - json_schema["$id"] = FmuResultsSchema.url() - json_schema["$contractual"] = self.contractual - # sumo-core's validator does not recognize these. - del json_schema["discriminator"]["mapping"] - del json_schema["$defs"]["AnyData"]["discriminator"]["mapping"] - del json_schema["$defs"]["AnyProduct"]["discriminator"]["mapping"] + json_schema["$id"] = FmuResultsSchema.url() + json_schema["$contractual"] = FmuResultsSchema.CONTRACTUAL - return self._remove_format_path(json_schema) + return json_schema - @staticmethod - def dump() -> dict[str, Any]: + @classmethod + def dump(cls) -> dict[str, Any]: return FmuResults.model_json_schema( - schema_generator=FmuResultsSchema.FmuResultsGenerateJsonSchema + schema_generator=cls.FmuResultsGenerateJsonSchema ) @@ -148,7 +125,7 @@ class MetadataBase(BaseModel): """The ``tracklog`` block contains a record of events recorded on these data. See :class:`Tracklog`.""" - source: str = SOURCE + source: str = Field(default=FmuResultsSchema.SOURCE) """The source of this data. 
Defaults to 'fmu'.""" version: str = Field(default=FmuResultsSchema.VERSION) diff --git a/src/fmu/dataio/_models/products/inplace_volumes.py b/src/fmu/dataio/_models/products/inplace_volumes.py index 2b7cdbe13..c2045961d 100644 --- a/src/fmu/dataio/_models/products/inplace_volumes.py +++ b/src/fmu/dataio/_models/products/inplace_volumes.py @@ -1,18 +1,15 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Literal, Optional, TypeVar +from typing import TYPE_CHECKING, List, Optional from pydantic import BaseModel, Field, RootModel -from pydantic.json_schema import GenerateJsonSchema -from fmu.dataio._definitions import FmuSchemas, SchemaBase +from fmu.dataio._models._schema_base import FmuSchemas, SchemaBase from fmu.dataio.export._enums import InplaceVolumes if TYPE_CHECKING: - from typing import Any, Mapping - -T = TypeVar("T", Dict, List, object) + from typing import Any class InplaceVolumesResultRow(BaseModel): @@ -53,43 +50,8 @@ class InplaceVolumesSchema(SchemaBase): FILENAME: str = "inplace_volumes.json" PATH: Path = FmuSchemas.PATH / "file_formats" / VERSION / FILENAME - class InplaceVolumesGenerateJsonSchema(GenerateJsonSchema): - """Implements a schema generator so that some additional fields may be added.""" - - def _remove_format_path(self, obj: T) -> T: - """ - Removes entries with key "format" and value "path" from dictionaries. This - adjustment is necessary because JSON Schema does not recognize the "format": - "path", while OpenAPI does. This function is used in contexts where OpenAPI - specifications are not applicable. 
- """ - - if isinstance(obj, dict): - return { - k: self._remove_format_path(v) - for k, v in obj.items() - if not (k == "format" and v == "path") - } - - if isinstance(obj, list): - return [self._remove_format_path(element) for element in obj] - - return obj - - def generate( - self, - schema: Mapping[str, Any], - mode: Literal["validation", "serialization"] = "validation", - ) -> dict[str, Any]: - json_schema = super().generate(schema, mode=mode) - json_schema["$schema"] = self.schema_dialect - json_schema["$id"] = InplaceVolumesSchema.url() - json_schema["version"] = InplaceVolumesSchema.VERSION - - return json_schema - - @staticmethod - def dump() -> dict[str, Any]: + @classmethod + def dump(cls) -> dict[str, Any]: return InplaceVolumesResult.model_json_schema( - schema_generator=InplaceVolumesSchema.InplaceVolumesGenerateJsonSchema + schema_generator=cls.default_generator() ) diff --git a/tests/test_schema/test_schemabase.py b/tests/test_schema/test_schemabase.py index b09772bad..ccf1658e5 100644 --- a/tests/test_schema/test_schemabase.py +++ b/tests/test_schema/test_schemabase.py @@ -4,7 +4,7 @@ import pytest -from fmu.dataio._definitions import FmuSchemas, SchemaBase +from fmu.dataio._models._schema_base import FmuSchemas, SchemaBase def test_schemabase_validates_class_vars() -> None: diff --git a/tests/test_schema/test_schemas_up_to_date.py b/tests/test_schema/test_schemas_up_to_date.py index 8648b78fd..1094ddebc 100644 --- a/tests/test_schema/test_schemas_up_to_date.py +++ b/tests/test_schema/test_schemas_up_to_date.py @@ -6,8 +6,8 @@ import pytest from pytest import MonkeyPatch -from fmu.dataio._definitions import FmuSchemas, SchemaBase from fmu.dataio._models import schemas +from fmu.dataio._models._schema_base import FmuSchemas, SchemaBase def contains_discriminator_mapping(schema: Any) -> bool: diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index 9acd8b33c..c8f8f8d93 100644 --- 
a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -6,7 +6,6 @@ import pytest import fmu.dataio as dio -from fmu.dataio._definitions import SOURCE from fmu.dataio._metadata import generate_export_metadata from fmu.dataio._models.fmu_results import FmuResultsSchema, enums from fmu.dataio._models.fmu_results.fields import ( @@ -32,7 +31,7 @@ def test_metadata_dollars(edataobj1, regsurf): assert mymeta["version"] == FmuResultsSchema.VERSION assert mymeta["$schema"] == FmuResultsSchema.url() - assert mymeta["source"] == SOURCE + assert mymeta["source"] == FmuResultsSchema.SOURCE # -------------------------------------------------------------------------------------- diff --git a/tools/update-schema b/tools/update-schema index 4b969d02c..7c5857f54 100755 --- a/tools/update-schema +++ b/tools/update-schema @@ -23,8 +23,8 @@ from copy import deepcopy from pathlib import Path from typing import Any, Dict, List, TypeVar -from fmu.dataio._definitions import SchemaBase from fmu.dataio._models import schemas +from fmu.dataio._models._schema_base import SchemaBase GREEN = "\033[32m" RED = "\033[31m"