Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Generalize schema configuration and dumping #952

Merged
merged 1 commit into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion schemas/0.8.0/fmu_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -10879,7 +10879,7 @@
"type": "object"
}
},
"$id": "https://main-fmu-schemas-prod.radix.equinor.com/schemas/0.8.0/fmu_results.json",
"$id": "https://main-fmu-schemas-dev.radix.equinor.com/schemas/0.8.0/fmu_results.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"discriminator": {
"propertyName": "class"
Expand Down
93 changes: 76 additions & 17 deletions src/fmu/dataio/_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,11 @@

from __future__ import annotations

import os
from abc import ABC, abstractmethod
from enum import Enum
from typing import Final


class FmuResultsSchema:
    """Location constants for the published `fmu_results.json` schema.

    Holds the development and production schema roots, the schema version
    and filename, and the full URLs assembled from those parts.
    """

    DEV_ROOT: Final[str] = "https://main-fmu-schemas-dev.radix.equinor.com/schemas"
    PROD_ROOT: Final[str] = "https://main-fmu-schemas-prod.radix.equinor.com/schemas"
    VERSION: Final[str] = "0.8.0"
    FILENAME: Final[str] = "fmu_results.json"
    # Full URLs take the form <root>/<version>/<filename>.
    DEV_URL: Final[str] = "/".join((DEV_ROOT, VERSION, FILENAME))
    PROD_URL: Final[str] = "/".join((PROD_ROOT, VERSION, FILENAME))

    @staticmethod
    def url() -> str:
        """Return the schema URL.

        Intended to select between `PROD_URL` and `DEV_URL` depending on the
        circumstances; at present it always returns `PROD_URL`.
        """
        return FmuResultsSchema.PROD_URL

from pathlib import Path
from typing import Any, Final

SOURCE: Final = "fmu"

Expand All @@ -32,6 +19,78 @@ class ConfigurationError(ValueError):
pass


class FmuSchemas:
    """Base locations shared by the FMU schemas.

    The host names could be derived programmatically from radixconfig.yaml
    if that ever becomes necessary, using the pattern:

        {cfg.components[].name}-{cfg.metadata.name}-{spec.environments[].name}

    They are hardcoded here because they are unlikely to change.
    """

    PATH: Final[Path] = Path("schemas")
    PROD_URL: Final[str] = "https://main-fmu-schemas-prod.radix.equinor.com"
    DEV_URL: Final[str] = "https://main-fmu-schemas-dev.radix.equinor.com"


class SchemaBase(ABC):
VERSION: str
"""The current version of the schema."""

FILENAME: str
"""The filename, i.e. schema.json."""

PATH: Path
"""The on-disk _and_ URL path following the domain, i.e:

schemas/0.1.0/schema.json

This path should _always_ have `FmuSchemas.PATH` as its first parent.
This determines the on-disk and URL location of this schema file. A
trivial example is:

PATH: Path = FmuSchemas.PATH / VERSION / FILENAME

"""

@classmethod
def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None:
super().__init_subclass__(**kwargs)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I always learn something new 🙂 never seen this __init_subclass__ before

Copy link
Collaborator Author

@mferrera mferrera Jan 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It didn't feel quite right to use a Pydantic model as an abstract interface with methods (and Pydantic also didn't seem to like using an ABC as a mixin), so Pydantic-light it was

for attr in ("VERSION", "FILENAME", "PATH"):
if not hasattr(cls, attr):
raise TypeError(f"Subclass {cls.__name__} must define '{attr}'")

@classmethod
def url(cls) -> str:
    """Return the URL this schema file will reside at.

    The URL is the environment's base URL from `FmuSchemas` followed by
    `cls.PATH`. The production base is used when the `SCHEMA_RELEASE`
    environment variable is set to any non-empty value; otherwise the
    development base is used.

    Returns:
        The full schema URL as a string.
    """
    # `PATH` is a `pathlib.Path`; formatting it directly would emit
    # backslashes on Windows. `as_posix()` always yields the forward
    # slashes a URL requires.
    url_path = cls.PATH.as_posix()
    base_url = (
        FmuSchemas.PROD_URL
        if os.environ.get("SCHEMA_RELEASE")
        else FmuSchemas.DEV_URL
    )
    return f"{base_url}/{url_path}"

@staticmethod
@abstractmethod
def dump() -> dict[str, Any]:
"""
Dumps the export root model to JSON format for schema validation and
usage in FMU data structures.

To update the schema:
1. Run the following CLI command to dump the updated schema:
`./tools/update_schema`.
2. Check the diff for changes. Adding fields usually indicates non-breaking
changes and is generally safe. However, if fields are removed, it could
indicate breaking changes that may affect dependent systems. Perform a
quality control (QC) check to ensure these changes do not break existing
implementations.
If changes are satisfactory and do not introduce issues, commit
them to maintain schema consistency.
"""
raise NotImplementedError


class ValidFormats(Enum):
surface = {
"irap_binary": ".gri",
Expand Down
8 changes: 1 addition & 7 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

from typing import TYPE_CHECKING, Final

from pydantic import AnyHttpUrl, TypeAdapter

from ._definitions import SOURCE, FmuResultsSchema
from ._logging import null_logger
from ._model import fields, schema
from ._model.global_configuration import GlobalConfiguration
Expand Down Expand Up @@ -105,10 +102,7 @@ def generate_export_metadata(

objdata = objectdata_provider_factory(obj, dataio, product)

return schema.InternalObjectMetadata(
schema_=TypeAdapter(AnyHttpUrl).validate_strings(FmuResultsSchema.url()), # type: ignore[call-arg]
version=FmuResultsSchema.VERSION,
source=SOURCE,
mferrera marked this conversation as resolved.
Show resolved Hide resolved
return schema.InternalObjectMetadata( # type: ignore[call-arg]
class_=objdata.classname,
fmu=_get_meta_fmu(fmudata) if fmudata else None,
masterdata=(
Expand Down
4 changes: 2 additions & 2 deletions src/fmu/dataio/_model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .root import Root, dump
from .root import FmuResultsSchema, Root

__all__ = [
"dump",
"FmuResultsSchema",
"Root",
]
195 changes: 95 additions & 100 deletions src/fmu/dataio/_model/root.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Literal, TypeVar, Union

from pydantic import (
Expand All @@ -12,7 +13,7 @@
from pydantic.json_schema import GenerateJsonSchema
from typing_extensions import Annotated

from fmu.dataio._definitions import FmuResultsSchema
from fmu.dataio._definitions import FmuSchemas, SchemaBase

from .data import AnyData
from .enums import FMUClass
Expand Down Expand Up @@ -198,103 +199,97 @@ def __get_pydantic_json_schema__(
return json_schema


class FmuResultsJsonSchema(GenerateJsonSchema):
contractual: Final[list[str]] = [
"access",
"class",
"data.alias",
"data.bbox",
"data.content",
"data.format",
"data.geometry",
"data.grid_model",
"data.is_observation",
"data.is_prediction",
"data.name",
"data.offset",
"data.product.name",
"data.seismic.attribute",
"data.spec.columns",
"data.stratigraphic",
"data.stratigraphic_alias",
"data.tagname",
"data.time",
"data.vertical_domain",
"file.checksum_md5",
"file.relative_path",
"file.size_bytes",
"fmu.aggregation.operation",
"fmu.aggregation.realization_ids",
"fmu.case",
"fmu.context.stage",
"fmu.iteration.name",
"fmu.iteration.uuid",
"fmu.model",
"fmu.realization.id",
"fmu.realization.is_reference",
"fmu.realization.name",
"fmu.realization.uuid",
"fmu.workflow",
"masterdata",
"source",
"tracklog.datetime",
"tracklog.event",
"tracklog.user.id",
"version",
]

def _remove_format_path(self, obj: T) -> T:
    """
    Recursively drop every `"format": "path"` entry from dictionaries.

    JSON Schema does not recognize `"format": "path"` although OpenAPI does,
    so the entries are stripped where OpenAPI specifications do not apply.
    Dictionaries and lists are rebuilt with their contents cleaned; any
    other value is returned as it is.
    """
    strip = self._remove_format_path

    if isinstance(obj, dict):
        return {
            key: strip(value)
            for key, value in obj.items()
            if not (key == "format" and value == "path")
        }

    return [strip(entry) for entry in obj] if isinstance(obj, list) else obj

def generate(
    self,
    schema: Mapping[str, Any],
    mode: Literal["validation", "serialization"] = "validation",
) -> dict[str, Any]:
    """
    Generate the JSON schema and adapt it for publication.

    Adds the `$schema`, `$id` and `$contractual` keys to the schema produced
    by the parent generator, removes the discriminator mappings, and strips
    `"format": "path"` entries from the result.
    """
    generated = super().generate(schema, mode=mode)
    generated["$schema"] = self.schema_dialect
    generated["$id"] = FmuResultsSchema.url()
    generated["$contractual"] = self.contractual

    # sumo-core's validator does not recognize these.
    root_discriminator = generated["discriminator"]
    del root_discriminator["mapping"]
    for definition_name in ("AnyData", "AnyProduct"):
        del generated["$defs"][definition_name]["discriminator"]["mapping"]

    return self._remove_format_path(generated)

class FmuResultsSchema(SchemaBase):
"""The main metadata export describing the results."""

VERSION: str = "0.8.0"
FILENAME: str = "fmu_results.json"
PATH: Path = FmuSchemas.PATH / VERSION / FILENAME

class FmuResultsGenerateJsonSchema(GenerateJsonSchema):
contractual: Final[list[str]] = [
"access",
"class",
"data.alias",
"data.bbox",
"data.content",
"data.format",
"data.geometry",
"data.grid_model",
"data.is_observation",
"data.is_prediction",
"data.name",
"data.offset",
"data.product.name",
"data.seismic.attribute",
"data.spec.columns",
"data.stratigraphic",
"data.stratigraphic_alias",
"data.tagname",
"data.time",
"data.vertical_domain",
"file.checksum_md5",
"file.relative_path",
"file.size_bytes",
"fmu.aggregation.operation",
"fmu.aggregation.realization_ids",
"fmu.case",
"fmu.context.stage",
"fmu.iteration.name",
"fmu.iteration.uuid",
"fmu.model",
"fmu.realization.id",
"fmu.realization.is_reference",
"fmu.realization.name",
"fmu.realization.uuid",
"fmu.workflow",
"masterdata",
"source",
"tracklog.datetime",
"tracklog.event",
"tracklog.user.id",
"version",
]

def dump() -> dict:
    """
    Dump the export root model to JSON format, for schema validation and for
    use in FMU data structures.

    Updating the schema:
        1. Dump the updated schema with the CLI command
           `./tools/update_schema`
        2. Inspect the resulting diff. Added fields usually mean a
           non-breaking change and are generally safe. Removed fields may
           mean a breaking change that affects dependent systems, so run a
           quality control (QC) check to confirm existing implementations
           still work.
        Once the changes are satisfactory and introduce no issues, commit
        them so the schema stays consistent.
    """
    schema_generator = FmuResultsJsonSchema
    return Root.model_json_schema(schema_generator=schema_generator)
def _remove_format_path(self, obj: T) -> T:
    """
    Strip `"format": "path"` entries from a nested structure.

    JSON Schema does not recognize `"format": "path"`, while OpenAPI does;
    this is used where OpenAPI specifications are not applicable.
    Dictionaries and lists are walked recursively and rebuilt. Anything else
    is returned unchanged.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            if key == "format" and value == "path":
                # This is the entry JSON Schema cannot interpret; leave it out.
                continue
            cleaned[key] = self._remove_format_path(value)
        return cleaned

    if not isinstance(obj, list):
        return obj

    return [self._remove_format_path(item) for item in obj]

def generate(
    self,
    schema: Mapping[str, Any],
    mode: Literal["validation", "serialization"] = "validation",
) -> dict[str, Any]:
    """
    Build the JSON schema via the parent generator and post-process it.

    The result gets `$schema` (the generator's dialect), `$id` (from
    `FmuResultsSchema.url()`) and `$contractual` (the contractual field
    list). Discriminator mappings are deleted and `"format": "path"`
    entries are removed before returning.
    """
    json_schema = super().generate(schema, mode=mode)
    json_schema["$schema"] = self.schema_dialect
    json_schema["$id"] = FmuResultsSchema.url()
    json_schema["$contractual"] = self.contractual

    # sumo-core's validator does not recognize these.
    del json_schema["discriminator"]["mapping"]
    definitions = json_schema["$defs"]
    for union_name in ("AnyData", "AnyProduct"):
        del definitions[union_name]["discriminator"]["mapping"]

    return self._remove_format_path(json_schema)

@staticmethod
def dump() -> dict[str, Any]:
    """Return the JSON schema of `Root`, generated with
    `FmuResultsSchema.FmuResultsGenerateJsonSchema`."""
    generator = FmuResultsSchema.FmuResultsGenerateJsonSchema
    return Root.model_json_schema(schema_generator=generator)
Loading
Loading