From f6720fdf03fe19f497582f8f7d71b8f987e93b81 Mon Sep 17 00:00:00 2001 From: mferrera Date: Fri, 3 Jan 2025 09:32:27 +0100 Subject: [PATCH] ENH: Generalize schema configuration and dumping This creates a pattern for defining schema versions, filenames, and paths. These can then be used throughout the code without being hardcoded strings. The dev and prod URLs are also now present in the code. In the main branch the `$id` URLs now point to the dev environment, as this environment tracks main anyway. The idea is that switching these URLs to the production ones will be a step during the promotion to the staging environment. Currently this is done "secretly" with a shell script but as the number of schemas grows, I think it makes more sense to have this be explicit. --- schemas/0.8.0/fmu_results.json | 2 +- src/fmu/dataio/_definitions.py | 93 +++++++++-- src/fmu/dataio/_metadata.py | 4 +- src/fmu/dataio/_model/__init__.py | 4 +- src/fmu/dataio/_model/root.py | 195 +++++++++++----------- src/fmu/dataio/_model/schema.py | 5 +- tests/test_schema/test_schema_uptodate.py | 47 ++++-- tests/test_units/test_metadata_class.py | 6 +- tools/update_schema | 5 +- 9 files changed, 214 insertions(+), 147 deletions(-) diff --git a/schemas/0.8.0/fmu_results.json b/schemas/0.8.0/fmu_results.json index cfdfa223f..57174f251 100644 --- a/schemas/0.8.0/fmu_results.json +++ b/schemas/0.8.0/fmu_results.json @@ -10879,7 +10879,7 @@ "type": "object" } }, - "$id": "https://main-fmu-schemas-prod.radix.equinor.com/schemas/0.8.0/fmu_results.json", + "$id": "https://main-fmu-schemas-dev.radix.equinor.com/schemas/0.8.0/fmu_results.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "discriminator": { "propertyName": "class" diff --git a/src/fmu/dataio/_definitions.py b/src/fmu/dataio/_definitions.py index 47cde02f8..2b8ad3b0e 100644 --- a/src/fmu/dataio/_definitions.py +++ b/src/fmu/dataio/_definitions.py @@ -2,24 +2,11 @@ from __future__ import annotations +import os +from abc import 
ABC, abstractmethod from enum import Enum -from typing import Final - - -class FmuResultsSchema: - DEV_ROOT: Final[str] = "https://main-fmu-schemas-dev.radix.equinor.com/schemas" - PROD_ROOT: Final[str] = "https://main-fmu-schemas-prod.radix.equinor.com/schemas" - VERSION: Final[str] = "0.8.0" - FILENAME: Final[str] = "fmu_results.json" - DEV_URL: Final[str] = f"{DEV_ROOT}/{VERSION}/{FILENAME}" - PROD_URL: Final[str] = f"{PROD_ROOT}/{VERSION}/{FILENAME}" - - @staticmethod - def url() -> str: - """This method is meant to return the `PROD_URL` or `DEV_URL` under relevant - circumstances.""" - return FmuResultsSchema.PROD_URL - +from pathlib import Path +from typing import Any, Final SOURCE: Final = "fmu" @@ -32,6 +19,78 @@ class ConfigurationError(ValueError): pass +class FmuSchemas: + """These URLs can be constructed programmatically from radixconfig.yaml if need be: + + {cfg.components[].name}-{cfg.metadata.name}-{spec.environments[].name} + + As they are unlikely to change they are hardcoded here. + """ + + DEV_URL: Final[str] = "https://main-fmu-schemas-dev.radix.equinor.com" + PROD_URL: Final[str] = "https://main-fmu-schemas-prod.radix.equinor.com" + PATH: Final[Path] = Path("schemas") + + +class SchemaBase(ABC): + VERSION: str + """The current version of the schema.""" + + FILENAME: str + """The filename, i.e. schema.json.""" + + PATH: Path + """The on-disk _and_ URL path following the domain, i.e: + + schemas/0.1.0/schema.json + + This path should _always_ have `FmuSchemas.PATH` as its first parent. + This determines the on-disk and URL location of this schema file. 
A + trivial example is: + + PATH: Path = FmuSchemas.PATH / VERSION / FILENAME + + """ + + @classmethod + def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: + super().__init_subclass__(**kwargs) + for attr in ("VERSION", "FILENAME", "PATH"): + if not hasattr(cls, attr): + raise TypeError(f"Subclass {cls.__name__} must define '{attr}'") + + @classmethod + def url(cls) -> str: + """Returns the URL this file will reside at, based upon class variables set here + and in FmuSchemas.""" + DEV_URL = f"{FmuSchemas.DEV_URL}/{cls.PATH}" + PROD_URL = f"{FmuSchemas.PROD_URL}/{cls.PATH}" + + if os.environ.get("SCHEMA_RELEASE", None): + return PROD_URL + return DEV_URL + + @staticmethod + @abstractmethod + def dump() -> dict[str, Any]: + """ + Dumps the export root model to JSON format for schema validation and + usage in FMU data structures. + + To update the schema: + 1. Run the following CLI command to dump the updated schema: + `./tools/update_schema`. + 2. Check the diff for changes. Adding fields usually indicates non-breaking + changes and is generally safe. However, if fields are removed, it could + indicate breaking changes that may affect dependent systems. Perform a + quality control (QC) check to ensure these changes do not break existing + implementations. + If changes are satisfactory and do not introduce issues, commit + them to maintain schema consistency. 
+ """ + raise NotImplementedError + + class ValidFormats(Enum): surface = { "irap_binary": ".gri", diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index 8542e137c..42a679ebd 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -9,9 +9,9 @@ from pydantic import AnyHttpUrl, TypeAdapter -from ._definitions import SOURCE, FmuResultsSchema +from ._definitions import SOURCE from ._logging import null_logger -from ._model import fields, schema +from ._model import FmuResultsSchema, fields, schema from ._model.global_configuration import GlobalConfiguration from .exceptions import InvalidMetadataError from .providers._filedata import FileDataProvider diff --git a/src/fmu/dataio/_model/__init__.py b/src/fmu/dataio/_model/__init__.py index 230129d15..8f5832dd9 100644 --- a/src/fmu/dataio/_model/__init__.py +++ b/src/fmu/dataio/_model/__init__.py @@ -1,6 +1,6 @@ -from .root import Root, dump +from .root import FmuResultsSchema, Root __all__ = [ - "dump", + "FmuResultsSchema", "Root", ] diff --git a/src/fmu/dataio/_model/root.py b/src/fmu/dataio/_model/root.py index bcac17d02..c446f6f31 100644 --- a/src/fmu/dataio/_model/root.py +++ b/src/fmu/dataio/_model/root.py @@ -1,5 +1,6 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Literal, TypeVar, Union from pydantic import ( @@ -12,7 +13,7 @@ from pydantic.json_schema import GenerateJsonSchema from typing_extensions import Annotated -from fmu.dataio._definitions import FmuResultsSchema +from fmu.dataio._definitions import FmuSchemas, SchemaBase from .data import AnyData from .enums import FMUClass @@ -198,103 +199,97 @@ def __get_pydantic_json_schema__( return json_schema -class FmuResultsJsonSchema(GenerateJsonSchema): - contractual: Final[list[str]] = [ - "access", - "class", - "data.alias", - "data.bbox", - "data.content", - "data.format", - "data.geometry", - "data.grid_model", - "data.is_observation", - 
"data.is_prediction", - "data.name", - "data.offset", - "data.product.name", - "data.seismic.attribute", - "data.spec.columns", - "data.stratigraphic", - "data.stratigraphic_alias", - "data.tagname", - "data.time", - "data.vertical_domain", - "file.checksum_md5", - "file.relative_path", - "file.size_bytes", - "fmu.aggregation.operation", - "fmu.aggregation.realization_ids", - "fmu.case", - "fmu.context.stage", - "fmu.iteration.name", - "fmu.iteration.uuid", - "fmu.model", - "fmu.realization.id", - "fmu.realization.is_reference", - "fmu.realization.name", - "fmu.realization.uuid", - "fmu.workflow", - "masterdata", - "source", - "tracklog.datetime", - "tracklog.event", - "tracklog.user.id", - "version", - ] - - def _remove_format_path(self, obj: T) -> T: - """ - Removes entries with key "format" and value "path" from dictionaries. This - adjustment is necessary because JSON Schema does not recognize the "format": - "path", while OpenAPI does. This function is used in contexts where OpenAPI - specifications are not applicable. - """ - - if isinstance(obj, dict): - return { - k: self._remove_format_path(v) - for k, v in obj.items() - if not (k == "format" and v == "path") - } - - if isinstance(obj, list): - return [self._remove_format_path(element) for element in obj] - - return obj - - def generate( - self, - schema: Mapping[str, Any], - mode: Literal["validation", "serialization"] = "validation", - ) -> dict[str, Any]: - json_schema = super().generate(schema, mode=mode) - json_schema["$schema"] = self.schema_dialect - json_schema["$id"] = FmuResultsSchema.url() - json_schema["$contractual"] = self.contractual - - # sumo-core's validator does not recognize these. 
- del json_schema["discriminator"]["mapping"] - del json_schema["$defs"]["AnyData"]["discriminator"]["mapping"] - del json_schema["$defs"]["AnyProduct"]["discriminator"]["mapping"] - - return self._remove_format_path(json_schema) - +class FmuResultsSchema(SchemaBase): + """The main metadata export describing the results.""" + + VERSION: str = "0.8.0" + FILENAME: str = "fmu_results.json" + PATH: Path = FmuSchemas.PATH / VERSION / FILENAME + + class FmuResultsGenerateJsonSchema(GenerateJsonSchema): + contractual: Final[list[str]] = [ + "access", + "class", + "data.alias", + "data.bbox", + "data.content", + "data.format", + "data.geometry", + "data.grid_model", + "data.is_observation", + "data.is_prediction", + "data.name", + "data.offset", + "data.product.name", + "data.seismic.attribute", + "data.spec.columns", + "data.stratigraphic", + "data.stratigraphic_alias", + "data.tagname", + "data.time", + "data.vertical_domain", + "file.checksum_md5", + "file.relative_path", + "file.size_bytes", + "fmu.aggregation.operation", + "fmu.aggregation.realization_ids", + "fmu.case", + "fmu.context.stage", + "fmu.iteration.name", + "fmu.iteration.uuid", + "fmu.model", + "fmu.realization.id", + "fmu.realization.is_reference", + "fmu.realization.name", + "fmu.realization.uuid", + "fmu.workflow", + "masterdata", + "source", + "tracklog.datetime", + "tracklog.event", + "tracklog.user.id", + "version", + ] -def dump() -> dict: - """ - Dumps the export root model to JSON format for schema validation and - usage in FMU data structures. - - To update the schema: - 1. Run the following CLI command to dump the updated schema: - `./tools/update_schema` - 2. Check the diff for changes. Adding fields usually indicates non-breaking - changes and is generally safe. However, if fields are removed, it could - indicate breaking changes that may affect dependent systems. Perform a - quality control (QC) check to ensure these changes do not break existing - implementations. 
- If changes are satisfactory and do not introduce issues, commit - them to maintain schema consistency. - """ - return Root.model_json_schema(schema_generator=FmuResultsJsonSchema) + def _remove_format_path(self, obj: T) -> T: + """ + Removes entries with key "format" and value "path" from dictionaries. This + adjustment is necessary because JSON Schema does not recognize the "format": + "path", while OpenAPI does. This function is used in contexts where OpenAPI + specifications are not applicable. + """ + + if isinstance(obj, dict): + return { + k: self._remove_format_path(v) + for k, v in obj.items() + if not (k == "format" and v == "path") + } + + if isinstance(obj, list): + return [self._remove_format_path(element) for element in obj] + + return obj + + def generate( + self, + schema: Mapping[str, Any], + mode: Literal["validation", "serialization"] = "validation", + ) -> dict[str, Any]: + json_schema = super().generate(schema, mode=mode) + json_schema["$schema"] = self.schema_dialect + json_schema["$id"] = FmuResultsSchema.url() + json_schema["$contractual"] = self.contractual + + # sumo-core's validator does not recognize these. + del json_schema["discriminator"]["mapping"] + del json_schema["$defs"]["AnyData"]["discriminator"]["mapping"] + del json_schema["$defs"]["AnyProduct"]["discriminator"]["mapping"] + + return self._remove_format_path(json_schema) + + @staticmethod + def dump() -> dict[str, Any]: + return Root.model_json_schema( + schema_generator=FmuResultsSchema.FmuResultsGenerateJsonSchema + ) diff --git a/src/fmu/dataio/_model/schema.py b/src/fmu/dataio/_model/schema.py index e1e6414ce..87a9af6c5 100644 --- a/src/fmu/dataio/_model/schema.py +++ b/src/fmu/dataio/_model/schema.py @@ -20,9 +20,10 @@ model_validator, ) -from fmu.dataio._definitions import SOURCE, FmuResultsSchema +from fmu.dataio._definitions import SOURCE from . 
import data, enums, fields +from .root import FmuResultsSchema def property_warn() -> None: @@ -102,7 +103,7 @@ def _validate_input(cls, values: dict) -> dict: class JsonSchemaMetadata(BaseModel, populate_by_name=True): schema_: AnyHttpUrl = Field( alias="$schema", - default=TypeAdapter(AnyHttpUrl).validate_python(FmuResultsSchema.PROD_URL), + default=TypeAdapter(AnyHttpUrl).validate_python(FmuResultsSchema.url()), ) version: str = Field(default=FmuResultsSchema.VERSION) source: str = Field(default=SOURCE) diff --git a/tests/test_schema/test_schema_uptodate.py b/tests/test_schema/test_schema_uptodate.py index 07cde4a07..9cf986438 100644 --- a/tests/test_schema/test_schema_uptodate.py +++ b/tests/test_schema/test_schema_uptodate.py @@ -1,23 +1,15 @@ +from __future__ import annotations + import json +from typing import Any -from fmu.dataio._model import dump +from pytest import MonkeyPatch +from fmu.dataio._definitions import FmuSchemas +from fmu.dataio._model import FmuResultsSchema -def test_schema_uptodate(): - # ruff: noqa: E501 - """ - Test to verify if the local schema is up to date with the schema - generated by pydantic's `dump` method. It compares the content of - the local `fmu_results.json` with the output of `dump()`. - To update the local schema, run: - `./tools/update_schema` - """ - with open("schemas/0.8.0/fmu_results.json") as f: - assert json.load(f) == dump() - - -def contains_discriminator_mapping(schema): +def contains_discriminator_mapping(schema: Any) -> bool: """Recursively checks ["discriminator"]["mapping"] in the schema.""" if isinstance(schema, dict): if ( @@ -34,9 +26,30 @@ def contains_discriminator_mapping(schema): return False -def test_no_discriminator_mappings_leftover_in_schema(): +def test_schema_uptodate() -> None: + """ + Test to verify if the local schema is up to date with the schema + generated by pydantic's `dump` method. It compares the content of + the local `fmu_results.json` with the output of `dump()`. 
+ + To update the local schema, run: + `./tools/update_schema` + """ + with open(FmuResultsSchema.PATH) as f: + assert json.load(f) == FmuResultsSchema.dump() + + +def test_schema_url_changes_with_env_var(monkeypatch: MonkeyPatch) -> None: + assert FmuResultsSchema.url().startswith(FmuSchemas.DEV_URL) + assert FmuResultsSchema.dump()["$id"].startswith(FmuSchemas.DEV_URL) + monkeypatch.setenv("SCHEMA_RELEASE", "1") + assert FmuResultsSchema.url().startswith(FmuSchemas.PROD_URL) + assert FmuResultsSchema.dump()["$id"].startswith(FmuSchemas.PROD_URL) + + +def test_no_discriminator_mappings_leftover_in_schema() -> None: """Sumo's AJV validator doesn't like discriminator mappings leftover in the schema.""" - with open("schemas/0.8.0/fmu_results.json") as f: + with open(FmuResultsSchema.PATH) as f: schema = json.load(f) assert contains_discriminator_mapping(schema) is False diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index a7e6aee7c..9a5c9cc2c 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -6,9 +6,9 @@ import pytest import fmu.dataio as dio -from fmu.dataio._definitions import SOURCE, FmuResultsSchema +from fmu.dataio._definitions import SOURCE from fmu.dataio._metadata import generate_export_metadata -from fmu.dataio._model import enums +from fmu.dataio._model import FmuResultsSchema, enums from fmu.dataio._model.fields import ( OperatingSystem, TracklogEvent, @@ -31,7 +31,7 @@ def test_metadata_dollars(edataobj1, regsurf): mymeta = edataobj1.generate_metadata(obj=regsurf) assert mymeta["version"] == FmuResultsSchema.VERSION - assert mymeta["$schema"] == FmuResultsSchema.PROD_URL + assert mymeta["$schema"] == FmuResultsSchema.url() assert mymeta["source"] == SOURCE diff --git a/tools/update_schema b/tools/update_schema index 4aea1cb06..9e492efd8 100755 --- a/tools/update_schema +++ b/tools/update_schema @@ -9,8 +9,7 @@ import sys from pathlib import Path from typing 
import Any -from fmu.dataio._definitions import FmuResultsSchema -from fmu.dataio._model import dump +from fmu.dataio._model import FmuResultsSchema GREEN = "\033[32m" RED = "\033[31m" @@ -119,7 +118,7 @@ def main() -> None: parser = _get_parser() args = parser.parse_args() - new_schema = dump() + new_schema = FmuResultsSchema.dump() output_path = _get_output_path(args.version) output_filepath = output_path / args.filename