feat(schemas): generate .schema.json files for experimenter schemas

Because:
- we need the .schema.json files in Desktop;
- they are not currently packaged; and
- the FeatureManifest schema did not match the existing .schema.json file in Desktop.

This commit:
- updates the FeatureManifest schema to match Firefox Desktop;
- generates .schema.json files for experimenter schemas;
- adds the .schema.json files to the npm package; and
- updates the schemas version to 2024.10.1.
mozilla · Oct 16, 2024 · 4f57435 · 4f57435
1 parent 0357f43
commit 4f57435
Show file tree

Hide file tree

Showing 18 changed files with 2,481 additions and 95 deletions.
diff --git a/schemas/.gitignore b/schemas/.gitignore
@@ -0,0 +1,2 @@
+
+mozilla_nimbus_schemas/schemas/
diff --git a/schemas/VERSION b/schemas/VERSION
@@ -1 +1 @@
-2024.9.3
+2024.10.1
diff --git a/schemas/generate_json_schema.py b/schemas/generate_json_schema.py
@@ -4,17 +4,21 @@
 """
 
 import json
+import re
+import shutil
 import subprocess
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any
+from typing import Any, Iterable
 
 import click
 from polyfactory.factories.pydantic_factory import ModelFactory
 from pydantic import BaseModel, create_model
 
 from mozilla_nimbus_schemas import experiments, jetstream
 
+NEWLINES_RE = re.compile("\n+")  # Matches a run of one or more newline characters.
+
 
 def clean_output_file(ts_path: Path) -> None:
     """Clean up the output file typescript definitions were written to by:
@@ -98,6 +102,121 @@ def iterate_models() -> dict[str, Any]:
     return schema
 
 
+def prettify_json_schema(schema: dict[str, Any]) -> dict[str, Any]:
+    """Return a human-friendly copy of ``schema``; the input is not modified.
+
+    A ``$schema`` field is added, top-level keys are put in a fixed order, and noise
+    is stripped: auto-generated titles (all but the top-level one), defaults,
+    discriminators, and enums that merely repeat a ``const``. Runs of newlines in
+    descriptions are collapsed to a single space.
+    """
+    from copy import deepcopy
+
+    # The walk below edits nested dicts in place, so work on a copy of the input.
+    schema = deepcopy(schema)
+    pretty_schema = {"$schema": "https://json-schema.org/draft/2019-09/schema"}
+
+    # Use this order for top-level keys so the output reads sensibly for humans.
+    key_order = [
+        "title",
+        "description",
+        "type",
+        "properties",
+        "required",
+        "additionalProperties",
+        "$defs",
+    ]
+
+    # Splice other keys in before $defs, in input order (a set would shuffle them).
+    extra_keys = [key for key in schema if key not in key_order]
+    key_order = [*key_order[:-1], *extra_keys, key_order[-1]]
+    pretty_schema.update({key: schema[key] for key in key_order if key in schema})
+
+    # Sanity check: apart from $schema, re-ordering must not add or drop anything.
+    assert pretty_schema == {**schema, "$schema": pretty_schema["$schema"]}
+
+    def _walk_objects(objs: Iterable[Any]) -> None:
+        for obj in objs:
+            _walk_object(obj)
+
+    def _walk_object(obj: Any, top_level: bool = False) -> None:
+        # Boolean schemas (``true``/``false``) have no attributes to clean.
+        if not isinstance(obj, dict):
+            return
+
+        # All but the top-level title are auto-generated from field names; not useful.
+        if not top_level:
+            obj.pop("title", None)
+
+        # We don't support defaults.
+        obj.pop("default", None)
+
+        # This is an OpenAPI extension and it leads to incorrect code generation in our
+        # case (due to using a boolean discriminator).
+        obj.pop("discriminator", None)
+
+        # Collapse each run of newlines in a description to a single space.
+        if description := obj.get("description"):
+            obj["description"] = NEWLINES_RE.sub(" ", description)
+
+        # Remove redundant enum entries for constants (a ``const`` may itself be null).
+        if "const" in obj:
+            obj.pop("enum", None)
+
+        match obj.get("type"):
+            case "object":
+                if properties := obj.get("properties"):
+                    _walk_objects(properties.values())
+            case "array":
+                # Draft 2019-09 lets ``items`` be one schema or a list of schemas.
+                if items := obj.get("items"):
+                    _walk_objects(items if isinstance(items, list) else [items])
+
+        for group_key in ("allOf", "anyOf", "oneOf"):
+            if group := obj.get(group_key):
+                _walk_objects(group)
+
+    _walk_object(pretty_schema, top_level=True)
+    if defs := pretty_schema.get("$defs"):
+        _walk_objects(defs.values())
+
+    return pretty_schema
+
+
+def write_json_schemas(json_schemas_path: Path, python_package_dir: Path) -> None:
+    """Write one ``.schema.json`` file per pydantic model exported by ``experiments``.
+
+    Other files in ``json_schemas_path`` are deleted, and the resulting directory
+    replaces ``python_package_dir / "schemas"``.
+    """
+    json_schemas_path.mkdir(parents=True, exist_ok=True)
+    written_paths = set()
+
+    for model_name in experiments.__all__:
+        model = getattr(experiments, model_name)
+        # Skip exports that are not pydantic models (issubclass() rejects non-classes).
+        if not (isinstance(model, type) and issubclass(model, BaseModel)):
+            continue
+
+        model_schema_path = json_schemas_path / f"{model_name}.schema.json"
+        written_paths.add(model_schema_path)
+        json_schema = prettify_json_schema(model.model_json_schema())
+        with model_schema_path.open("w", encoding="utf-8") as f:
+            json.dump(json_schema, f, indent=2)
+            f.write("\n")
+
+    # Remove files we did not just generate (e.g., schemas of models that were removed).
+    for path in list(json_schemas_path.iterdir()):
+        if path not in written_paths:
+            path.unlink()
+
+    # Copy schemas into the python package.
+    schemas_dist_dir = python_package_dir / "schemas"
+    if schemas_dist_dir.exists():
+        shutil.rmtree(schemas_dist_dir)
+    shutil.copytree(json_schemas_path, schemas_dist_dir)
+
+
 @click.command()
 @click.option(
     "--output",
@@ -106,7 +225,25 @@ def iterate_models() -> dict[str, Any]:
     default=Path("index.d.ts"),
     help="Output typescript file.",
 )
-def main(*, ts_output_path: Path):
+@click.option(
+    "--json-schemas",
+    "json_schemas_path",
+    type=Path,
+    default=Path("schemas"),
+    help="Output JSON Schemas to this directory.",
+)
+@click.option(
+    "--python-package-dir",
+    "python_package_dir",
+    type=Path,
+    default=Path("mozilla_nimbus_schemas"),
+    help=(
+        "The directory to the mozilla-nimbus-schemas python package.\n"
+        "\n"
+        "Schemas will be installed inside this package at the schemas dir."
+    ),
+)
+def main(*, ts_output_path: Path, json_schemas_path: Path, python_package_dir: Path):
     json_schema = iterate_models()
 
     with TemporaryDirectory() as tmp_dir:
@@ -132,6 +269,8 @@ def main(*, ts_output_path: Path):
 
         clean_output_file(ts_output_path)
 
+    write_json_schemas(json_schemas_path, python_package_dir)
+
 
 if __name__ == "__main__":
     main()