@dataclass(frozen=True)
class DimensionInvariant:
    """For a given manifest, all defined dimensions with the same name should have these same properties.

    Instances are frozen, so they are compared by value; `DimensionLookup` relies on that equality to detect
    conflicting definitions of a dimension.
    """

    # The `type` of the dimension as declared in the semantic model.
    dimension_type: DimensionType
    # The `is_partition` flag of the dimension as declared in the semantic model.
    is_partition: bool


class DimensionLookup:
    """Looks up properties related to dimensions."""

    def __init__(self, semantic_models: Sequence[SemanticModel]) -> None:
        """Index the invariant properties of every dimension defined in the given semantic models.

        Args:
            semantic_models: The semantic models whose `dimensions` should be indexed.

        Raises:
            ValueError: If a dimension reference appears more than once with a different type or partition flag.
        """
        self._dimension_reference_to_invariant: Dict[DimensionReference, DimensionInvariant] = {}
        for semantic_model in semantic_models:
            for dimension in semantic_model.dimensions:
                invariant = DimensionInvariant(
                    dimension_type=dimension.type,
                    is_partition=dimension.is_partition,
                )
                dimension_reference = dimension.reference
                existing_invariant = self._dimension_reference_to_invariant.get(dimension_reference)
                # A repeated definition is fine as long as it agrees with what was already recorded.
                if existing_invariant is not None and existing_invariant != invariant:
                    raise ValueError(
                        str(
                            LazyFormat(
                                "Dimensions with the same name have been defined with conflicting values that "
                                "should have been the same in a given semantic manifest. This should have been caught "
                                "during validation.",
                                dimension_reference=dimension_reference,
                                existing_invariant=existing_invariant,
                                conflicting_invariant=invariant,
                                semantic_model_reference=semantic_model.reference,
                            )
                        )
                    )

                self._dimension_reference_to_invariant[dimension_reference] = invariant

    def get_invariant(self, dimension_reference: DimensionReference) -> DimensionInvariant:
        """Get invariants for the given dimension in the semantic manifest.

        Args:
            dimension_reference: Reference to the dimension to look up. Only its `element_name` is used.

        Returns:
            The invariant recorded for the dimension with that element name.

        Raises:
            ValueError: If no dimension with that element name was indexed at construction time.
        """
        # dimension_reference might be a TimeDimensionReference, so change types.
        dimension_reference = DimensionReference(element_name=dimension_reference.element_name)
        # Use `.get()` instead of indexing: indexing would raise a bare `KeyError` for an unknown reference and
        # the descriptive error below could never be reached.
        invariant = self._dimension_reference_to_invariant.get(dimension_reference)
        if invariant is None:
            raise ValueError(
                str(
                    LazyFormat(
                        "Unknown dimension reference",
                        dimension_reference=dimension_reference,
                        known_dimension_references=list(self._dimension_reference_to_invariant.keys()),
                    )
                )
            )

        return invariant
@pytest.fixture(scope="module")
def dimension_lookup(  # noqa: D103
    partitioned_multi_hop_join_semantic_manifest: PydanticSemanticManifest,
) -> DimensionLookup:
    manifest_semantic_models = partitioned_multi_hop_join_semantic_manifest.semantic_models
    return DimensionLookup(manifest_semantic_models)


def test_get_invariant(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    partitioned_multi_hop_join_semantic_manifest: PydanticSemanticManifest,
    dimension_lookup: DimensionLookup,
) -> None:
    """Snapshot the invariant returned for every dimension in the manifest.

    The `partitioned_multi_hop_join_semantic_manifest` fixture is used because it provides an example of
    different `is_partition` values.
    """
    # Gather the reference of every dimension across all semantic models, in sorted order so that the
    # resulting dict is built in a stable order.
    ordered_references = sorted(
        dimension.reference
        for semantic_model in partitioned_multi_hop_join_semantic_manifest.semantic_models
        for dimension in semantic_model.dimensions
    )

    # Key by element name; a dimension defined in several models collapses to a single entry.
    invariant_by_element_name = {}
    for reference in ordered_references:
        invariant_by_element_name[reference.element_name] = dimension_lookup.get_invariant(reference)

    assert_object_snapshot_equal(
        request=request,
        mf_test_configuration=mf_test_configuration,
        obj_id="obj_0",
        obj=invariant_by_element_name,
    )