Skip to content

Commit

Permalink
/* PR_START p--short-term-perf 32 */ Add DimensionLookup.
Browse files Browse the repository at this point in the history
  • Loading branch information
plypaul committed Oct 30, 2024
1 parent 76b36c0 commit ed1cfef
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, Sequence

from dbt_semantic_interfaces.protocols import SemanticModel
from dbt_semantic_interfaces.references import DimensionReference
from dbt_semantic_interfaces.type_enums import DimensionType

from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat


@dataclass(frozen=True)
class DimensionInvariant:
    """Properties that every definition of a same-named dimension must share within one manifest.

    Instances are immutable and compare by value, so two definitions of a dimension can be
    checked for agreement with `==`.

    Attributes:
        dimension_type: The type declared for the dimension.
        is_partition: The partition flag declared for the dimension.
    """

    dimension_type: DimensionType
    is_partition: bool


class DimensionLookup:
    """Looks up properties related to dimensions.

    The lookup is built once from a collection of semantic models and maps each dimension
    reference to the `DimensionInvariant` describing it.
    """

    def __init__(self, semantic_models: Sequence[SemanticModel]) -> None:
        """Index the invariant of every dimension found in `semantic_models`.

        Args:
            semantic_models: The semantic models whose dimensions should be indexed.

        Raises:
            ValueError: If two dimensions with the same reference have different invariants.
        """
        self._dimension_reference_to_invariant: Dict[DimensionReference, DimensionInvariant] = {}
        for semantic_model in semantic_models:
            for dimension in semantic_model.dimensions:
                invariant = DimensionInvariant(
                    dimension_type=dimension.type,
                    is_partition=dimension.is_partition,
                )
                dimension_reference = dimension.reference
                existing_invariant = self._dimension_reference_to_invariant.get(dimension_reference)
                # The same dimension may be defined in several semantic models; that is only
                # acceptable when every definition agrees on the invariant properties.
                if existing_invariant is not None and existing_invariant != invariant:
                    raise ValueError(
                        str(
                            LazyFormat(
                                "Dimensions with the same name have been defined with conflicting values that "
                                "should have been the same in a given semantic manifest. This should have been caught "
                                "during validation.",
                                dimension_reference=dimension_reference,
                                existing_invariant=existing_invariant,
                                conflicting_invariant=invariant,
                                semantic_model_reference=semantic_model.reference,
                            )
                        )
                    )

                self._dimension_reference_to_invariant[dimension_reference] = invariant

    def get_invariant(self, dimension_reference: DimensionReference) -> DimensionInvariant:
        """Get invariants for the given dimension in the semantic manifest.

        Args:
            dimension_reference: Reference to the dimension to look up. Only its `element_name`
                is used for the lookup.

        Returns:
            The invariant recorded for the dimension when this lookup was constructed.

        Raises:
            ValueError: If no dimension with that name was indexed.
        """
        # dimension_reference might be a TimeDimensionReference, so change types.
        dimension_reference = DimensionReference(element_name=dimension_reference.element_name)
        # Use `.get()` rather than subscripting: subscripting raises a bare `KeyError` for an
        # unknown reference, which would make the descriptive error below unreachable.
        invariant = self._dimension_reference_to_invariant.get(dimension_reference)
        if invariant is None:
            raise ValueError(
                str(
                    LazyFormat(
                        "Unknown dimension reference",
                        dimension_reference=dimension_reference,
                        known_dimension_references=list(self._dimension_reference_to_invariant),
                    )
                )
            )

        return invariant
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from metricflow_semantics.errors.error_classes import InvalidSemanticModelError
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.mf_logging.pretty_print import mf_pformat
from metricflow_semantics.model.semantics.dimension_lookup import DimensionLookup
from metricflow_semantics.model.semantics.element_group import ElementGrouper
from metricflow_semantics.model.semantics.measure_lookup import MeasureLookup
from metricflow_semantics.model.semantics.semantic_model_helper import SemanticModelHelper
Expand Down Expand Up @@ -73,6 +74,7 @@ def __init__(self, model: SemanticManifest, custom_granularities: Dict[str, Expa
self._measure_reference_to_agg_time_dimension_specs: Dict[MeasureReference, Sequence[TimeDimensionSpec]] = {}

self._measure_lookup = MeasureLookup(sorted_semantic_models, custom_granularities)
self._dimension_lookup = DimensionLookup(sorted_semantic_models)

def get_dimension_references(self) -> Sequence[DimensionReference]:
"""Retrieve all dimension references from the collection of semantic models."""
Expand Down Expand Up @@ -340,3 +342,7 @@ def _get_defined_time_granularity(self, time_dimension_reference: TimeDimensionR
@property
def measure_lookup(self) -> MeasureLookup:  # noqa: D102
    # Read-only accessor for the `MeasureLookup` stored on this instance as `_measure_lookup`.
    return self._measure_lookup

@property
def dimension_lookup(self) -> DimensionLookup:  # noqa: D102
    # Read-only accessor for the `DimensionLookup` stored on this instance as `_dimension_lookup`.
    return self._dimension_lookup
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

import pytest
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.semantic_manifest import PydanticSemanticManifest
from metricflow_semantics.model.semantics.dimension_lookup import DimensionLookup
from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration
from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal


@pytest.fixture(scope="module")
def dimension_lookup(
    partitioned_multi_hop_join_semantic_manifest: PydanticSemanticManifest,
) -> DimensionLookup:
    """Provide a module-scoped `DimensionLookup` built from the partitioned multi-hop join manifest."""
    semantic_models = partitioned_multi_hop_join_semantic_manifest.semantic_models
    return DimensionLookup(semantic_models)


def test_get_invariant(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    partitioned_multi_hop_join_semantic_manifest: PydanticSemanticManifest,
    dimension_lookup: DimensionLookup,
) -> None:
    """Snapshot the invariant of every dimension in the manifest.

    Uses `partitioned_multi_hop_join_semantic_manifest` to show an example of different `is_partition` values.
    """
    # Collect a reference for every dimension of every semantic model, in sorted order.
    ordered_references = sorted(
        dimension.reference
        for semantic_model in partitioned_multi_hop_join_semantic_manifest.semantic_models
        for dimension in semantic_model.dimensions
    )

    # Key by element name; a name that appears in several models maps to a single entry.
    invariant_by_name = {}
    for reference in ordered_references:
        invariant_by_name[reference.element_name] = dimension_lookup.get_invariant(reference)

    assert_object_snapshot_equal(
        request=request,
        mf_test_configuration=mf_test_configuration,
        obj_id="obj_0",
        obj=invariant_by_name,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
'account_month': DimensionInvariant(dimension_type=CATEGORICAL, is_partition=False),
'acquired_ds': DimensionInvariant(dimension_type=TIME, is_partition=False),
'country': DimensionInvariant(dimension_type=CATEGORICAL, is_partition=False),
'customer_atomic_weight': DimensionInvariant(dimension_type=CATEGORICAL, is_partition=False),
'customer_name': DimensionInvariant(dimension_type=CATEGORICAL, is_partition=False),
'ds': DimensionInvariant(dimension_type=TIME, is_partition=False),
'ds_partitioned': DimensionInvariant(dimension_type=TIME, is_partition=True),
'extra_dim': DimensionInvariant(dimension_type=CATEGORICAL, is_partition=False),
'third_hop_ds': DimensionInvariant(dimension_type=TIME, is_partition=False),
'value': DimensionInvariant(dimension_type=CATEGORICAL, is_partition=False),
}

0 comments on commit ed1cfef

Please sign in to comment.