-
Notifications
You must be signed in to change notification settings - Fork 97
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
/* PR_START p--short-term-perf 35 */ Add syn. manifest generator.
- Loading branch information
Showing
9 changed files
with
534 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
36 changes: 36 additions & 0 deletions
36
tests_metricflow/performance/categorical_dimension_generator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from __future__ import annotations | ||
|
||
from functools import cached_property | ||
|
||
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet | ||
|
||
|
||
class CategoricalDimensionGenerator:
    """Helps generate the categorical dimensions in the semantic manifest.

    The index for the dimension refers to the index when all unique dimensions in the semantic manifest are enumerated.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:
        """Store the parameter set used to compute the number of unique dimensions."""
        self._parameter_set = parameter_set

    def get_dimension_name(self, dimension_index: int) -> str:
        """Return the name of the dimension for the given index, zero-padded to at least 3 digits."""
        return f"dimension_{dimension_index:03}"

    @cached_property
    def unique_dimension_count(self) -> int:
        """Return the number of unique dimensions in the manifest.

        This is the number of categorical dimensions per semantic model multiplied by the number of dimension
        semantic models. The value is computed on first access and cached on the instance.
        """
        return (
            self._parameter_set.categorical_dimensions_per_semantic_model
            * self._parameter_set.dimension_semantic_model_count
        )

    def get_next_wrapped_index(self, dimension_index: int) -> int:
        """Return the next valid dimension index, wrapping back to 0 if it reaches the last index.

        Args:
            dimension_index: The current index. Must satisfy `0 <= dimension_index < unique_dimension_count`.

        Returns:
            `dimension_index + 1`, or 0 if `dimension_index` is the last valid index.

        Raises:
            ValueError: If `dimension_index` is negative or not less than `unique_dimension_count`.
        """
        # 0 is a valid index, so the lower bound is inclusive.
        if dimension_index < 0:
            raise ValueError(f"{dimension_index=} should be >= 0")

        if dimension_index >= self.unique_dimension_count:
            raise ValueError(f"{dimension_index=} should be < {self.unique_dimension_count}")

        return (dimension_index + 1) % self.unique_dimension_count
90 changes: 90 additions & 0 deletions
90
tests_metricflow/performance/dimension_semantic_model_generator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Sequence | ||
|
||
from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimension | ||
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity | ||
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation | ||
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel | ||
from dbt_semantic_interfaces.type_enums import DimensionType, EntityType | ||
|
||
from tests_metricflow.performance.categorical_dimension_generator import CategoricalDimensionGenerator | ||
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet | ||
|
||
|
||
class DimensionSemanticModelGenerator:
    """Builds the semantic models that hold categorical dimensions.

    Every generated semantic model includes the entity named by `common_entity_name` in the parameter set. The
    semantic models containing measures include the same entity so that any measure can be queried by any dimension.
    """

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        categorical_dimension_generator: CategoricalDimensionGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._dimension_generator = categorical_dimension_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Return one semantic model for each index in `range(dimension_semantic_model_count)`."""
        return [
            self._build_semantic_model(model_index)
            for model_index in range(self._parameter_set.dimension_semantic_model_count)
        ]

    def _build_semantic_model(self, semantic_model_index: int) -> PydanticSemanticModel:
        """Build the dimension semantic model at the given index in the manifest."""
        primary_entity = PydanticEntity(
            name=self._get_dimension_semantic_model_primary_entity_name(semantic_model_index),
            type=EntityType.PRIMARY,
        )
        common_entity = PydanticEntity(
            name=self._parameter_set.common_entity_name,
            type=EntityType.UNIQUE,
        )

        model_dimensions = []
        for index_in_model in range(self._parameter_set.categorical_dimensions_per_semantic_model):
            dimension_name = self._get_dimension_name(
                index_in_manifest=semantic_model_index,
                index_in_model=index_in_model,
            )
            model_dimensions.append(PydanticDimension(name=dimension_name, type=DimensionType.CATEGORICAL))

        # The model name doubles as the alias of the node relation.
        model_name = self._get_dimension_semantic_model_name(semantic_model_index)
        return PydanticSemanticModel(
            name=model_name,
            node_relation=PydanticNodeRelation(schema_name="demo", alias=model_name),
            entities=[primary_entity, common_entity],
            dimensions=model_dimensions,
        )

    def _get_dimension_semantic_model_name(self, index_in_manifest: int) -> str:
        """Return the name of the dimension semantic model at the given index."""
        return f"dimension_model_{index_in_manifest:03}"

    def _get_dimension_semantic_model_primary_entity_name(self, semantic_model_index: int) -> str:
        """Return the primary entity name for the dimension semantic model at the given index."""
        model_name = self._get_dimension_semantic_model_name(semantic_model_index)
        return f"{model_name}_primary_entity"

    def _get_dimension_name(self, index_in_manifest: int, index_in_model: int) -> str:
        """Return the name of a dimension located by its model and its position in that model.

        Args:
            index_in_manifest: The index of the semantic model in the manifest. e.g. the 2nd semantic model in the
                semantic manifest.
            index_in_model: The index of the dimension in the semantic model. e.g. the 2nd dimension in the semantic
                model.

        Returns:
            The name of the dimension given the index.
        """
        # Flatten (model, position) into the index used when all dimensions in the manifest are enumerated.
        dimensions_per_model = self._parameter_set.categorical_dimensions_per_semantic_model
        flat_dimension_index = index_in_manifest * dimensions_per_model + index_in_model
        return self._dimension_generator.get_dimension_name(flat_dimension_index)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from __future__ import annotations | ||
|
||
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet | ||
|
||
|
||
class MeasureGenerator:
    """Helps generate the measures in the semantic manifest.

    The index for the measure refers to the index when measures in the semantic manifest are enumerated.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:
        """Store the parameter set used to compute the number of unique measures."""
        self._parameter_set = parameter_set

    def get_measure_name(self, measure_index: int) -> str:
        """Return the name of the measure for the given index, zero-padded to at least 3 digits."""
        return f"measure_{measure_index:03}"

    @property
    def unique_measure_count(self) -> int:
        """Return the number of measures per semantic model multiplied by the number of measure semantic models."""
        return self._parameter_set.measures_per_semantic_model * self._parameter_set.measure_semantic_model_count

    def get_next_wrapped_index(self, measure_index: int) -> int:
        """Return the next valid measure index, wrapping back to 0 if it reaches the last index.

        Args:
            measure_index: The current index. Must satisfy `0 <= measure_index < unique_measure_count`.

        Returns:
            `measure_index + 1`, or 0 if `measure_index` is the last valid index.

        Raises:
            ValueError: If `measure_index` is negative or not less than `unique_measure_count`.
        """
        # 0 is a valid index, so the lower bound is inclusive.
        if measure_index < 0:
            raise ValueError(f"{measure_index=} should be >= 0")

        if measure_index >= self.unique_measure_count:
            raise ValueError(f"{measure_index=} should be < {self.unique_measure_count}")

        return (measure_index + 1) % self.unique_measure_count
89 changes: 89 additions & 0 deletions
89
tests_metricflow/performance/measure_semantic_model_generator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Sequence | ||
|
||
from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimension, PydanticDimensionTypeParams | ||
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity | ||
from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure | ||
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation | ||
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel | ||
from dbt_semantic_interfaces.type_enums import AggregationType, DimensionType, EntityType, TimeGranularity | ||
|
||
from tests_metricflow.performance.measure_generator import MeasureGenerator | ||
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet | ||
|
||
|
||
class MeasureSemanticModelGenerator:
    """Builds the semantic models that hold measures.

    Every generated semantic model includes the entity named by `common_entity_name` in the parameter set. The
    semantic models containing dimensions include the same entity so that any measure can be queried by any
    dimension.
    """

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        measure_generator: MeasureGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._measure_generator = measure_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Return one semantic model for each index in `range(measure_semantic_model_count)`.

        Measure names are assigned from a running measure index that starts at 0 and is advanced with the measure
        generator's `get_next_wrapped_index` after each measure is created.
        """
        measure_count_per_model = self._parameter_set.measures_per_semantic_model
        measure_cursor = 0
        generated_models = []

        for model_index in range(self._parameter_set.measure_semantic_model_count):
            model_measures = []
            for _ in range(measure_count_per_model):
                measure = PydanticMeasure(
                    name=self._measure_generator.get_measure_name(measure_cursor),
                    agg=AggregationType.SUM,
                    agg_time_dimension="ds",
                )
                model_measures.append(measure)
                measure_cursor = self._measure_generator.get_next_wrapped_index(measure_cursor)

            # The model name doubles as the alias of the node relation.
            model_name = self._get_measure_semantic_model_name(model_index)
            generated_models.append(
                PydanticSemanticModel(
                    name=model_name,
                    node_relation=PydanticNodeRelation(schema_name="demo", alias=model_name),
                    measures=model_measures,
                    entities=self._build_entities(model_index),
                    dimensions=[self._build_time_dimension()],
                )
            )

        return generated_models

    def _build_entities(self, semantic_model_index: int) -> Sequence[PydanticEntity]:
        """Return the model-specific primary entity followed by the common entity."""
        primary_entity = PydanticEntity(
            name=self._get_primary_entity_name_for_measure_semantic_model(semantic_model_index),
            type=EntityType.PRIMARY,
        )
        common_entity = PydanticEntity(
            name=self._parameter_set.common_entity_name,
            type=EntityType.UNIQUE,
        )
        return [primary_entity, common_entity]

    def _build_time_dimension(self) -> PydanticDimension:
        """Return the day-granularity `ds` time dimension that the measures aggregate on."""
        return PydanticDimension(
            name="ds",
            type=DimensionType.TIME,
            type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.DAY),
        )

    def _get_measure_semantic_model_name(self, semantic_model_index: int) -> str:
        """Return the name of the measure semantic model at the given index."""
        return f"measure_model_{semantic_model_index:03}"

    def _get_primary_entity_name_for_measure_semantic_model(self, semantic_model_index: int) -> str:
        """Return the primary entity name for the measure semantic model at the given index."""
        model_name = self._get_measure_semantic_model_name(semantic_model_index)
        return f"{model_name}_primary_entity"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass | ||
from typing import Sequence | ||
|
||
from dbt_semantic_interfaces.implementations.metric import ( | ||
PydanticMetric, | ||
PydanticMetricInput, | ||
PydanticMetricInputMeasure, | ||
PydanticMetricTypeParams, | ||
) | ||
from dbt_semantic_interfaces.type_enums import MetricType | ||
|
||
from tests_metricflow.performance.measure_generator import MeasureGenerator | ||
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet | ||
|
||
|
||
@dataclass(frozen=True)
class MetricIndex:
    """Identifies a generated metric in the semantic manifest by depth and position.

    Metrics can be defined through other metrics. The `depth_index` is the level of a metric in that hierarchy:
    `depth_index=0` is a simple metric that does not depend on any other metrics, and `depth_index=1` is a derived
    metric defined using all metrics at `depth_index=0`.

    The `width_index` enumerates the nth metric generated for the given depth (name needs improvement).

    Raises:
        ValueError: On construction, if either index is negative.
    """

    # Level in the metric hierarchy; 0 means a simple metric.
    depth_index: int
    # Position of the metric among those generated at the same depth.
    width_index: int

    def __post_init__(self) -> None:
        """Reject negative indexes, checking the depth before the width."""
        depth_is_negative = self.depth_index < 0
        if depth_is_negative:
            raise ValueError(f"{self.depth_index=} should be >= 0")

        width_is_negative = self.width_index < 0
        if width_is_negative:
            raise ValueError(f"{self.width_index=} should be >=0")
|
||
|
||
class MetricGenerator:
    """Builds the metrics of the synthetic manifest, indexed by depth and width."""

    def __init__(  # noqa: D107
        self, parameter_set: SyntheticManifestParameterSet, measure_generator: MeasureGenerator
    ) -> None:
        self._parameter_set = parameter_set
        self._measure_generator = measure_generator

    def generate_metrics(self) -> Sequence[PydanticMetric]:
        """Return a metric for every (depth, width) pair, ordered by depth and then by width."""
        return [
            self._generate_metric(MetricIndex(depth_index=depth, width_index=width))
            for depth in range(self._parameter_set.max_metric_depth)
            for width in range(self._parameter_set.max_metric_width)
        ]

    def get_first_index_at_max_depth(self) -> MetricIndex:
        """Return the index of the first metric (width 0) at the highest metric depth in the manifest."""
        deepest_level = self._parameter_set.max_metric_depth - 1
        return MetricIndex(depth_index=deepest_level, width_index=0)

    def get_next_wrapped_width_index(self, metric_index: MetricIndex) -> MetricIndex:
        """Return the index of the next metric at the same depth, wrapping to width 0 after the last one."""
        following_width = (metric_index.width_index + 1) % self._parameter_set.max_metric_width
        return MetricIndex(depth_index=metric_index.depth_index, width_index=following_width)

    def get_metric_name(self, index: MetricIndex) -> str:
        """Return the metric name for the index: the depth, then the width zero-padded to at least 3 digits."""
        return f"metric_{index.depth_index}_{index.width_index:03}"

    def _metric_indexes_at_depth(self, depth_index: int) -> Sequence[MetricIndex]:
        """Return the indexes of all metrics at the given depth, in width order."""
        indexes_at_depth = []
        for width in range(self._parameter_set.max_metric_width):
            indexes_at_depth.append(MetricIndex(depth_index=depth_index, width_index=width))
        return tuple(indexes_at_depth)

    def _generate_metric(self, metric_index: MetricIndex) -> PydanticMetric:
        """Build a simple metric at depth 0, or a derived metric over the previous depth otherwise."""
        if metric_index.depth_index == 0:
            return self._generate_simple_metric(metric_index)
        return self._generate_derived_metric(metric_index)

    def _generate_simple_metric(self, metric_index: MetricIndex) -> PydanticMetric:
        """Build a simple metric whose measure is chosen by the width index."""
        # Reduce the width modulo the measure count so that it always maps to an existing measure.
        wrapped_measure_index = metric_index.width_index % self._measure_generator.unique_measure_count
        input_measure = PydanticMetricInputMeasure(
            name=self._measure_generator.get_measure_name(measure_index=wrapped_measure_index)
        )
        return PydanticMetric(
            name=self.get_metric_name(metric_index),
            type=MetricType.SIMPLE,
            type_params=PydanticMetricTypeParams(measure=input_measure),
        )

    def _generate_derived_metric(self, metric_index: MetricIndex) -> PydanticMetric:
        """Build a derived metric that sums every metric at the depth directly below."""
        parent_names = tuple(
            self.get_metric_name(parent_index)
            for parent_index in self._metric_indexes_at_depth(metric_index.depth_index - 1)
        )
        parent_inputs = [PydanticMetricInput(name=parent_name) for parent_name in parent_names]
        return PydanticMetric(
            name=self.get_metric_name(metric_index),
            type=MetricType.DERIVED,
            type_params=PydanticMetricTypeParams(
                metrics=parent_inputs,
                expr=" + ".join(parent_names),
            ),
        )
Oops, something went wrong.