Skip to content

Commit

Permalink
/* PR_START p--short-term-perf 35 */ Add syn. manifest generator.
Browse files Browse the repository at this point in the history
  • Loading branch information
plypaul committed Oct 30, 2024
1 parent 85662b2 commit f536bd7
Show file tree
Hide file tree
Showing 9 changed files with 534 additions and 0 deletions.
Empty file.
36 changes: 36 additions & 0 deletions tests_metricflow/performance/categorical_dimension_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from functools import cached_property

from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet


class CategoricalDimensionGenerator:
    """Helps generate the categorical dimensions in the semantic manifest.

    The index for the dimension refers to the index when all unique dimensions in the semantic manifest are
    enumerated.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:  # noqa: D107
        self._parameter_set = parameter_set

    def get_dimension_name(self, dimension_index: int) -> str:
        """Return the name of the dimension for the given index (zero-padded to at least 3 digits)."""
        return f"dimension_{dimension_index:03}"

    @cached_property
    def unique_dimension_count(self) -> int:
        """Return the number of unique categorical dimensions across all dimension semantic models.

        Computed as (categorical dimensions per semantic model) * (number of dimension semantic models) and
        cached on first access.
        """
        return (
            self._parameter_set.categorical_dimensions_per_semantic_model
            * self._parameter_set.dimension_semantic_model_count
        )

    def get_next_wrapped_index(self, dimension_index: int) -> int:
        """Return the next valid dimension index, wrapping back to 0 if it reaches the last index.

        Args:
            dimension_index: The current index. Must satisfy `0 <= dimension_index < unique_dimension_count`.

        Returns:
            `dimension_index + 1`, or 0 when `dimension_index` is the last valid index.

        Raises:
            ValueError: If `dimension_index` is negative or not less than `unique_dimension_count`.
        """
        if dimension_index < 0:
            # 0 is a valid index, so the lower bound is inclusive.
            raise ValueError(f"{dimension_index=} should be >= 0")

        if dimension_index >= self.unique_dimension_count:
            raise ValueError(f"{dimension_index=} should be < {self.unique_dimension_count}")

        return (dimension_index + 1) % self.unique_dimension_count
90 changes: 90 additions & 0 deletions tests_metricflow/performance/dimension_semantic_model_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from __future__ import annotations

from typing import Sequence

from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimension
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel
from dbt_semantic_interfaces.type_enums import DimensionType, EntityType

from tests_metricflow.performance.categorical_dimension_generator import CategoricalDimensionGenerator
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet


class DimensionSemanticModelGenerator:
    """Builds the semantic models that hold only categorical dimensions.

    Every generated model carries, besides its own primary entity, the entity named by
    `parameter_set.common_entity_name`. The semantic models containing measures carry the same entity so that
    any measure can be queried by any dimension.
    """

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        categorical_dimension_generator: CategoricalDimensionGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._dimension_generator = categorical_dimension_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Return one semantic model per index in `range(dimension_semantic_model_count)`, in index order."""
        return [
            self._build_semantic_model(model_index)
            for model_index in range(self._parameter_set.dimension_semantic_model_count)
        ]

    def _build_semantic_model(self, model_index: int) -> PydanticSemanticModel:
        """Assemble the semantic model (entities + categorical dimensions) for the given model index."""
        primary_entity = PydanticEntity(
            name=self._get_dimension_semantic_model_primary_entity_name(model_index),
            type=EntityType.PRIMARY,
        )
        common_entity = PydanticEntity(
            name=self._parameter_set.common_entity_name,
            type=EntityType.UNIQUE,
        )

        categorical_dimensions = []
        for position_in_model in range(self._parameter_set.categorical_dimensions_per_semantic_model):
            dimension_name = self._get_dimension_name(
                index_in_manifest=model_index,
                index_in_model=position_in_model,
            )
            categorical_dimensions.append(PydanticDimension(name=dimension_name, type=DimensionType.CATEGORICAL))

        model_name = self._get_dimension_semantic_model_name(model_index)
        # The model name doubles as the alias of the node relation.
        relation = PydanticNodeRelation(schema_name="demo", alias=model_name)
        return PydanticSemanticModel(
            name=model_name,
            node_relation=relation,
            entities=[primary_entity, common_entity],
            dimensions=categorical_dimensions,
        )

    def _get_dimension_semantic_model_name(self, index_in_manifest: int) -> str:
        """Return the name of the dimension semantic model at the given index (zero-padded to 3 digits)."""
        return f"dimension_model_{index_in_manifest:03}"

    def _get_dimension_semantic_model_primary_entity_name(self, semantic_model_index: int) -> str:
        """Return the primary entity name: the model's name followed by `_primary_entity`."""
        model_name = self._get_dimension_semantic_model_name(semantic_model_index)
        return f"{model_name}_primary_entity"

    def _get_dimension_name(self, index_in_manifest: int, index_in_model: int) -> str:
        """Get the name of the dimension given the index.

        Args:
            index_in_manifest: The index of the semantic model in the manifest. e.g. the 2nd semantic model in the
                semantic manifest.
            index_in_model: The index of the dimension in the semantic model. e.g. the 2nd dimension in the
                semantic model.

        Returns:
            The name of the dimension given the index.
        """
        # Flatten (model, position) into the manifest-wide dimension index.
        dimensions_per_model = self._parameter_set.categorical_dimensions_per_semantic_model
        manifest_wide_index = index_in_manifest * dimensions_per_model + index_in_model
        return self._dimension_generator.get_dimension_name(manifest_wide_index)
30 changes: 30 additions & 0 deletions tests_metricflow/performance/measure_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from __future__ import annotations

from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet


class MeasureGenerator:
    """Helps generate the measures in the semantic manifest.

    The index for the measure refers to the index when measures in the semantic manifest are enumerated.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:  # noqa: D107
        self._parameter_set = parameter_set

    def get_measure_name(self, measure_index: int) -> str:
        """Return the name of the measure for the given index (zero-padded to at least 3 digits)."""
        return f"measure_{measure_index:03}"

    @property
    def unique_measure_count(self) -> int:
        """Return the number of unique measures across all measure semantic models.

        Computed as (measures per semantic model) * (number of measure semantic models).
        """
        return self._parameter_set.measures_per_semantic_model * self._parameter_set.measure_semantic_model_count

    def get_next_wrapped_index(self, measure_index: int) -> int:
        """Return the next valid measure index, wrapping back to 0 if it reaches the last index.

        Args:
            measure_index: The current index. Must satisfy `0 <= measure_index < unique_measure_count`.

        Returns:
            `measure_index + 1`, or 0 when `measure_index` is the last valid index.

        Raises:
            ValueError: If `measure_index` is negative or not less than `unique_measure_count`.
        """
        if measure_index < 0:
            # 0 is a valid index, so the lower bound is inclusive.
            raise ValueError(f"{measure_index=} should be >= 0")

        # Read the property once so the bound check, message and modulo all use the same value.
        measure_count = self.unique_measure_count
        if measure_index >= measure_count:
            raise ValueError(f"{measure_index=} should be < {measure_count}")

        return (measure_index + 1) % measure_count
89 changes: 89 additions & 0 deletions tests_metricflow/performance/measure_semantic_model_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from __future__ import annotations

from typing import Sequence

from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimension, PydanticDimensionTypeParams
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity
from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel
from dbt_semantic_interfaces.type_enums import AggregationType, DimensionType, EntityType, TimeGranularity

from tests_metricflow.performance.measure_generator import MeasureGenerator
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet


class MeasureSemanticModelGenerator:
    """Builds the semantic models that hold measures.

    Every generated model carries, besides its own primary entity, the entity named by
    `parameter_set.common_entity_name`. The semantic models containing dimensions carry the same entity so that
    any measure can be queried by any dimension.
    """

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        measure_generator: MeasureGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._measure_generator = measure_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Return one semantic model per index in `range(measure_semantic_model_count)`, in index order.

        Measures are named from a single running index that starts at 0 and is advanced through
        `MeasureGenerator.get_next_wrapped_index` after every measure.
        """
        measure_count_per_model = self._parameter_set.measures_per_semantic_model
        generated_models = []
        measure_cursor = 0

        for model_index in range(self._parameter_set.measure_semantic_model_count):
            model_measures = []
            for _ in range(measure_count_per_model):
                measure_name = self._measure_generator.get_measure_name(measure_cursor)
                model_measures.append(
                    PydanticMeasure(name=measure_name, agg=AggregationType.SUM, agg_time_dimension="ds")
                )
                measure_cursor = self._measure_generator.get_next_wrapped_index(measure_cursor)

            generated_models.append(self._build_semantic_model(model_index, model_measures))

        return generated_models

    def _build_semantic_model(self, model_index: int, model_measures: list) -> PydanticSemanticModel:
        """Assemble the semantic model for `model_index` around the already-built measures."""
        primary_entity = PydanticEntity(
            name=self._get_primary_entity_name_for_measure_semantic_model(model_index),
            type=EntityType.PRIMARY,
        )
        common_entity = PydanticEntity(
            name=self._parameter_set.common_entity_name,
            type=EntityType.UNIQUE,
        )
        # "ds" is the daily time dimension that every measure names as its `agg_time_dimension`.
        time_dimension = PydanticDimension(
            name="ds",
            type=DimensionType.TIME,
            type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.DAY),
        )

        model_name = self._get_measure_semantic_model_name(model_index)
        # The model name doubles as the alias of the node relation.
        relation = PydanticNodeRelation(schema_name="demo", alias=model_name)
        return PydanticSemanticModel(
            name=model_name,
            node_relation=relation,
            measures=model_measures,
            entities=[primary_entity, common_entity],
            dimensions=[time_dimension],
        )

    def _get_measure_semantic_model_name(self, semantic_model_index: int) -> str:
        """Return the name of the measure semantic model at the given index (zero-padded to 3 digits)."""
        return f"measure_model_{semantic_model_index:03}"

    def _get_primary_entity_name_for_measure_semantic_model(self, semantic_model_index: int) -> str:
        """Return the primary entity name for the measure semantic model at the given index."""
        return f"measure_model_{semantic_model_index:03}_primary_entity"
104 changes: 104 additions & 0 deletions tests_metricflow/performance/metric_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Sequence

from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
PydanticMetricInput,
PydanticMetricInputMeasure,
PydanticMetricTypeParams,
)
from dbt_semantic_interfaces.type_enums import MetricType

from tests_metricflow.performance.measure_generator import MeasureGenerator
from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet


@dataclass(frozen=True)
class MetricIndex:
    """Position of a generated metric in the semantic manifest, expressed as a (depth, width) pair.

    Metrics can be defined through other metrics. `depth_index` is the level of a metric in that hierarchy:
    `depth_index=0` is a simple metric that does not depend on any other metric, and `depth_index=1` is a derived
    metric defined using all metrics at `depth_index=0`.

    `width_index` enumerates the nth metric generated at a given depth (name needs improvement).
    """

    depth_index: int
    width_index: int

    def __post_init__(self) -> None:  # noqa: D105
        # Both components are zero-based, so negative values are rejected. Depth is checked first, which makes
        # it the component reported when both are negative.
        depth_is_negative = self.depth_index < 0
        if depth_is_negative:
            raise ValueError(f"{self.depth_index=} should be >= 0")

        width_is_negative = self.width_index < 0
        if width_is_negative:
            raise ValueError(f"{self.width_index=} should be >=0")


class MetricGenerator:
    """Builds the metrics of the synthetic manifest as a grid of `max_metric_depth` x `max_metric_width`.

    Metrics at depth 0 are simple metrics over a measure; metrics at every other depth are derived metrics that
    sum all metrics of the depth directly below.
    """

    def __init__(  # noqa: D107
        self, parameter_set: SyntheticManifestParameterSet, measure_generator: MeasureGenerator
    ) -> None:
        self._parameter_set = parameter_set
        self._measure_generator = measure_generator

    def generate_metrics(self) -> Sequence[PydanticMetric]:
        """Return all metrics, ordered by depth and then by width within each depth."""
        generated = []
        for depth_index in range(self._parameter_set.max_metric_depth):
            generated.extend(
                self._generate_metric(metric_index) for metric_index in self._metric_indexes_at_depth(depth_index)
            )
        return generated

    def get_first_index_at_max_depth(self) -> MetricIndex:
        """For the highest possible metric depth in the semantic manifest, return the index of the first metric."""
        deepest_level = self._parameter_set.max_metric_depth - 1
        return MetricIndex(depth_index=deepest_level, width_index=0)

    def get_next_wrapped_width_index(self, metric_index: MetricIndex) -> MetricIndex:
        """Return the index of the next metric at the same depth level, wrapping to width 0 after the last one."""
        following_width = (metric_index.width_index + 1) % self._parameter_set.max_metric_width
        return MetricIndex(depth_index=metric_index.depth_index, width_index=following_width)

    def get_metric_name(self, index: MetricIndex) -> str:
        """Return the metric name: `metric_<depth>_<width>` with the width zero-padded to at least 3 digits."""
        return f"metric_{index.depth_index}_{index.width_index:03}"

    def _metric_indexes_at_depth(self, depth_index: int) -> Sequence[MetricIndex]:
        """Return the indexes of all metrics at the given depth, in width order."""
        indexes = []
        for width_index in range(self._parameter_set.max_metric_width):
            indexes.append(MetricIndex(depth_index=depth_index, width_index=width_index))
        return tuple(indexes)

    def _generate_metric(self, metric_index: MetricIndex) -> PydanticMetric:
        """Build the metric for the given index: simple at depth 0, derived otherwise."""
        metric_name = self.get_metric_name(metric_index)

        if metric_index.depth_index == 0:
            # Wrap the width onto the available measures so that any width maps to an existing measure.
            measure_index = metric_index.width_index % self._measure_generator.unique_measure_count
            input_measure = PydanticMetricInputMeasure(
                name=self._measure_generator.get_measure_name(measure_index=measure_index)
            )
            return PydanticMetric(
                name=metric_name,
                type=MetricType.SIMPLE,
                type_params=PydanticMetricTypeParams(measure=input_measure),
            )

        # A derived metric takes every metric one level down as input and adds them together.
        parent_names = tuple(
            self.get_metric_name(parent_index)
            for parent_index in self._metric_indexes_at_depth(metric_index.depth_index - 1)
        )
        metric_inputs = [PydanticMetricInput(name=parent_name) for parent_name in parent_names]
        return PydanticMetric(
            name=metric_name,
            type=MetricType.DERIVED,
            type_params=PydanticMetricTypeParams(
                metrics=metric_inputs,
                expr=" + ".join(parent_names),
            ),
        )
Loading

0 comments on commit f536bd7

Please sign in to comment.