class CategoricalDimensionGenerator:
    """Helps generate the categorical dimensions in the semantic manifest.

    The index for the dimension refers to the index when all unique dimensions in the semantic manifest are enumerated.
    Valid indexes are in the half-open range `[0, unique_dimension_count)`.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:  # noqa: D107
        self._parameter_set = parameter_set

    def get_dimension_name(self, dimension_index: int) -> str:
        """Return the name of the dimension for the given index.

        The index is zero-padded to at least 3 digits, e.g. index 7 -> `dimension_007`.
        """
        return f"dimension_{dimension_index:03}"

    @cached_property
    def unique_dimension_count(self) -> int:
        """Return the number of unique categorical dimensions across all dimension semantic models.

        This is the number of categorical dimensions per semantic model multiplied by the number of semantic models
        that contain dimensions. The value is computed once and cached on the instance.
        """
        return (
            self._parameter_set.categorical_dimensions_per_semantic_model
            * self._parameter_set.dimension_semantic_model_count
        )

    def get_next_wrapped_index(self, dimension_index: int) -> int:
        """Return the next valid dimension index, wrapping back to 0 if it reaches the last index.

        Args:
            dimension_index: The current index. Must satisfy `0 <= dimension_index < unique_dimension_count`.

        Returns:
            `dimension_index + 1`, or 0 when `dimension_index` is the last valid index.

        Raises:
            ValueError: If `dimension_index` is outside the valid range.
        """
        # 0 is a valid index, so the lower bound in the message is inclusive.
        if dimension_index < 0:
            raise ValueError(f"{dimension_index=} should be >= 0")

        if dimension_index >= self.unique_dimension_count:
            raise ValueError(f"{dimension_index=} should be < {self.unique_dimension_count}")

        return (dimension_index + 1) % self.unique_dimension_count
# tests_metricflow/performance/dimension_semantic_model_generator.py


class DimensionSemanticModelGenerator:
    """Helps generate semantic models containing categorical dimensions.

    Each generated semantic model contains an entity common to the semantic models containing measures so that
    any measure can be queried by any dimension.
    """

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        categorical_dimension_generator: CategoricalDimensionGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._dimension_generator = categorical_dimension_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Return one semantic model per `dimension_semantic_model_count` in the parameter set."""
        return [
            self._generate_semantic_model(semantic_model_index)
            for semantic_model_index in range(self._parameter_set.dimension_semantic_model_count)
        ]

    def _generate_semantic_model(self, semantic_model_index: int) -> PydanticSemanticModel:
        """Build the dimension semantic model at the given index in the manifest."""
        entities = [
            # An entity that is specific to this semantic model.
            PydanticEntity(
                name=self._get_dimension_semantic_model_primary_entity_name(semantic_model_index),
                type=EntityType.PRIMARY,
            ),
            # The entity shared with the semantic models that contain measures.
            PydanticEntity(
                name=self._parameter_set.common_entity_name,
                type=EntityType.UNIQUE,
            ),
        ]

        dimensions = [
            PydanticDimension(
                name=self._get_dimension_name(
                    index_in_manifest=semantic_model_index,
                    index_in_model=dimension_index,
                ),
                type=DimensionType.CATEGORICAL,
            )
            for dimension_index in range(self._parameter_set.categorical_dimensions_per_semantic_model)
        ]

        semantic_model_name = self._get_dimension_semantic_model_name(semantic_model_index)
        return PydanticSemanticModel(
            name=semantic_model_name,
            node_relation=PydanticNodeRelation(
                schema_name="demo",
                alias=semantic_model_name,
            ),
            entities=entities,
            dimensions=dimensions,
        )

    def _get_dimension_semantic_model_name(self, index_in_manifest: int) -> str:
        """Return the name of the dimension semantic model at the given index."""
        return f"dimension_model_{index_in_manifest:03}"

    def _get_dimension_semantic_model_primary_entity_name(self, semantic_model_index: int) -> str:
        """Return the name of the primary entity for the dimension semantic model at the given index."""
        return f"{self._get_dimension_semantic_model_name(semantic_model_index)}_primary_entity"

    def _get_dimension_name(self, index_in_manifest: int, index_in_model: int) -> str:
        """Get the name of the dimension given the index.

        Args:
            index_in_manifest: The index of the semantic model in the manifest. e.g. the 2nd semantic model in the
            semantic manifest.
            index_in_model: The index of the dimension in the semantic model. e.g. the 2nd dimension in the semantic
            model.

        Returns:
            The name of the dimension given the index.
        """
        # Flatten (model index, index within model) into the manifest-wide dimension index.
        return self._dimension_generator.get_dimension_name(
            index_in_manifest * self._parameter_set.categorical_dimensions_per_semantic_model + index_in_model
        )


# tests_metricflow/performance/measure_generator.py


class MeasureGenerator:
    """Helps generate the measures in the semantic manifest.

    The index for the measure refers to the index when measures in the semantic manifest are enumerated.
    Valid indexes are in the half-open range `[0, unique_measure_count)`.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:  # noqa: D107
        self._parameter_set = parameter_set

    def get_measure_name(self, measure_index: int) -> str:
        """Return the name of the measure for the given index, zero-padded to at least 3 digits."""
        return f"measure_{measure_index:03}"

    @property
    def unique_measure_count(self) -> int:
        """Return the number of measures per semantic model times the number of measure semantic models."""
        return self._parameter_set.measures_per_semantic_model * self._parameter_set.measure_semantic_model_count

    def get_next_wrapped_index(self, measure_index: int) -> int:
        """Return the next valid measure index, wrapping back to 0 if it reaches the last index.

        Args:
            measure_index: The current index. Must satisfy `0 <= measure_index < unique_measure_count`.

        Returns:
            `measure_index + 1`, or 0 when `measure_index` is the last valid index.

        Raises:
            ValueError: If `measure_index` is outside the valid range.
        """
        # 0 is a valid index, so the lower bound in the message is inclusive.
        if measure_index < 0:
            raise ValueError(f"{measure_index=} should be >= 0")

        if measure_index >= self.unique_measure_count:
            raise ValueError(f"{measure_index=} should be < {self.unique_measure_count}")

        return (measure_index + 1) % self.unique_measure_count
class MeasureSemanticModelGenerator:
    """Helps generate semantic models containing measures.

    Every generated semantic model carries an entity that is also present in the semantic models containing
    dimensions so that any measure can be queried by any dimension.
    """

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        measure_generator: MeasureGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._measure_generator = measure_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Return one semantic model per `measure_semantic_model_count`, each with its share of the measures."""
        model_count = self._parameter_set.measure_semantic_model_count
        measure_count_per_model = self._parameter_set.measures_per_semantic_model

        generated_models = []
        # Measure indexes keep advancing across semantic models so that each model gets different measures.
        measure_cursor = 0

        for model_index in range(model_count):
            model_measures = []
            for _ in range(measure_count_per_model):
                measure_name = self._measure_generator.get_measure_name(measure_cursor)
                model_measures.append(
                    PydanticMeasure(
                        name=measure_name,
                        agg=AggregationType.SUM,
                        agg_time_dimension="ds",
                    )
                )
                measure_cursor = self._measure_generator.get_next_wrapped_index(measure_cursor)

            model_name = self._get_measure_semantic_model_name(model_index)
            generated_models.append(
                PydanticSemanticModel(
                    name=model_name,
                    node_relation=PydanticNodeRelation(
                        schema_name="demo",
                        alias=model_name,
                    ),
                    measures=model_measures,
                    entities=self._build_entities(model_index),
                    dimensions=self._build_dimensions(),
                )
            )

        return generated_models

    def _build_entities(self, semantic_model_index: int) -> Sequence[PydanticEntity]:
        """Return the model-specific primary entity followed by the common entity."""
        primary_entity = PydanticEntity(
            name=self._get_primary_entity_name_for_measure_semantic_model(semantic_model_index),
            type=EntityType.PRIMARY,
        )
        common_entity = PydanticEntity(
            name=self._parameter_set.common_entity_name,
            type=EntityType.UNIQUE,
        )
        return [primary_entity, common_entity]

    def _build_dimensions(self) -> Sequence[PydanticDimension]:
        """Return the single daily time dimension (`ds`) that the measures aggregate on."""
        time_dimension = PydanticDimension(
            name="ds",
            type=DimensionType.TIME,
            type_params=PydanticDimensionTypeParams(
                time_granularity=TimeGranularity.DAY,
            ),
        )
        return [time_dimension]

    def _get_measure_semantic_model_name(self, semantic_model_index: int) -> str:
        """Return the name of the measure semantic model at the given index."""
        return f"measure_model_{semantic_model_index:03}"

    def _get_primary_entity_name_for_measure_semantic_model(self, semantic_model_index: int) -> str:
        """Return the name of the primary entity for the measure semantic model at the given index."""
        model_name = self._get_measure_semantic_model_name(semantic_model_index)
        return f"{model_name}_primary_entity"
# tests_metricflow/performance/metric_generator.py


@dataclass(frozen=True)
class MetricIndex:
    """Index for a generated metric in the semantic manifest.

    Since metrics can be defined through other metrics, the `depth_index` describes the number of parents for a given
    metric in the generated manifest. For example, a `depth_index=0` describes a simple metric that does not depend on
    any other metrics. `depth_index=1` describes a derived metric that is defined using all metrics at `depth_index=0`.

    The `width_index` enumerates the nth metric generated for the given depth (name needs improvement).

    Raises:
        ValueError: On construction, if either index is negative.
    """

    depth_index: int
    width_index: int

    def __post_init__(self) -> None:  # noqa: D105
        if self.depth_index < 0:
            raise ValueError(f"{self.depth_index=} should be >= 0")
        if self.width_index < 0:
            raise ValueError(f"{self.width_index=} should be >= 0")


class MetricGenerator:
    """Helps generate metrics for the synthetic manifest."""

    def __init__(  # noqa: D107
        self, parameter_set: SyntheticManifestParameterSet, measure_generator: MeasureGenerator
    ) -> None:
        self._parameter_set = parameter_set
        self._measure_generator = measure_generator

    def generate_metrics(self) -> Sequence[PydanticMetric]:
        """Return `max_metric_width` metrics for each depth in `[0, max_metric_depth)`, ordered by depth then width."""
        metrics = []
        for depth_index in range(self._parameter_set.max_metric_depth):
            metrics.extend(
                self._generate_metric(metric_index) for metric_index in self._metric_indexes_at_depth(depth_index)
            )
        return metrics

    def get_first_index_at_max_depth(self) -> MetricIndex:
        """For the highest possible metric depth in the semantic manifest, return the index of the first metric.

        Raises:
            ValueError: If `max_metric_depth` is less than 1, since the resulting depth index would be negative.
        """
        return MetricIndex(
            depth_index=self._parameter_set.max_metric_depth - 1,
            width_index=0,
        )

    def get_next_wrapped_width_index(self, metric_index: MetricIndex) -> MetricIndex:
        """Return the index of the next metric at the same depth level, wrapping to width 0 after the last one."""
        return MetricIndex(
            depth_index=metric_index.depth_index,
            width_index=(metric_index.width_index + 1) % self._parameter_set.max_metric_width,
        )

    def get_metric_name(self, index: MetricIndex) -> str:
        """Return the metric name for the index, e.g. depth 1 / width 2 -> `metric_1_002`."""
        return f"metric_{index.depth_index}_{index.width_index:03}"

    def _metric_indexes_at_depth(self, depth_index: int) -> Sequence[MetricIndex]:
        """Return the indexes of all metrics at the given depth, in increasing width order."""
        return tuple(
            MetricIndex(depth_index=depth_index, width_index=width_index)
            for width_index in range(self._parameter_set.max_metric_width)
        )

    def _generate_metric(self, metric_index: MetricIndex) -> PydanticMetric:
        """Build a simple metric at depth 0, or a derived metric that sums all metrics one level below."""
        metric_name = self.get_metric_name(metric_index)

        if metric_index.depth_index == 0:
            # There can be more metrics than measures, so wrap the width index around the available measures.
            measure_name = self._measure_generator.get_measure_name(
                measure_index=metric_index.width_index % self._measure_generator.unique_measure_count
            )
            return PydanticMetric(
                name=metric_name,
                type=MetricType.SIMPLE,
                type_params=PydanticMetricTypeParams(measure=PydanticMetricInputMeasure(name=measure_name)),
            )

        input_metric_names = tuple(
            self.get_metric_name(lower_depth_metric_index)
            for lower_depth_metric_index in self._metric_indexes_at_depth(metric_index.depth_index - 1)
        )
        return PydanticMetric(
            name=metric_name,
            type=MetricType.DERIVED,
            type_params=PydanticMetricTypeParams(
                metrics=[PydanticMetricInput(name=input_metric_name) for input_metric_name in input_metric_names],
                expr=" + ".join(input_metric_names),
            ),
        )


# tests_metricflow/performance/saved_query_generator.py


class SavedQueryGenerator:
    """Helps generate saved queries for the synthetic manifest."""

    def __init__(  # noqa: D107
        self,
        parameter_set: SyntheticManifestParameterSet,
        metric_generator: MetricGenerator,
        categorical_dimension_generator: CategoricalDimensionGenerator,
    ) -> None:
        self._parameter_set = parameter_set
        self._metric_generator = metric_generator
        self._dimension_generator = categorical_dimension_generator
        self._naming_scheme = ObjectBuilderNamingScheme()

    def _get_saved_query_name(self, saved_query_index: int) -> str:
        """Return the name of the saved query at the given index."""
        return f"saved_query_{saved_query_index:03}"

    def generate_saved_queries(self) -> Sequence[PydanticSavedQuery]:
        """Return `saved_query_count` saved queries.

        Each saved query selects `metrics_per_saved_query` metrics from the highest metric depth and groups by
        `categorical_dimensions_per_saved_query` categorical dimensions accessed through the common entity. Metric and
        dimension indexes keep advancing (with wrap-around) across saved queries.
        """
        saved_queries = []
        # Resolved lazily on first use so that a parameter set that needs no metrics in saved queries (e.g. no saved
        # queries at all) does not fail just because there is no valid metric depth.
        next_metric_index = None
        next_categorical_dimension_index = 0

        for saved_query_index in range(self._parameter_set.saved_query_count):
            metrics = []
            for _ in range(self._parameter_set.metrics_per_saved_query):
                if next_metric_index is None:
                    next_metric_index = self._metric_generator.get_first_index_at_max_depth()
                metrics.append(self._metric_generator.get_metric_name(next_metric_index))
                next_metric_index = self._metric_generator.get_next_wrapped_width_index(next_metric_index)

            categorical_dimensions = []
            for _ in range(self._parameter_set.categorical_dimensions_per_saved_query):
                categorical_dimensions.append(
                    self._naming_scheme.input_str(
                        DimensionSpec(
                            element_name=self._dimension_generator.get_dimension_name(next_categorical_dimension_index),
                            entity_links=(EntityReference(self._parameter_set.common_entity_name),),
                        )
                    )
                )
                next_categorical_dimension_index = self._dimension_generator.get_next_wrapped_index(
                    next_categorical_dimension_index
                )

            saved_queries.append(
                PydanticSavedQuery(
                    name=self._get_saved_query_name(saved_query_index),
                    query_params=PydanticSavedQueryQueryParams(
                        metrics=metrics,
                        group_by=categorical_dimensions,
                    ),
                )
            )

        return saved_queries
# tests_metricflow/performance/semantic_manifest_generator.py


class SyntheticManifestGenerator:
    """Generates a synthetic semantic manifest that can be used for performance testing.

    The work is delegated to one generator per kind of manifest object (measures, dimensions, semantic models,
    metrics, saved queries), all configured from the same parameter set.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:  # noqa: D107
        self._parameter_set = parameter_set

        # Name generators that are shared between the generators below.
        self._measure_generator = MeasureGenerator(parameter_set)
        self._categorical_dimension_generator = CategoricalDimensionGenerator(parameter_set)

        self._measure_semantic_model_generator = MeasureSemanticModelGenerator(
            parameter_set=parameter_set,
            measure_generator=self._measure_generator,
        )
        self._dimension_semantic_model_generator = DimensionSemanticModelGenerator(
            parameter_set=parameter_set,
            categorical_dimension_generator=self._categorical_dimension_generator,
        )
        self._metric_generator = MetricGenerator(
            parameter_set=parameter_set,
            measure_generator=self._measure_generator,
        )
        self._saved_query_generator = SavedQueryGenerator(
            parameter_set=parameter_set,
            metric_generator=self._metric_generator,
            categorical_dimension_generator=self._categorical_dimension_generator,
        )

    def generate_manifest(self) -> PydanticSemanticManifest:
        """Generate a manifest using the given parameters.

        Semantic models containing measures are listed before those containing dimensions. The project configuration
        has a single daily time spine.
        """
        all_semantic_models: List[PydanticSemanticModel] = [
            *self._measure_semantic_model_generator.generate_semantic_models(),
            *self._dimension_semantic_model_generator.generate_semantic_models(),
        ]
        generated_metrics = self._metric_generator.generate_metrics()

        daily_time_spine = PydanticTimeSpine(
            node_relation=PydanticNodeRelation(
                alias="time_spine_source_table",
                schema_name="demo",
            ),
            primary_column=PydanticTimeSpinePrimaryColumn(
                name="ds",
                time_granularity=TimeGranularity.DAY,
            ),
        )
        project_configuration = PydanticProjectConfiguration(time_spines=[daily_time_spine])

        generated_saved_queries = self._saved_query_generator.generate_saved_queries()

        return PydanticSemanticManifest(
            semantic_models=all_semantic_models,
            metrics=generated_metrics,
            project_configuration=project_configuration,
            saved_queries=generated_saved_queries,
        )


# tests_metricflow/performance/synthetic_manifest_parameter_set.py


@dataclass(frozen=True)
class SyntheticManifestParameterSet:
    """Describes how to generate a synthetic manifest for performance testing.

    Goals are:
    * Allow modeling of similar patterns seen in production manifests.
    * Make generation straightforward.
    * Minimize the number of parameters required.

    Notes:
    * The synthetic manifest groups semantic models into two types - ones containing measures, and others containing
      dimensions.
    * A dimension with the same name does not appear in multiple semantic models.
    * All semantic models contain a common entity so that any measure can be queried by any dimension.
    * The metric `depth` describes the number of hops that are required to get to the simple metric when following the
      definition tree.
    * Metrics at `depth=0` are simple metrics. Metrics at other depth values are derived.
    * Each metric is defined using all possible metrics at a lower depth.
    * The number of metrics that are generated with a given `depth` is called the `width`.
    * A random seed can be added later.
    """

    # How many semantic models containing measures to generate.
    measure_semantic_model_count: int
    # How many measures each semantic model containing measures should have.
    measures_per_semantic_model: int

    # How many semantic models containing dimensions to generate.
    dimension_semantic_model_count: int
    # How many categorical dimensions each semantic model containing dimensions should have.
    categorical_dimensions_per_semantic_model: int

    # The metric `depth` and `width` limits - see the class docstring.
    max_metric_depth: int
    max_metric_width: int

    # How many saved queries to generate, and how many metrics / categorical dimensions go in each one.
    saved_query_count: int
    metrics_per_saved_query: int
    categorical_dimensions_per_saved_query: int

    # The name of the entity shared by the semantic models containing measures and the semantic models
    # containing dimensions.
    common_entity_name: str = "common_entity"