diff --git a/.changes/unreleased/Features-20240614-140515.yaml b/.changes/unreleased/Features-20240614-140515.yaml new file mode 100644 index 00000000..2eb4eba3 --- /dev/null +++ b/.changes/unreleased/Features-20240614-140515.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add default_granularity to metric spec. +time: 2024-06-14T14:05:15.355931-07:00 +custom: + Author: courtneyholcomb + Issue: "290" diff --git a/Makefile b/Makefile index ebb91ee9..7127e3cc 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ test: export FORMAT_JSON_LOGS="1" && hatch -v run dev-env:pytest -n auto tests lint: - hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files + hatch run dev-env:pre-commit run --color=always --all-files json_schema: hatch run dev-env:python dbt_semantic_interfaces/parsing/generate_json_schema_file.py diff --git a/dbt_semantic_interfaces/implementations/metric.py b/dbt_semantic_interfaces/implementations/metric.py index 6e05566b..4a6313dd 100644 --- a/dbt_semantic_interfaces/implementations/metric.py +++ b/dbt_semantic_interfaces/implementations/metric.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, List, Optional, Sequence, Set from typing_extensions import override @@ -16,7 +16,7 @@ PydanticWhereFilterIntersection, ) from dbt_semantic_interfaces.implementations.metadata import PydanticMetadata -from dbt_semantic_interfaces.protocols import MetricConfig, ProtocolHint +from dbt_semantic_interfaces.protocols import Metric, MetricConfig, ProtocolHint from dbt_semantic_interfaces.references import MeasureReference, MetricReference from dbt_semantic_interfaces.type_enums import ( ConversionCalculationType, @@ -191,9 +191,13 @@ def _implements_protocol(self) -> MetricConfig: # noqa: D meta: Dict[str, Any] = Field(default_factory=dict) -class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing): +class PydanticMetric(HashableBaseModel, 
ModelWithMetadataParsing, ProtocolHint[Metric]): """Describes a metric.""" + @override + def _implements_protocol(self) -> Metric: # noqa: D + return self + name: str description: Optional[str] type: MetricType @@ -202,6 +206,7 @@ class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing): metadata: Optional[PydanticMetadata] label: Optional[str] = None config: Optional[PydanticMetricConfig] + default_granularity: Optional[TimeGranularity] = None @property def input_measures(self) -> Sequence[PydanticMetricInputMeasure]: @@ -228,3 +233,31 @@ def input_metrics(self) -> Sequence[PydanticMetricInput]: return (self.type_params.numerator, self.type_params.denominator) else: assert_values_exhausted(self.type) + + @staticmethod + def all_input_measures_for_metric( + metric: Metric, metric_index: Dict[MetricReference, Metric] + ) -> Set[MeasureReference]: + """Gets all input measures for the metric, including those defined on input metrics (recursively).""" + measures: Set[MeasureReference] = set() + if metric.type is MetricType.SIMPLE or metric.type is MetricType.CUMULATIVE: + assert ( + metric.type_params.measure is not None + ), f"Metric {metric.name} should have a measure defined, but it does not." + measures.add(metric.type_params.measure.measure_reference) + elif metric.type is MetricType.DERIVED or metric.type is MetricType.RATIO: + for input_metric in metric.input_metrics: + nested_metric = metric_index.get(input_metric.as_reference) + assert nested_metric, f"Could not find metric {input_metric.name} in semantic manifest." + measures.update( + PydanticMetric.all_input_measures_for_metric(metric=nested_metric, metric_index=metric_index) + ) + elif metric.type is MetricType.CONVERSION: + conversion_type_params = metric.type_params.conversion_type_params + assert conversion_type_params, "Conversion metric should have conversion_type_params." 
+ measures.add(conversion_type_params.base_measure.measure_reference) + measures.add(conversion_type_params.conversion_measure.measure_reference) + else: + assert_values_exhausted(metric.type) + + return measures diff --git a/dbt_semantic_interfaces/implementations/semantic_model.py b/dbt_semantic_interfaces/implementations/semantic_model.py index 90b4a89e..10c85d33 100644 --- a/dbt_semantic_interfaces/implementations/semantic_model.py +++ b/dbt_semantic_interfaces/implementations/semantic_model.py @@ -19,6 +19,7 @@ SemanticModelDefaults, ) from dbt_semantic_interfaces.references import ( + DimensionReference, EntityReference, LinkableElementReference, MeasureReference, @@ -168,7 +169,7 @@ def get_measure(self, measure_reference: MeasureReference) -> PydanticMeasure: f"No dimension with name ({measure_reference.element_name}) in semantic_model with name ({self.name})" ) - def get_dimension(self, dimension_reference: LinkableElementReference) -> PydanticDimension: # noqa: D + def get_dimension(self, dimension_reference: DimensionReference) -> PydanticDimension: # noqa: D for dim in self.dimensions: if dim.reference == dimension_reference: return dim diff --git a/dbt_semantic_interfaces/parsing/generated_json_schemas/default_explicit_schema.json b/dbt_semantic_interfaces/parsing/generated_json_schemas/default_explicit_schema.json index ad6a5785..114cbf79 100644 --- a/dbt_semantic_interfaces/parsing/generated_json_schemas/default_explicit_schema.json +++ b/dbt_semantic_interfaces/parsing/generated_json_schemas/default_explicit_schema.json @@ -458,6 +458,32 @@ "config": { "$ref": "#/definitions/metric_config_schema" }, + "default_granularity": { + "enum": [ + "NANOSECOND", + "MICROSECOND", + "MILLISECOND", + "SECOND", + "MINUTE", + "HOUR", + "DAY", + "WEEK", + "MONTH", + "QUARTER", + "YEAR", + "nanosecond", + "microsecond", + "millisecond", + "second", + "minute", + "hour", + "day", + "week", + "month", + "quarter", + "year" + ] + }, "description": { "type": "string" 
}, diff --git a/dbt_semantic_interfaces/parsing/schemas.py b/dbt_semantic_interfaces/parsing/schemas.py index 96d45143..06eb7485 100644 --- a/dbt_semantic_interfaces/parsing/schemas.py +++ b/dbt_semantic_interfaces/parsing/schemas.py @@ -304,6 +304,7 @@ "description": {"type": "string"}, "label": {"type": "string"}, "config": {"$ref": "metric_config_schema"}, + "default_granularity": {"enum": time_granularity_values}, }, "additionalProperties": False, "required": ["name", "type", "type_params"], diff --git a/dbt_semantic_interfaces/protocols/metric.py b/dbt_semantic_interfaces/protocols/metric.py index 71925e49..fca26e31 100644 --- a/dbt_semantic_interfaces/protocols/metric.py +++ b/dbt_semantic_interfaces/protocols/metric.py @@ -325,3 +325,14 @@ def config(self) -> Optional[MetricConfig]: # noqa: D def label(self) -> Optional[str]: """Returns a string representing a human readable label for the metric.""" pass + + @property + @abstractmethod + def default_granularity(self) -> Optional[TimeGranularity]: + """Default grain used for the metric. 
+ + This will be used in a couple of circumstances: + - as the default grain for metric_time if no grain is specified + - as the window function order by when reaggregating cumulative metrics for non-default grains + """ + pass diff --git a/dbt_semantic_interfaces/references.py b/dbt_semantic_interfaces/references.py index ec696bfa..825f1aa7 100644 --- a/dbt_semantic_interfaces/references.py +++ b/dbt_semantic_interfaces/references.py @@ -47,6 +47,7 @@ class EntityReference(LinkableElementReference): # noqa: D class TimeDimensionReference(DimensionReference): # noqa: D pass + @property def dimension_reference(self) -> DimensionReference: # noqa: D return DimensionReference(element_name=self.element_name) diff --git a/dbt_semantic_interfaces/test_utils.py b/dbt_semantic_interfaces/test_utils.py index addd1b93..a6c52f76 100644 --- a/dbt_semantic_interfaces/test_utils.py +++ b/dbt_semantic_interfaces/test_utils.py @@ -24,7 +24,7 @@ PydanticSemanticModel, ) from dbt_semantic_interfaces.parsing.objects import YamlConfigFile -from dbt_semantic_interfaces.type_enums import MetricType +from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity logger = logging.getLogger(__name__) @@ -123,6 +123,7 @@ def metric_with_guaranteed_meta( type_params: PydanticMetricTypeParams, metadata: PydanticMetadata = default_meta(), description: str = "adhoc metric", + default_granularity: Optional[TimeGranularity] = None, ) -> PydanticMetric: """Creates a metric with the given input. 
@@ -135,6 +136,7 @@ def metric_with_guaranteed_meta( type_params=type_params, filter=None, metadata=metadata, + default_granularity=default_granularity, ) diff --git a/dbt_semantic_interfaces/transformations/default_grain.py b/dbt_semantic_interfaces/transformations/default_grain.py new file mode 100644 index 00000000..0b31a8f0 --- /dev/null +++ b/dbt_semantic_interfaces/transformations/default_grain.py @@ -0,0 +1,52 @@ +from typing import Set + +from typing_extensions import override + +from dbt_semantic_interfaces.implementations.semantic_manifest import ( + PydanticSemanticManifest, +) +from dbt_semantic_interfaces.protocols import ProtocolHint +from dbt_semantic_interfaces.references import ( + DimensionReference, + TimeDimensionReference, +) +from dbt_semantic_interfaces.transformations.transform_rule import ( + SemanticManifestTransformRule, +) +from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity + + +class SetDefaultGrainRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]): + """If default_granularity is not set for a metric, set it to DAY if available, else the smallest available grain.""" + + @override + def _implements_protocol(self) -> SemanticManifestTransformRule[PydanticSemanticManifest]: # noqa: D + return self + + @staticmethod + def transform_model(semantic_manifest: PydanticSemanticManifest) -> PydanticSemanticManifest: + """For each metric, set default_granularity to DAY or smallest granularity supported by all agg_time_dims.""" + for metric in semantic_manifest.metrics: + if metric.default_granularity: + continue + + default_granularity = TimeGranularity.DAY + for semantic_model in semantic_manifest.semantic_models: + # Track seen dimensions per semantic model; a same-named dimension elsewhere may differ in grain. + seen_agg_time_dimensions: Set[TimeDimensionReference] = set() + for measure_ref in set(metric.measure_references).intersection(semantic_model.measure_references): + agg_time_dimension_ref = semantic_model.checked_agg_time_dimension_for_measure(measure_ref) + if
agg_time_dimension_ref in seen_agg_time_dimensions: + continue + seen_agg_time_dimensions.add(agg_time_dimension_ref) + dimension = semantic_model.get_dimension(DimensionReference(agg_time_dimension_ref.element_name)) + if ( + dimension.type_params + and dimension.type_params.time_granularity.to_int() > default_granularity.to_int() + ): + default_granularity = dimension.type_params.time_granularity + + # Persist the computed grain on the metric; without this the transformation has no effect. + metric.default_granularity = default_granularity + + return semantic_manifest diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index ac206bdd..bced1a64 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -1,17 +1,30 @@ import traceback -from typing import Generic, List, Optional, Sequence +from typing import Dict, Generic, List, Optional, Sequence from dbt_semantic_interfaces.errors import ParsingException -from dbt_semantic_interfaces.implementations.metric import PydanticMetricTimeWindow +from dbt_semantic_interfaces.implementations.metric import ( + PydanticMetric, + PydanticMetricTimeWindow, +) from dbt_semantic_interfaces.protocols import ( ConversionTypeParams, + Dimension, Metric, SemanticManifest, SemanticManifestT, SemanticModel, ) -from dbt_semantic_interfaces.references import MeasureReference, MetricModelReference -from dbt_semantic_interfaces.type_enums import AggregationType, MetricType +from dbt_semantic_interfaces.references import ( + DimensionReference, + MeasureReference, + MetricModelReference, + MetricReference, +) +from dbt_semantic_interfaces.type_enums import ( + AggregationType, + MetricType, + TimeGranularity, +) from dbt_semantic_interfaces.validations.unique_valid_name import UniqueAndValidNameRule from dbt_semantic_interfaces.validations.validator_helpers import ( FileContext, @@ -562,3 +575,100 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati conversion_semantic_model=conversion_semantic_model, ) return issues + + +class
DefaultGrainRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]): + """Checks that default_granularity set for metric is queryable for that metric.""" + + @staticmethod + def _min_queryable_granularity_for_metric( + metric: Metric, + metric_index: Dict[MetricReference, Metric], + measure_to_agg_time_dimension: Dict[MeasureReference, Dimension], + ) -> TimeGranularity: + """Get the minimum time granularity this metric is allowed to be queried with. + + This should be the largest granularity that any of the metric's agg_time_dimensions is defined at. + Defaults to DAY if no granularity is found on any of those agg_time_dimensions. + """ + min_queryable_granularity: Optional[TimeGranularity] = None + for measure_reference in PydanticMetric.all_input_measures_for_metric(metric=metric, metric_index=metric_index): + agg_time_dimension = measure_to_agg_time_dimension.get(measure_reference) + assert agg_time_dimension, f"Measure '{measure_reference.element_name}' not found in semantic manifest." + if not agg_time_dimension.type_params: + continue + defined_time_granularity = agg_time_dimension.type_params.time_granularity + if not min_queryable_granularity or defined_time_granularity.to_int() > min_queryable_granularity.to_int(): + min_queryable_granularity = defined_time_granularity + + return min_queryable_granularity or TimeGranularity.DAY + + @staticmethod + @validate_safely( + whats_being_done="running model validation ensuring a metric's default_granularity is valid for the metric" + ) + def _validate_metric( + metric: Metric, + metric_index: Dict[MetricReference, Metric], + measure_to_agg_time_dimension: Dict[MeasureReference, Dimension], + ) -> Sequence[ValidationIssue]: # noqa: D + issues: List[ValidationIssue] = [] + context = MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ) + + if metric.default_granularity: + min_queryable_granularity = DefaultGrainRule._min_queryable_granularity_for_metric( +
metric=metric, metric_index=metric_index, measure_to_agg_time_dimension=measure_to_agg_time_dimension + ) + valid_granularities = [ + granularity.name + for granularity in TimeGranularity + if granularity.to_int() >= min_queryable_granularity.to_int() + ] + if metric.default_granularity.name not in valid_granularities: + issues.append( + ValidationError( + context=context, + message=( + f"`default_granularity` for metric '{metric.name}' must be >= " + f"{min_queryable_granularity.name}. Valid options are those that are >= the largest " + f"granularity defined for the metric's measures' agg_time_dimensions. Got: " + f"{metric.default_granularity.name}. Valid options: {valid_granularities}" + ), + ) + ) + + return issues + + @staticmethod + @validate_safely(whats_being_done="running manifest validation ensuring metric default_granularitys are valid") + def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: + """Validate that the default_granularity for each metric is queryable for that metric. + + TODO: figure out a more efficient way to reference other aspects of the model. This validation essentially + requires parsing the entire model, which could be slow and likely is repeated work. The blocker is that the + inputs to validations are protocols, which don't easily store parsed metadata. + """ + issues: List[ValidationIssue] = [] + + measure_to_agg_time_dimension: Dict[MeasureReference, Dimension] = {} + for semantic_model in semantic_manifest.semantic_models: + dimension_index = {DimensionReference(dimension.name): dimension for dimension in semantic_model.dimensions} + for measure in semantic_model.measures: + agg_time_dimension_ref = semantic_model.checked_agg_time_dimension_for_measure(measure.reference) + agg_time_dimension = dimension_index.get(agg_time_dimension_ref.dimension_reference) + assert ( + agg_time_dimension + ), f"Dimension '{agg_time_dimension_ref.element_name}' not found in semantic manifest." 
+ measure_to_agg_time_dimension[measure.reference] = agg_time_dimension + + metric_index = {MetricReference(metric.name): metric for metric in semantic_manifest.metrics} + for metric in semantic_manifest.metrics or []: + issues += DefaultGrainRule._validate_metric( + metric=metric, + metric_index=metric_index, + measure_to_agg_time_dimension=measure_to_agg_time_dimension, + ) + return issues diff --git a/pyproject.toml b/pyproject.toml index 736e009d..60bb3415 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dbt-semantic-interfaces" -version = "0.6.1" +version = "0.6.2.dev0" description = 'The shared semantic layer definitions that dbt-core and MetricFlow use' readme = "README.md" requires-python = ">=3.8" diff --git a/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/metrics.yaml b/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/metrics.yaml index f5f74eb7..0d8eea31 100644 --- a/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/metrics.yaml +++ b/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/metrics.yaml @@ -154,6 +154,7 @@ metric: name: "trailing_2_months_revenue" description: "trailing_2_months_revenue" type: cumulative + default_granularity: month type_params: measure: name: txn_revenue diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index db10f2a6..a84cbe6a 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -46,6 +46,7 @@ CUMULATIVE_TYPE_PARAMS_SUPPORTED, ConversionMetricRule, CumulativeMetricRule, + DefaultGrainRule, DerivedMetricRule, WhereFiltersAreParseable, ) @@ -713,3 +714,122 @@ def test_cumulative_metrics() -> None: # noqa: D missing_error_strings.add(expected_str) assert len(missing_error_strings) == 0, "Failed to match one or more expected issues: " f"{missing_error_strings} in {set([x.as_readable_str() for x in build_issues])}" + + +def test_default_granularity() -> None: + """Test 
that default grain is validated appropriately.""" + week_measure_name = "foo" + month_measure_name = "boo" + week_time_dim_name = "ds__week" + month_time_dim_name = "ds__month" + model_validator = SemanticManifestValidator[PydanticSemanticManifest]([DefaultGrainRule()]) + validation_results = model_validator.validate_semantic_manifest( + PydanticSemanticManifest( + semantic_models=[ + semantic_model_with_guaranteed_meta( + name="semantic_model", + measures=[ + PydanticMeasure( + name=month_measure_name, agg=AggregationType.SUM, agg_time_dimension=month_time_dim_name + ), + PydanticMeasure( + name=week_measure_name, agg=AggregationType.SUM, agg_time_dimension=week_time_dim_name + ), + ], + dimensions=[ + PydanticDimension( + name=month_time_dim_name, + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.MONTH), + ), + PydanticDimension( + name=week_time_dim_name, + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.WEEK), + ), + ], + ), + ], + metrics=[ + # Simple metrics + metric_with_guaranteed_meta( + name="month_metric_with_no_default_granularity_set", + type=MetricType.SIMPLE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name=month_measure_name), + ), + ), + metric_with_guaranteed_meta( + name="week_metric_with_valid_default_granularity", + type=MetricType.SIMPLE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name=week_measure_name), + ), + default_granularity=TimeGranularity.MONTH, + ), + metric_with_guaranteed_meta( + name="month_metric_with_invalid_default_granularity", + type=MetricType.SIMPLE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name=month_measure_name), + ), + default_granularity=TimeGranularity.WEEK, + ), + # Derived metrics + metric_with_guaranteed_meta( + name="derived_metric_with_no_default_granularity_set", + type=MetricType.DERIVED, + 
type_params=PydanticMetricTypeParams( + metrics=[ + PydanticMetricInput(name="week_metric_with_valid_default_granularity"), + ], + expr="week_metric_with_valid_default_granularity + 1", + ), + ), + metric_with_guaranteed_meta( + name="derived_metric_with_valid_default_granularity", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams( + metrics=[ + PydanticMetricInput(name="week_metric_with_valid_default_granularity"), + PydanticMetricInput(name="month_metric_with_no_default_granularity_set"), + ], + expr=( + "week_metric_with_valid_default_granularity + month_metric_with_no_default_granularity_set" + ), + ), + default_granularity=TimeGranularity.YEAR, + ), + metric_with_guaranteed_meta( + name="derived_metric_with_invalid_default_granularity", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams( + metrics=[ + PydanticMetricInput(name="week_metric_with_valid_default_granularity"), + PydanticMetricInput(name="month_metric_with_no_default_granularity_set"), + ], + expr=( + "week_metric_with_valid_default_granularity + month_metric_with_no_default_granularity_set" + ), + ), + default_granularity=TimeGranularity.DAY, + ), + ], + project_configuration=EXAMPLE_PROJECT_CONFIGURATION, + ) + ) + + build_issues = validation_results.all_issues + assert len(build_issues) == 2 + expected_substr1 = ( + "`default_granularity` for metric 'month_metric_with_invalid_default_granularity' must be >= MONTH." + ) + expected_substr2 = ( + "`default_granularity` for metric 'derived_metric_with_invalid_default_granularity' must be >= MONTH." + ) + missing_error_strings = set() + for expected_str in [expected_substr1, expected_substr2]: + if not any(actual_str.as_readable_str().find(expected_str) != -1 for actual_str in build_issues): + missing_error_strings.add(expected_str) + assert len(missing_error_strings) == 0, "Failed to match one or more expected issues: " + f"{missing_error_strings} in {set([x.as_readable_str() for x in build_issues])}"