class ConversionMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
    """Checks that conversion metrics are configured properly.

    For every metric of type ``MetricType.CONVERSION`` the rule verifies that:

    * the base and conversion measures each belong to some semantic model,
    * the configured entity exists in both of those semantic models,
    * both measures aggregate as COUNT, COUNT_DISTINCT, or SUM with ``expr == "1"``,
    * the optional window can be parsed by ``PydanticMetricTimeWindow.parse``,
    * every constant property names an entity or dimension of its semantic model.
    """

    @staticmethod
    def _metric_context(metric: Metric) -> MetricContext:
        """Build the validation context that points an issue at ``metric``."""
        return MetricContext(
            file_context=FileContext.from_metadata(metadata=metric.metadata),
            metric=MetricModelReference(metric_name=metric.name),
        )

    @staticmethod
    @validate_safely(whats_being_done="checking that the params of metric are valid if it is a conversion metric")
    def _validate_type_params(metric: Metric, conversion_type_params: ConversionTypeParams) -> List[ValidationIssue]:
        """Return an error if the conversion window cannot be parsed.

        The window is rendered as ``"<count> <granularity>"`` and fed back through
        ``PydanticMetricTimeWindow.parse``; a ``ParsingException`` becomes a ValidationError.
        """
        issues: List[ValidationIssue] = []

        window = conversion_type_params.window
        if window:
            try:
                window_str = f"{window.count} {window.granularity.value}"
                PydanticMetricTimeWindow.parse(window_str)
            except ParsingException as e:
                issues.append(
                    ValidationError(
                        context=ConversionMetricRule._metric_context(metric),
                        message="".join(traceback.format_exception_only(type(e), value=e)),
                        extra_detail="".join(traceback.format_tb(e.__traceback__)),
                    )
                )
        return issues

    @staticmethod
    @validate_safely(whats_being_done="checks that the entity exists in the base/conversion semantic model")
    def _validate_entity_exists(
        metric: Metric, entity: str, base_semantic_model: SemanticModel, conversion_semantic_model: SemanticModel
    ) -> List[ValidationIssue]:
        """Return one error per semantic model (base, conversion) that lacks ``entity``."""
        issues: List[ValidationIssue] = []

        # The base model is checked first so that issue order stays base -> conversion.
        for role, semantic_model in (("base", base_semantic_model), ("conversion", conversion_semantic_model)):
            entity_names = {model_entity.name for model_entity in semantic_model.entities}
            if entity not in entity_names:
                issues.append(
                    ValidationError(
                        context=ConversionMetricRule._metric_context(metric),
                        message=f"Entity: {entity} not found in {role} semantic model: {semantic_model.name}.",
                    )
                )
        return issues

    @staticmethod
    @validate_safely(whats_being_done="checks that the provided measures are valid for conversion metrics")
    def _validate_measures(
        metric: Metric, base_semantic_model: SemanticModel, conversion_semantic_model: SemanticModel
    ) -> List[ValidationIssue]:
        """Return an error for each measure whose aggregation is not COUNT, COUNT_DISTINCT, or SUM(1).

        Raises (inside ``validate_safely``):
            AssertionError: if ``conversion_type_params`` is missing or a measure is not in its model.
        """
        issues: List[ValidationIssue] = []

        def _validate_measure(measure_reference: MeasureReference, semantic_model: SemanticModel) -> None:
            measure = next(
                (
                    model_measure
                    for model_measure in semantic_model.measures
                    if model_measure.reference == measure_reference
                ),
                None,
            )
            # Report the measure that was asked for; the loop variable would name an unrelated
            # measure (or be unbound when the model has no measures at all).
            assert measure is not None, (
                f"Measure '{measure_reference.element_name}' wasn't found in "
                f"semantic model '{semantic_model.name}'"
            )

            is_count = measure.agg in (AggregationType.COUNT, AggregationType.COUNT_DISTINCT)
            is_sum_of_one = measure.agg == AggregationType.SUM and measure.expr == "1"
            if not (is_count or is_sum_of_one):
                issues.append(
                    ValidationError(
                        context=ConversionMetricRule._metric_context(metric),
                        message=f"For conversion metrics, the measure must be COUNT/SUM(1)/COUNT_DISTINCT. "
                        f"Measure: {measure.name} is agg type: {measure.agg}",
                    )
                )

        conversion_type_params = metric.type_params.conversion_type_params
        assert (
            conversion_type_params is not None
        ), "For a conversion metric, type_params.conversion_type_params must exist."

        _validate_measure(
            measure_reference=conversion_type_params.base_measure.measure_reference,
            semantic_model=base_semantic_model,
        )
        _validate_measure(
            measure_reference=conversion_type_params.conversion_measure.measure_reference,
            semantic_model=conversion_semantic_model,
        )
        return issues

    @staticmethod
    @validate_safely(whats_being_done="checks that the provided constant properties are valid")
    def _validate_constant_properties(
        metric: Metric, base_semantic_model: SemanticModel, conversion_semantic_model: SemanticModel
    ) -> List[ValidationIssue]:
        """Return an error for each constant property that is not an entity or dimension of its model.

        Base properties are looked up in the base semantic model and conversion properties in the
        conversion semantic model. All base-side issues are emitted before conversion-side issues.
        """
        issues: List[ValidationIssue] = []

        def _elements_in_model(references: List[str], semantic_model: SemanticModel) -> None:
            # Built once per model so each membership test is a hash lookup.
            linkable_elements = {model_entity.name for model_entity in semantic_model.entities} | {
                dimension.name for dimension in semantic_model.dimensions
            }
            for reference in references:
                if reference not in linkable_elements:
                    issues.append(
                        ValidationError(
                            context=ConversionMetricRule._metric_context(metric),
                            message=f"The provided constant property: {reference}, "
                            f"cannot be found in semantic model {semantic_model.name}",
                        )
                    )

        conversion_type_params = metric.type_params.conversion_type_params
        assert (
            conversion_type_params is not None
        ), "For a conversion metric, type_params.conversion_type_params must exist."

        constant_properties = conversion_type_params.constant_properties or []
        base_properties = [constant_property.base_property for constant_property in constant_properties]
        conversion_properties = [constant_property.conversion_property for constant_property in constant_properties]

        _elements_in_model(references=base_properties, semantic_model=base_semantic_model)
        _elements_in_model(references=conversion_properties, semantic_model=conversion_semantic_model)
        return issues

    @staticmethod
    def _get_semantic_model_from_measure(
        measure_reference: MeasureReference, semantic_manifest: SemanticManifest
    ) -> Optional[SemanticModel]:
        """Return the first semantic model that defines ``measure_reference``, or None if none does."""
        for model in semantic_manifest.semantic_models:
            if any(measure.reference == measure_reference for measure in model.measures):
                return model
        return None

    @staticmethod
    @validate_safely(whats_being_done="running manifest validation ensuring conversion metrics are valid")
    def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:  # noqa: D
        """Run every conversion-metric check against each conversion metric in the manifest.

        Metrics of any other type are ignored. When either measure cannot be resolved to a
        semantic model, a single error is recorded and the remaining checks for that metric
        are skipped, since they all need both semantic models.
        """
        issues: List[ValidationIssue] = []

        for metric in semantic_manifest.metrics or []:
            if metric.type != MetricType.CONVERSION:
                continue

            conversion_type_params = metric.type_params.conversion_type_params
            assert (
                conversion_type_params is not None
            ), "For a conversion metric, type_params.conversion_type_params must exist."

            # Resolve each measure to the semantic model that defines it.
            base_semantic_model = ConversionMetricRule._get_semantic_model_from_measure(
                measure_reference=conversion_type_params.base_measure.measure_reference,
                semantic_manifest=semantic_manifest,
            )
            conversion_semantic_model = ConversionMetricRule._get_semantic_model_from_measure(
                measure_reference=conversion_type_params.conversion_measure.measure_reference,
                semantic_manifest=semantic_manifest,
            )
            if base_semantic_model is None or conversion_semantic_model is None:
                # If the measures don't exist, stop validating this metric: every later check would fail.
                issues.append(
                    ValidationError(
                        context=ConversionMetricRule._metric_context(metric),
                        message=f"For metric '{metric.name}', conversion measures specified was not found.",
                    )
                )
                continue

            issues += ConversionMetricRule._validate_entity_exists(
                metric=metric,
                entity=conversion_type_params.entity,
                base_semantic_model=base_semantic_model,
                conversion_semantic_model=conversion_semantic_model,
            )
            issues += ConversionMetricRule._validate_measures(
                metric=metric,
                base_semantic_model=base_semantic_model,
                conversion_semantic_model=conversion_semantic_model,
            )
            issues += ConversionMetricRule._validate_type_params(
                metric=metric, conversion_type_params=conversion_type_params
            )
            issues += ConversionMetricRule._validate_constant_properties(
                metric=metric,
                base_semantic_model=base_semantic_model,
                conversion_semantic_model=conversion_semantic_model,
            )
        return issues
def test_conversion_metrics() -> None:  # noqa: D
    """Check that ConversionMetricRule reports each kind of misconfigured conversion metric.

    The manifest holds one valid conversion metric plus three invalid ones:
    a base measure with a MAX aggregation (1 error), an entity missing from both
    semantic models (2 errors), and a constant property whose base and conversion
    names are both unknown (2 errors) -- five errors in total.
    """
    base_measure_name = "base_measure"
    conversion_measure_name = "conversion_measure"
    entity = "entity"
    invalid_entity = "bad"
    invalid_measure = "invalid_measure"
    window = PydanticMetricTimeWindow.parse("7 days")

    def _day_dimension() -> PydanticDimension:
        # A fresh instance per semantic model, as in a hand-written manifest.
        return PydanticDimension(
            name="ds",
            type=DimensionType.TIME,
            type_params=PydanticDimensionTypeParams(
                time_granularity=TimeGranularity.DAY,
            ),
        )

    def _count_measure(name: str) -> PydanticMeasure:
        return PydanticMeasure(name=name, agg=AggregationType.COUNT, agg_time_dimension="ds", expr="1")

    validator = SemanticManifestValidator[PydanticSemanticManifest]([ConversionMetricRule()])
    result = validator.validate_semantic_manifest(
        PydanticSemanticManifest(
            semantic_models=[
                semantic_model_with_guaranteed_meta(
                    name="base",
                    measures=[
                        _count_measure(base_measure_name),
                        PydanticMeasure(name=invalid_measure, agg=AggregationType.MAX, agg_time_dimension="ds"),
                    ],
                    dimensions=[_day_dimension()],
                    entities=[
                        PydanticEntity(name=entity, type=EntityType.PRIMARY),
                    ],
                ),
                semantic_model_with_guaranteed_meta(
                    name="conversion",
                    measures=[_count_measure(conversion_measure_name)],
                    dimensions=[_day_dimension()],
                    entities=[
                        PydanticEntity(name=entity, type=EntityType.PRIMARY),
                    ],
                ),
            ],
            metrics=[
                metric_with_guaranteed_meta(
                    name="proper_metric",
                    type=MetricType.CONVERSION,
                    type_params=PydanticMetricTypeParams(
                        conversion_type_params=PydanticConversionTypeParams(
                            base_measure=PydanticMetricInputMeasure(name=base_measure_name),
                            conversion_measure=PydanticMetricInputMeasure(name=conversion_measure_name),
                            window=window,
                            entity=entity,
                        )
                    ),
                ),
                metric_with_guaranteed_meta(
                    name="bad_measure_metric",
                    type=MetricType.CONVERSION,
                    type_params=PydanticMetricTypeParams(
                        conversion_type_params=PydanticConversionTypeParams(
                            base_measure=PydanticMetricInputMeasure(name=invalid_measure),
                            conversion_measure=PydanticMetricInputMeasure(name=conversion_measure_name),
                            window=window,
                            entity=entity,
                        )
                    ),
                ),
                metric_with_guaranteed_meta(
                    name="entity_doesnt_exist",
                    type=MetricType.CONVERSION,
                    type_params=PydanticMetricTypeParams(
                        conversion_type_params=PydanticConversionTypeParams(
                            base_measure=PydanticMetricInputMeasure(name=base_measure_name),
                            conversion_measure=PydanticMetricInputMeasure(name=conversion_measure_name),
                            window=window,
                            entity=invalid_entity,
                        )
                    ),
                ),
                metric_with_guaranteed_meta(
                    name="constant_property_doesnt_exist",
                    type=MetricType.CONVERSION,
                    type_params=PydanticMetricTypeParams(
                        conversion_type_params=PydanticConversionTypeParams(
                            base_measure=PydanticMetricInputMeasure(name=base_measure_name),
                            conversion_measure=PydanticMetricInputMeasure(name=conversion_measure_name),
                            window=window,
                            entity=entity,
                            constant_properties=[
                                PydanticConstantPropertyInput(base_property="bad_dim", conversion_property="bad_dim2")
                            ],
                        )
                    ),
                ),
            ],
            project_configuration=EXAMPLE_PROJECT_CONFIGURATION,
        )
    )

    build_issues = result.errors
    assert len(build_issues) == 5

    expected_substrings = [
        f"{invalid_entity} not found in base semantic model",
        f"{invalid_entity} not found in conversion semantic model",
        "the measure must be COUNT/SUM(1)/COUNT_DISTINCT",
        "The provided constant property: bad_dim, cannot be found",
        "The provided constant property: bad_dim2, cannot be found",
    ]
    readable_issues = [issue.as_readable_str() for issue in build_issues]
    missing_error_strings = {
        expected_str
        for expected_str in expected_substrings
        if not any(expected_str in readable for readable in readable_issues)
    }
    # The message is parenthesized so both string parts belong to the assert; without the
    # parentheses the second f-string is a separate no-op statement and never gets reported.
    assert len(missing_error_strings) == 0, (
        "Failed to match one or more expected errors: "
        f"{missing_error_strings} in {set(readable_issues)}"
    )