From a39e1fe54be8f463442b0c63c9b15c5cf17eebdf Mon Sep 17 00:00:00 2001
From: Quigley Malcolm
Date: Tue, 26 Sep 2023 14:33:01 -0700
Subject: [PATCH] Add validation rule checking that labels are unique on semantic models

---
Note: SemanticModelLabelsRule reports a ValidationError when two semantic
models share a label, and when a label is used by more than one dimension,
more than one entity, or more than one measure of the same semantic model.
Objects whose label is None are skipped by every check.

 dbt_semantic_interfaces/validations/labels.py | 105 +++++++++++++++++-
 .../semantic_manifest_validator.py            |   6 +-
 .../semantic_models/accounts_source.yaml      |   7 ++
 tests/validations/test_labels.py              |  87 ++++++++++++++-
 4 files changed, 200 insertions(+), 5 deletions(-)

diff --git a/dbt_semantic_interfaces/validations/labels.py b/dbt_semantic_interfaces/validations/labels.py
index 014db9ff..5b1ea23c 100644
--- a/dbt_semantic_interfaces/validations/labels.py
+++ b/dbt_semantic_interfaces/validations/labels.py
@@ -1,7 +1,8 @@
 import logging
-from typing import Dict, Generic, List, Sequence
+from collections import defaultdict
+from typing import DefaultDict, Dict, Generic, List, Sequence
 
-from dbt_semantic_interfaces.protocols import Metric, SemanticManifestT
+from dbt_semantic_interfaces.protocols import Metric, SemanticManifestT, SemanticModel
 from dbt_semantic_interfaces.validations.validator_helpers import (
     FileContext,
     SemanticManifestValidationRule,
@@ -41,3 +42,103 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
             issues += MetricLabelsRule._check_metric(metric=metric, existing_labels=labels_to_metrics)
 
         return issues
+
+
+class SemanticModelLabelsRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
+    """Checks labels are unique across semantic models and, per element kind, within each semantic model."""
+
+    @staticmethod
+    @validate_safely("checking that a semantic model has a unique label")
+    def _check_semantic_model(
+        semantic_model: SemanticModel, existing_labels: Dict[str, str]
+    ) -> Sequence[ValidationIssue]:  # noqa: D
+        if semantic_model.label in existing_labels:
+            return (
+                ValidationError(
+                    context=FileContext.from_metadata(semantic_model.metadata),
+                    message=f"Can't use label `{semantic_model.label}` for semantic model `{semantic_model.name}` "
+                    f"as it's already used for semantic model `{existing_labels[semantic_model.label]}`",
+                ),
+            )
+        elif semantic_model.label is not None:
+            existing_labels[semantic_model.label] = semantic_model.name
+
+        return ()
+
+    @staticmethod
+    @validate_safely("checking that a semantic model's dimension labels are unique within itself")
+    def _check_semantic_model_dimensions(semantic_model: SemanticModel) -> Sequence[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+        label_counts: DefaultDict[str, int] = defaultdict(lambda: 0)
+        for dimension in semantic_model.dimensions:
+            if dimension.label is not None:
+                label_counts[dimension.label] = label_counts[dimension.label] + 1
+
+        for label, count in label_counts.items():
+            if count > 1:
+                issues.append(
+                    ValidationError(
+                        context=FileContext.from_metadata(semantic_model.metadata),
+                        message=f"Dimension labels must be unique within a semantic model. The label `{label}` was "
+                        f"used for {count} dimensions on semantic model `{semantic_model.name}`",
+                    )
+                )
+
+        return issues
+
+    @staticmethod
+    @validate_safely("checking that a semantic model's entity labels are unique within itself")
+    def _check_semantic_model_entities(semantic_model: SemanticModel) -> Sequence[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+        label_counts: DefaultDict[str, int] = defaultdict(lambda: 0)
+        for entity in semantic_model.entities:
+            if entity.label is not None:
+                label_counts[entity.label] = label_counts[entity.label] + 1
+
+        for label, count in label_counts.items():
+            if count > 1:
+                issues.append(
+                    ValidationError(
+                        context=FileContext.from_metadata(semantic_model.metadata),
+                        message=f"Entity labels must be unique within a semantic model. The label `{label}` was used "
+                        f"for {count} entities on semantic model `{semantic_model.name}`",
+                    )
+                )
+
+        return issues
+
+    @staticmethod
+    @validate_safely("checking that a semantic model's measure labels are unique within itself")
+    def _check_semantic_model_measures(semantic_model: SemanticModel) -> Sequence[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+        label_counts: DefaultDict[str, int] = defaultdict(lambda: 0)
+        for measure in semantic_model.measures:
+            if measure.label is not None:
+                label_counts[measure.label] = label_counts[measure.label] + 1
+
+        for label, count in label_counts.items():
+            if count > 1:
+                issues.append(
+                    ValidationError(
+                        context=FileContext.from_metadata(semantic_model.metadata),
+                        message=f"Measure labels must be unique within a semantic model. The label `{label}` was used "
+                        f"for {count} measures on semantic model `{semantic_model.name}`",
+                    )
+                )
+
+        return issues
+
+    @staticmethod
+    @validate_safely("checking labels on semantic models and their sub objects")
+    def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:  # noqa: D
+        issues: List[ValidationIssue] = []
+        labels_to_semantic_models: Dict[str, str] = {}
+        for semantic_model in semantic_manifest.semantic_models:
+            issues += SemanticModelLabelsRule._check_semantic_model(
+                semantic_model=semantic_model, existing_labels=labels_to_semantic_models
+            )
+            issues += SemanticModelLabelsRule._check_semantic_model_dimensions(semantic_model=semantic_model)
+            issues += SemanticModelLabelsRule._check_semantic_model_entities(semantic_model=semantic_model)
+            issues += SemanticModelLabelsRule._check_semantic_model_measures(semantic_model=semantic_model)
+
+        return issues
diff --git a/dbt_semantic_interfaces/validations/semantic_manifest_validator.py b/dbt_semantic_interfaces/validations/semantic_manifest_validator.py
index 7c1d10e1..77caa631 100644
--- a/dbt_semantic_interfaces/validations/semantic_manifest_validator.py
+++ b/dbt_semantic_interfaces/validations/semantic_manifest_validator.py
@@ -10,7 +10,10 @@
 from dbt_semantic_interfaces.validations.dimension_const import DimensionConsistencyRule
 from dbt_semantic_interfaces.validations.element_const import ElementConsistencyRule
 from dbt_semantic_interfaces.validations.entities import NaturalEntityConfigurationRule
-from dbt_semantic_interfaces.validations.labels import MetricLabelsRule
+from dbt_semantic_interfaces.validations.labels import (
+    MetricLabelsRule,
+    SemanticModelLabelsRule,
+)
 from dbt_semantic_interfaces.validations.measures import (
     CountAggregationExprRule,
     MeasureConstraintAliasesRule,
@@ -83,6 +86,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]):
         WhereFiltersAreParseable[SemanticManifestT](),
         SavedQueryRule[SemanticManifestT](),
         MetricLabelsRule[SemanticManifestT](),
+        SemanticModelLabelsRule[SemanticManifestT](),
     )
 
     def __init__(
diff --git a/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/semantic_models/accounts_source.yaml b/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/semantic_models/accounts_source.yaml
index 64167b1a..68032840 100644
--- a/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/semantic_models/accounts_source.yaml
+++ b/tests/fixtures/semantic_manifest_yamls/simple_semantic_manifest/semantic_models/accounts_source.yaml
@@ -2,6 +2,7 @@
 semantic_model:
   name: accounts_source
   description: accounts_source
+  label: Accounts Source
 
   node_relation:
     schema_name: $source_schema
@@ -13,9 +14,11 @@ semantic_model:
   measures:
     - name: account_balance
       agg: sum
+      label: Account Balance
 
     - name: total_account_balance_first_day
       agg: sum
+      label: Total Account Balance on First Day
       expr: account_balance
       non_additive_dimension:
         name: ds
@@ -23,6 +26,7 @@ semantic_model:
 
     - name: current_account_balance_by_user
       agg: sum
+      label: Current Account Balance by User
       expr: account_balance
       non_additive_dimension:
         name: ds
@@ -33,10 +37,12 @@ semantic_model:
   dimensions:
     - name: ds
       type: time
+      label: Metric Time
       type_params:
         time_granularity: day
     - name: account_type
       type: categorical
+      label: Account Type
 
   primary_entity: account
 
@@ -44,3 +50,4 @@ semantic_model:
     - name: user
       type: foreign
       expr: user_id
+      label: User
diff --git a/tests/validations/test_labels.py b/tests/validations/test_labels.py
index 396d7cec..f4ccfc2b 100644
--- a/tests/validations/test_labels.py
+++ b/tests/validations/test_labels.py
@@ -5,8 +5,14 @@
 from dbt_semantic_interfaces.implementations.semantic_manifest import (
     PydanticSemanticManifest,
 )
-from dbt_semantic_interfaces.test_utils import find_metric_with
-from dbt_semantic_interfaces.validations.labels import MetricLabelsRule
+from dbt_semantic_interfaces.test_utils import (
+    find_metric_with,
+    find_semantic_model_with,
+)
+from dbt_semantic_interfaces.validations.labels import (
+    MetricLabelsRule,
+    SemanticModelLabelsRule,
+)
 from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
     SemanticManifestValidator,
 )
@@ -39,3 +45,80 @@ def test_duplicate_metric_label(  # noqa: D
         SemanticManifestValidator[PydanticSemanticManifest](
             [MetricLabelsRule[PydanticSemanticManifest]()]
         ).checked_validations(manifest)
+
+
+def test_semantic_model_label_happy_path(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
+    SemanticManifestValidator[PydanticSemanticManifest](
+        [SemanticModelLabelsRule[PydanticSemanticManifest]()]
+    ).checked_validations(manifest)
+
+
+def test_semantic_model_with_duplicate_labels(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
+    semantic_model, _ = find_semantic_model_with(manifest, lambda semantic_model: semantic_model.label is not None)
+    duplicate = deepcopy(semantic_model)
+    duplicate.name = duplicate.name + "_duplicate"
+    manifest.semantic_models.append(duplicate)
+    with pytest.raises(
+        SemanticManifestValidationException,
+        match=rf"Can't use label `{semantic_model.label}` for semantic model",
+    ):
+        SemanticManifestValidator[PydanticSemanticManifest](
+            [SemanticModelLabelsRule[PydanticSemanticManifest]()]
+        ).checked_validations(manifest)
+
+
+def test_semantic_model_with_duplicate_dimension_labels(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
+    semantic_model, _ = find_semantic_model_with(manifest, lambda semantic_model: len(semantic_model.dimensions) >= 2)
+    label = "Duplicate Label Name"
+    semantic_model.dimensions[0].label = label
+    semantic_model.dimensions[1].label = label
+    with pytest.raises(
+        SemanticManifestValidationException,
+        match=rf"Dimension labels must be unique within a semantic model. The label `{label}`",
+    ):
+        SemanticManifestValidator[PydanticSemanticManifest](
+            [SemanticModelLabelsRule[PydanticSemanticManifest]()]
+        ).checked_validations(manifest)
+
+
+def test_semantic_model_with_duplicate_entity_labels(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
+    semantic_model, _ = find_semantic_model_with(manifest, lambda semantic_model: len(semantic_model.entities) >= 2)
+    label = "Duplicate Label Name"
+    semantic_model.entities[0].label = label
+    semantic_model.entities[1].label = label
+    with pytest.raises(
+        SemanticManifestValidationException,
+        match=rf"Entity labels must be unique within a semantic model. The label `{label}`",
+    ):
+        SemanticManifestValidator[PydanticSemanticManifest](
+            [SemanticModelLabelsRule[PydanticSemanticManifest]()]
+        ).checked_validations(manifest)
+
+
+def test_semantic_model_with_duplicate_measure_labels(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
+    semantic_model, _ = find_semantic_model_with(manifest, lambda semantic_model: len(semantic_model.measures) >= 2)
+    label = "Duplicate Label Name"
+    semantic_model.measures[0].label = label
+    semantic_model.measures[1].label = label
+    with pytest.raises(
+        SemanticManifestValidationException,
+        match=rf"Measure labels must be unique within a semantic model. The label `{label}`",
+    ):
+        SemanticManifestValidator[PydanticSemanticManifest](
+            [SemanticModelLabelsRule[PydanticSemanticManifest]()]
+        ).checked_validations(manifest)