Skip to content

Commit

Permalink
Add validation rule checking that labels are unique on semantic models
Browse files Browse the repository at this point in the history
  • Loading branch information
QMalcolm committed Oct 6, 2023
1 parent 6247725 commit b3f443e
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 5 deletions.
105 changes: 103 additions & 2 deletions dbt_semantic_interfaces/validations/labels.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
from typing import Dict, Generic, List, Sequence
from collections import defaultdict
from typing import DefaultDict, Dict, Generic, List, Sequence

from dbt_semantic_interfaces.protocols import Metric, SemanticManifestT
from dbt_semantic_interfaces.protocols import Metric, SemanticManifestT, SemanticModel
from dbt_semantic_interfaces.validations.validator_helpers import (
FileContext,
SemanticManifestValidationRule,
Expand Down Expand Up @@ -41,3 +42,103 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
issues += MetricLabelsRule._check_metric(metric=metric, existing_labels=labels_to_metrics)

return issues


class SemanticModelLabelsRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
    """Checks that labels are unique across semantic models and within each semantic model.

    Two kinds of uniqueness are enforced:

    * a semantic model's own label may not be reused by another semantic model in the manifest;
    * within one semantic model, no two dimensions, no two entities, and no two measures may share a label
      (each element kind is checked separately).

    Elements and semantic models whose label is `None` are ignored.
    """

    @staticmethod
    @validate_safely("checking that a semantic model has a unique label")
    def _check_semantic_model(
        semantic_model: SemanticModel, existing_labels: Dict[str, str]
    ) -> Sequence[ValidationIssue]:  # noqa: D
        """Report an error if the semantic model's label was already claimed by another semantic model.

        Args:
            semantic_model: the semantic model being checked.
            existing_labels: mapping of label -> name of the semantic model that first used it. This mapping is
                mutated: a previously unseen, non-None label is recorded against this semantic model's name.

        Returns:
            A one-element tuple holding the error when the label is a duplicate, otherwise an empty tuple.
        """
        if semantic_model.label in existing_labels:
            return (
                ValidationError(
                    context=FileContext.from_metadata(semantic_model.metadata),
                    message=f"Can't use label `{semantic_model.label}` for semantic model `{semantic_model.name}` "
                    f"as it's already used for semantic model `{existing_labels[semantic_model.label]}`",
                ),
            )
        elif semantic_model.label is not None:
            existing_labels[semantic_model.label] = semantic_model.name

        return ()

    @staticmethod
    def _duplicate_label_issues(
        semantic_model: SemanticModel,
        labels: Sequence[str],
        element_type: str,
        element_type_plural: str,
    ) -> Sequence[ValidationIssue]:
        """Build one error per label that occurs more than once in `labels`.

        Args:
            semantic_model: the semantic model that owns the labelled elements; supplies the error context and name.
            labels: the non-None labels of a single element kind on the semantic model.
            element_type: capitalized singular name of the element kind used to start the message, e.g. "Dimension".
            element_type_plural: lowercase plural name of the element kind used mid-message, e.g. "dimensions".

        Returns:
            The errors for every duplicated label, in order of each label's first appearance.
        """
        label_counts: DefaultDict[str, int] = defaultdict(int)
        for label in labels:
            label_counts[label] += 1

        return [
            ValidationError(
                context=FileContext.from_metadata(semantic_model.metadata),
                message=f"{element_type} labels must be unique within a semantic model. The label `{label}` was "
                f"used for {count} {element_type_plural} on semantic model `{semantic_model.name}`",
            )
            for label, count in label_counts.items()
            if count > 1
        ]

    @staticmethod
    @validate_safely("checking that a semantic model's dimension labels are unique within itself")
    def _check_semantic_model_dimensions(semantic_model: SemanticModel) -> Sequence[ValidationIssue]:
        """Report an error for each label shared by two or more dimensions of the semantic model."""
        return SemanticModelLabelsRule._duplicate_label_issues(
            semantic_model=semantic_model,
            labels=[dimension.label for dimension in semantic_model.dimensions if dimension.label is not None],
            element_type="Dimension",
            element_type_plural="dimensions",
        )

    @staticmethod
    @validate_safely("checking that a semantic model's entity labels are unique within itself")
    def _check_semantic_model_entities(semantic_model: SemanticModel) -> Sequence[ValidationIssue]:
        """Report an error for each label shared by two or more entities of the semantic model."""
        return SemanticModelLabelsRule._duplicate_label_issues(
            semantic_model=semantic_model,
            labels=[entity.label for entity in semantic_model.entities if entity.label is not None],
            element_type="Entity",
            element_type_plural="entities",
        )

    @staticmethod
    @validate_safely("checking that a semantic model's measure labels are unique within itself")
    def _check_semantic_model_measures(semantic_model: SemanticModel) -> Sequence[ValidationIssue]:
        """Report an error for each label shared by two or more measures of the semantic model."""
        return SemanticModelLabelsRule._duplicate_label_issues(
            semantic_model=semantic_model,
            labels=[measure.label for measure in semantic_model.measures if measure.label is not None],
            element_type="Measure",
            element_type_plural="measures",
        )

    @staticmethod
    @validate_safely("checking labels on semantic models and their sub objects")
    def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:  # noqa: D
        """Run every label uniqueness check over each semantic model in the manifest.

        Args:
            semantic_manifest: the manifest whose semantic models are validated.

        Returns:
            All issues found, grouped per semantic model in manifest order.
        """
        issues: List[ValidationIssue] = []
        # Shared across iterations so a label is compared against every semantic model seen before it.
        labels_to_semantic_models: Dict[str, str] = {}
        for semantic_model in semantic_manifest.semantic_models:
            issues += SemanticModelLabelsRule._check_semantic_model(
                semantic_model=semantic_model, existing_labels=labels_to_semantic_models
            )
            issues += SemanticModelLabelsRule._check_semantic_model_dimensions(semantic_model=semantic_model)
            issues += SemanticModelLabelsRule._check_semantic_model_entities(semantic_model=semantic_model)
            issues += SemanticModelLabelsRule._check_semantic_model_measures(semantic_model=semantic_model)

        return issues
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
from dbt_semantic_interfaces.validations.dimension_const import DimensionConsistencyRule
from dbt_semantic_interfaces.validations.element_const import ElementConsistencyRule
from dbt_semantic_interfaces.validations.entities import NaturalEntityConfigurationRule
from dbt_semantic_interfaces.validations.labels import MetricLabelsRule
from dbt_semantic_interfaces.validations.labels import (
MetricLabelsRule,
SemanticModelLabelsRule,
)
from dbt_semantic_interfaces.validations.measures import (
CountAggregationExprRule,
MeasureConstraintAliasesRule,
Expand Down Expand Up @@ -83,6 +86,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]):
WhereFiltersAreParseable[SemanticManifestT](),
SavedQueryRule[SemanticManifestT](),
MetricLabelsRule[SemanticManifestT](),
SemanticModelLabelsRule[SemanticManifestT](),
)

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
semantic_model:
name: accounts_source
description: accounts_source
label: Accounts Source

node_relation:
schema_name: $source_schema
Expand All @@ -13,16 +14,19 @@ semantic_model:
measures:
- name: account_balance
agg: sum
label: Account Balance

- name: total_account_balance_first_day
agg: sum
label: Total Account Balance on First Day
expr: account_balance
non_additive_dimension:
name: ds
window_choice: min

- name: current_account_balance_by_user
agg: sum
label: Current Account Balance by User
expr: account_balance
non_additive_dimension:
name: ds
Expand All @@ -33,14 +37,17 @@ semantic_model:
dimensions:
- name: ds
type: time
label: Metric Time
type_params:
time_granularity: day
- name: account_type
type: categorical
label: Account Type

primary_entity: account

entities:
- name: user
type: foreign
expr: user_id
label: User
87 changes: 85 additions & 2 deletions tests/validations/test_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@
from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.test_utils import find_metric_with
from dbt_semantic_interfaces.validations.labels import MetricLabelsRule
from dbt_semantic_interfaces.test_utils import (
find_metric_with,
find_semantic_model_with,
)
from dbt_semantic_interfaces.validations.labels import (
MetricLabelsRule,
SemanticModelLabelsRule,
)
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
SemanticManifestValidator,
)
Expand Down Expand Up @@ -39,3 +45,80 @@ def test_duplicate_metric_label( # noqa: D
SemanticManifestValidator[PydanticSemanticManifest](
[MetricLabelsRule[PydanticSemanticManifest]()]
).checked_validations(manifest)


def test_semantic_model_label_happy_path(  # noqa: D
    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
    """Run only the semantic model labels rule on a copy of the fixture manifest; no exception is expected."""
    label_rule = SemanticModelLabelsRule[PydanticSemanticManifest]()
    validator = SemanticManifestValidator[PydanticSemanticManifest]([label_rule])
    # Work on a copy so the shared fixture is never handed to the validator directly.
    validator.checked_validations(deepcopy(simple_semantic_manifest__with_primary_transforms))


def test_semantic_model_with_duplicate_labels(  # noqa: D
    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
    """Two semantic models sharing one label must make validation raise."""
    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
    labelled_model, _ = find_semantic_model_with(manifest, lambda semantic_model: semantic_model.label is not None)

    # Clone the labelled model under a new name so that only the label collides.
    clone = deepcopy(labelled_model)
    clone.name += "_duplicate"
    manifest.semantic_models.append(clone)

    validator = SemanticManifestValidator[PydanticSemanticManifest](
        [SemanticModelLabelsRule[PydanticSemanticManifest]()]
    )
    expected_message = rf"Can't use label `{labelled_model.label}` for semantic model"
    with pytest.raises(SemanticManifestValidationException, match=expected_message):
        validator.checked_validations(manifest)


def test_semantic_model_with_duplicate_dimension_labels(  # noqa: D
    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
    """Two dimensions of one semantic model sharing a label must make validation raise."""
    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
    target_model, _ = find_semantic_model_with(manifest, lambda semantic_model: len(semantic_model.dimensions) >= 2)

    label = "Duplicate Label Name"
    # Give the first two dimensions the same label to trigger the rule.
    for dimension in target_model.dimensions[:2]:
        dimension.label = label

    validator = SemanticManifestValidator[PydanticSemanticManifest](
        [SemanticModelLabelsRule[PydanticSemanticManifest]()]
    )
    expected_message = rf"Dimension labels must be unique within a semantic model. The label `{label}`"
    with pytest.raises(SemanticManifestValidationException, match=expected_message):
        validator.checked_validations(manifest)


def test_semantic_model_with_duplicate_entity_labels(  # noqa: D
    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
    """Two entities of one semantic model sharing a label must make validation raise."""
    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
    target_model, _ = find_semantic_model_with(manifest, lambda semantic_model: len(semantic_model.entities) >= 2)

    label = "Duplicate Label Name"
    # Give the first two entities the same label to trigger the rule.
    for entity in target_model.entities[:2]:
        entity.label = label

    validator = SemanticManifestValidator[PydanticSemanticManifest](
        [SemanticModelLabelsRule[PydanticSemanticManifest]()]
    )
    expected_message = rf"Entity labels must be unique within a semantic model. The label `{label}`"
    with pytest.raises(SemanticManifestValidationException, match=expected_message):
        validator.checked_validations(manifest)


def test_semantic_model_with_duplicate_measure_labels(  # noqa: D
    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
    """Two measures of one semantic model sharing a label must make validation raise."""
    manifest = deepcopy(simple_semantic_manifest__with_primary_transforms)
    target_model, _ = find_semantic_model_with(manifest, lambda semantic_model: len(semantic_model.measures) >= 2)

    label = "Duplicate Label Name"
    # Give the first two measures the same label to trigger the rule.
    for measure in target_model.measures[:2]:
        measure.label = label

    validator = SemanticManifestValidator[PydanticSemanticManifest](
        [SemanticModelLabelsRule[PydanticSemanticManifest]()]
    )
    expected_message = rf"Measure labels must be unique within a semantic model. The label `{label}`"
    with pytest.raises(SemanticManifestValidationException, match=expected_message):
        validator.checked_validations(manifest)

0 comments on commit b3f443e

Please sign in to comment.