Skip to content

Commit

Permalink
Refactor validation for metric time windows
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Nov 26, 2024
1 parent 00abab6 commit 9151740
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 106 deletions.
27 changes: 4 additions & 23 deletions dbt_semantic_interfaces/implementations/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _from_yaml_value(cls, input: PydanticParseableValueType) -> PydanticMetricTi
The MetricTimeWindow is always expected to be provided as a string in user-defined YAML configs.
"""
if isinstance(input, str):
return PydanticMetricTimeWindow.parse(window=input.lower(), custom_granularity_names=(), strict=False)
return PydanticMetricTimeWindow.parse(window=input.lower())
else:
raise ValueError(
f"MetricTimeWindow inputs from model configs are expected to always be of type string, but got "
Expand All @@ -102,12 +102,8 @@ def window_string(self) -> str:
return f"{self.count} {self.granularity}"

@staticmethod
def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = True) -> PydanticMetricTimeWindow:
"""Returns window values if parsing succeeds, None otherwise.
If strict=True, then the granularity in the window must exist as a valid granularity.
Use strict=True for when you have all valid granularities, otherwise use strict=False.
"""
def parse(window: str) -> PydanticMetricTimeWindow:
"""Returns window values if parsing succeeds, None otherwise."""
parts = window.lower().split(" ")
if len(parts) != 2:
raise ParsingException(
Expand All @@ -116,22 +112,6 @@ def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = T
)

granularity = parts[1]

valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set(
c.lower() for c in custom_granularity_names
)

# if we switched to python 3.9 this could just be `granularity = parts[0].removesuffix('s')
if granularity.endswith("s") and granularity[:-1] in valid_time_granularities:
# months -> month
granularity = granularity[:-1]

if strict and granularity not in valid_time_granularities:
raise ParsingException(
f"Invalid time granularity {granularity} in metric window string: ({window})",
)
# If not strict and not standard granularity, it may be a custom grain, so validations happens later

count = parts[0]
if not count.isdigit():
raise ParsingException(f"Invalid count ({count}) in cumulative metric window string: ({window})")
Expand Down Expand Up @@ -225,6 +205,7 @@ def _implements_protocol(self) -> Metric: # noqa: D

@classmethod
def parse_obj(cls, input: Any) -> PydanticMetric:
"""Adds custom parsing to the default method."""
data = deepcopy(input)

# Ensure grain_to_date is lowercased
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dbt_semantic_interfaces.transformations.transform_rule import (
SemanticManifestTransformRule,
)
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity


class RemovePluralFromWindowGranularityRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]):
Expand All @@ -33,12 +33,18 @@ def _update_metric(
semantic_manifest: PydanticSemanticManifest, metric_name: str, custom_granularity_names: Sequence[str]
) -> None:
"""Mutates all the MetricTimeWindow by reparsing to remove the trailing 's'."""
valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set(
c.lower() for c in custom_granularity_names
)

def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow:
def trim_trailing_s(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow:
"""Reparse the window to remove the trailing 's'."""
return PydanticMetricTimeWindow.parse(
window=window.window_string, custom_granularity_names=custom_granularity_names
)
granularity = window.granularity
if granularity.endswith("s") and granularity[:-1] in valid_time_granularities:
# months -> month
granularity = granularity[:-1]
window.granularity = granularity
return window

matched_metric = next(
iter((metric for metric in semantic_manifest.metrics if metric.name == metric_name)), None
Expand All @@ -49,22 +55,23 @@ def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow
matched_metric.type_params.cumulative_type_params
and matched_metric.type_params.cumulative_type_params.window
):
matched_metric.type_params.cumulative_type_params.window = reparse_window(
matched_metric.type_params.cumulative_type_params.window = trim_trailing_s(
matched_metric.type_params.cumulative_type_params.window
)

elif matched_metric.type is MetricType.CONVERSION:
if (
matched_metric.type_params.conversion_type_params
and matched_metric.type_params.conversion_type_params.window
):
matched_metric.type_params.conversion_type_params.window = reparse_window(
matched_metric.type_params.conversion_type_params.window = trim_trailing_s(
matched_metric.type_params.conversion_type_params.window
)

elif matched_metric.type is MetricType.DERIVED or matched_metric.type is MetricType.RATIO:
for input_metric in matched_metric.input_metrics:
if input_metric.offset_window:
input_metric.offset_window = reparse_window(input_metric.offset_window)
input_metric.offset_window = trim_trailing_s(input_metric.offset_window)
elif matched_metric.type is MetricType.SIMPLE:
pass
else:
Expand Down
100 changes: 59 additions & 41 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import traceback
from typing import Dict, Generic, List, Optional, Sequence

from dbt_semantic_interfaces.errors import ParsingException
from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
PydanticMetricTimeWindow,
)
from dbt_semantic_interfaces.implementations.metric import PydanticMetric
from dbt_semantic_interfaces.protocols import (
ConversionTypeParams,
Dimension,
Metric,
MetricInputMeasure,
MetricTimeWindow,
SemanticManifest,
SemanticManifestT,
SemanticModel,
Expand Down Expand Up @@ -46,9 +42,9 @@
class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that cumulative metrics are configured properly."""

@staticmethod
@classmethod
@validate_safely(whats_being_done="running model validation ensuring cumulative metrics are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []

custom_granularity_names = [
Expand Down Expand Up @@ -109,19 +105,31 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
)

if window:
try:
# TODO: Should not call an implementation class.
PydanticMetricTimeWindow.parse(
window=window.window_string, custom_granularity_names=custom_granularity_names
)
except ParsingException as e:
issues.append(
ValidationError(
context=metric_context,
message="".join(traceback.format_exception_only(type(e), value=e)),
extra_detail="".join(traceback.format_tb(e.__traceback__)),
)
)
issues += cls.validate_metric_time_window(
metric_context=metric_context, window=window, custom_granularities=custom_granularity_names
)

return issues

@classmethod
def validate_metric_time_window( # noqa: D
cls, metric_context: MetricContext, window: MetricTimeWindow, custom_granularities: Sequence[str]
) -> Sequence[ValidationIssue]:
issues: List[ValidationIssue] = []

valid_granularities = set(custom_granularities) | {item.value.lower() for item in TimeGranularity}
window_granularity = window.granularity
if window_granularity.endswith("s") and window_granularity[:-1] in valid_granularities:
# months -> month
window_granularity = window_granularity[:-1]

if window_granularity not in valid_granularities:
issues.append(
ValidationError(
context=metric_context,
message=f"Invalid time granularity {window_granularity} in window: ({window})",
)
)

return issues

Expand Down Expand Up @@ -188,17 +196,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V

@staticmethod
@validate_safely(whats_being_done="checking that input metric time offset params are valid")
def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
def _validate_time_offset_params(metric: Metric, custom_granularities: Sequence[str]) -> List[ValidationIssue]:
issues: List[ValidationIssue] = []

metric_context = MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)
for input_metric in metric.type_params.metrics or []:
if input_metric.offset_window:
issues += CumulativeMetricRule.validate_metric_time_window(
metric_context=metric_context,
window=input_metric.offset_window,
custom_granularities=custom_granularities,
)
if input_metric.offset_window and input_metric.offset_to_grain:
issues.append(
ValidationError(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
context=metric_context,
message=f"Both offset_window and offset_to_grain set for derived metric '{metric.name}' on "
f"input metric '{input_metric.name}'. Please set one or the other.",
)
Expand Down Expand Up @@ -247,10 +262,18 @@ def _validate_expr(metric: Metric) -> List[ValidationIssue]:
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []

custom_granularity_names = [
granularity.name
for time_spine in semantic_manifest.project_configuration.time_spines
for granularity in time_spine.custom_granularities
]

issues += DerivedMetricRule._validate_input_metrics_exist(semantic_manifest=semantic_manifest)
for metric in semantic_manifest.metrics or []:
issues += DerivedMetricRule._validate_alias_collision(metric=metric)
issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
issues += DerivedMetricRule._validate_time_offset_params(
metric=metric, custom_granularities=custom_granularity_names
)
issues += DerivedMetricRule._validate_expr(metric=metric)
return issues

Expand All @@ -267,20 +290,15 @@ def _validate_type_params(

window = conversion_type_params.window
if window:
try:
window_str = f"{window.count} {window.granularity}"
PydanticMetricTimeWindow.parse(window=window_str, custom_granularity_names=custom_granularity_names)
except ParsingException as e:
issues.append(
ValidationError(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
message="".join(traceback.format_exception_only(type(e), value=e)),
extra_detail="".join(traceback.format_tb(e.__traceback__)),
)
)
issues += CumulativeMetricRule.validate_metric_time_window(
metric_context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
window=window,
custom_granularities=custom_granularity_names,
)

return issues

@staticmethod
Expand Down
56 changes: 42 additions & 14 deletions tests/parsing/test_metric_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
)
from dbt_semantic_interfaces.parsing.dir_to_model import (
parse_yaml_files_to_semantic_manifest,
parse_yaml_files_to_validation_ready_semantic_manifest,
)
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.transformations.semantic_manifest_transformer import (
PydanticSemanticManifestTransformer,
)
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
MetricType,
Expand Down Expand Up @@ -220,7 +218,9 @@ def test_cumulative_window_old_metric_parsing() -> None:
)
file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents)

build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE])
build_result = parse_yaml_files_to_validation_ready_semantic_manifest(
[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]
)

assert len(build_result.semantic_manifest.metrics) == 1
metric = build_result.semantic_manifest.metrics[0]
Expand All @@ -246,7 +246,9 @@ def test_cumulative_window_metric_parsing() -> None:
)
file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents)

build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE])
build_result = parse_yaml_files_to_validation_ready_semantic_manifest(
[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]
)

assert len(build_result.semantic_manifest.metrics) == 1
metric = build_result.semantic_manifest.metrics[0]
Expand Down Expand Up @@ -318,6 +320,29 @@ def test_derived_metric_offset_window_parsing() -> None:
"""Test for parsing a derived metric with an offset window."""
yaml_contents = textwrap.dedent(
"""\
semantic_model:
name: sample_semantic_model
node_relation:
schema_name: some_schema
alias: source_table
defaults:
agg_time_dimension: ds
entities:
- name: example_entity
type: primary
role: test_role
expr: example_id
measures:
- name: bookings
agg: sum
expr: 1
create_metric: true
dimensions:
- name: ds
type: time
type_params:
time_granularity: day
---
metric:
name: derived_offset_test
type: derived
Expand All @@ -332,10 +357,12 @@ def test_derived_metric_offset_window_parsing() -> None:
)
file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents)

build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE])
build_result = parse_yaml_files_to_validation_ready_semantic_manifest(
[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]
)

assert len(build_result.issues.all_issues) == 0
assert len(build_result.semantic_manifest.metrics) == 1
assert len(build_result.semantic_manifest.metrics) == 2
metric = build_result.semantic_manifest.metrics[0]
assert metric.name == "derived_offset_test"
assert metric.type is MetricType.DERIVED
Expand Down Expand Up @@ -389,13 +416,12 @@ def test_derived_metric_offset_to_grain_parsing() -> None:
)
file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents)

build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE])

# Apply transformations to ensure lowercasing
manifest = PydanticSemanticManifestTransformer().transform(build_result.semantic_manifest)
build_result = parse_yaml_files_to_validation_ready_semantic_manifest(
[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]
)

assert len(manifest.metrics) == 2
metric = [m for m in manifest.metrics if m.name == "derived_offset_to_grain_test"][0]
assert len(build_result.semantic_manifest.metrics) == 2
metric = [m for m in build_result.semantic_manifest.metrics if m.name == "derived_offset_to_grain_test"][0]
assert metric
assert metric.type is MetricType.DERIVED
assert metric.type_params.metrics and len(metric.type_params.metrics) == 2
Expand Down Expand Up @@ -522,7 +548,9 @@ def test_conversion_metric_parsing() -> None:
)
file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents)

build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE])
build_result = parse_yaml_files_to_validation_ready_semantic_manifest(
[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]
)

assert len(build_result.semantic_manifest.metrics) == 1
metric = build_result.semantic_manifest.metrics[0]
Expand Down
Loading

0 comments on commit 9151740

Please sign in to comment.