Skip to content

Commit

Permalink
Add default_granularity to metric spec (#291)
Browse files Browse the repository at this point in the history
Resolves #290

### Description
Now that we are enabling sub-daily granularities, the current behavior
of `metric_time` will likely be unexpected to users. If they change any
of their time dimensions to use a grain smaller than DAY, `metric_time`
will default to that grain. To avoid this confusion, add a parameter
that allows users to choose which grain they want their metric to
default to. This will default to DAY if not set to avoid the unexpected
behavior change. (If DAY is smaller than the defined granularity, will
default to the smallest available granularity.)


### Checklist

- [x] I have read [the contributing
guide](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md)
and understand what's expected of me
- [x] I have signed the
[CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
- [x] This PR includes tests, or tests are not required/relevant for
this PR
- [x] I have run `changie new` to [create a changelog
entry](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md#adding-a-changelog-entry)
  • Loading branch information
courtneyholcomb authored Jun 28, 2024
1 parent d41075e commit 1eb584b
Show file tree
Hide file tree
Showing 14 changed files with 371 additions and 11 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240614-140515.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add default_granularity to metric spec.
time: 2024-06-14T14:05:15.355931-07:00
custom:
Author: courtneyholcomb
Issue: "290"
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ test:
export FORMAT_JSON_LOGS="1" && hatch -v run dev-env:pytest -n auto tests

lint:
hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files
hatch run dev-env:pre-commit run --color=always --all-files

json_schema:
hatch run dev-env:python dbt_semantic_interfaces/parsing/generate_json_schema_file.py
39 changes: 36 additions & 3 deletions dbt_semantic_interfaces/implementations/metric.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence, Set

from typing_extensions import override

Expand All @@ -16,7 +16,7 @@
PydanticWhereFilterIntersection,
)
from dbt_semantic_interfaces.implementations.metadata import PydanticMetadata
from dbt_semantic_interfaces.protocols import MetricConfig, ProtocolHint
from dbt_semantic_interfaces.protocols import Metric, MetricConfig, ProtocolHint
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
Expand Down Expand Up @@ -191,9 +191,13 @@ def _implements_protocol(self) -> MetricConfig: # noqa: D
meta: Dict[str, Any] = Field(default_factory=dict)


class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing):
class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing, ProtocolHint[Metric]):
"""Describes a metric."""

@override
def _implements_protocol(self) -> Metric: # noqa: D
return self

name: str
description: Optional[str]
type: MetricType
Expand All @@ -202,6 +206,7 @@ class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing):
metadata: Optional[PydanticMetadata]
label: Optional[str] = None
config: Optional[PydanticMetricConfig]
default_granularity: Optional[TimeGranularity] = None

@property
def input_measures(self) -> Sequence[PydanticMetricInputMeasure]:
Expand All @@ -228,3 +233,31 @@ def input_metrics(self) -> Sequence[PydanticMetricInput]:
return (self.type_params.numerator, self.type_params.denominator)
else:
assert_values_exhausted(self.type)

@staticmethod
def all_input_measures_for_metric(
metric: Metric, metric_index: Dict[MetricReference, Metric]
) -> Set[MeasureReference]:
"""Gets all input measures for the metric, including those defined on input metrics (recursively)."""
measures: Set[MeasureReference] = set()
if metric.type is MetricType.SIMPLE or metric.type is MetricType.CUMULATIVE:
assert (
metric.type_params.measure is not None
), f"Metric {metric.name} should have a measure defined, but it does not."
measures.add(metric.type_params.measure.measure_reference)
elif metric.type is MetricType.DERIVED or metric.type is MetricType.RATIO:
for input_metric in metric.input_metrics:
nested_metric = metric_index.get(input_metric.as_reference)
assert nested_metric, f"Could not find metric {input_metric.name} in semantic manifest."
measures.update(
PydanticMetric.all_input_measures_for_metric(metric=nested_metric, metric_index=metric_index)
)
elif metric.type is MetricType.CONVERSION:
conversion_type_params = metric.type_params.conversion_type_params
assert conversion_type_params, "Conversion metric should have conversion_type_params."
measures.add(conversion_type_params.base_measure.measure_reference)
measures.add(conversion_type_params.conversion_measure.measure_reference)
else:
assert_values_exhausted(metric.type)

return measures
3 changes: 2 additions & 1 deletion dbt_semantic_interfaces/implementations/semantic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
SemanticModelDefaults,
)
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
LinkableElementReference,
MeasureReference,
Expand Down Expand Up @@ -168,7 +169,7 @@ def get_measure(self, measure_reference: MeasureReference) -> PydanticMeasure:
f"No dimension with name ({measure_reference.element_name}) in semantic_model with name ({self.name})"
)

def get_dimension(self, dimension_reference: LinkableElementReference) -> PydanticDimension: # noqa: D
def get_dimension(self, dimension_reference: DimensionReference) -> PydanticDimension: # noqa: D
for dim in self.dimensions:
if dim.reference == dimension_reference:
return dim
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,32 @@
"config": {
"$ref": "#/definitions/metric_config_schema"
},
"default_granularity": {
"enum": [
"NANOSECOND",
"MICROSECOND",
"MILLISECOND",
"SECOND",
"MINUTE",
"HOUR",
"DAY",
"WEEK",
"MONTH",
"QUARTER",
"YEAR",
"nanosecond",
"microsecond",
"millisecond",
"second",
"minute",
"hour",
"day",
"week",
"month",
"quarter",
"year"
]
},
"description": {
"type": "string"
},
Expand Down
1 change: 1 addition & 0 deletions dbt_semantic_interfaces/parsing/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@
"description": {"type": "string"},
"label": {"type": "string"},
"config": {"$ref": "metric_config_schema"},
"default_granularity": {"enum": time_granularity_values},
},
"additionalProperties": False,
"required": ["name", "type", "type_params"],
Expand Down
11 changes: 11 additions & 0 deletions dbt_semantic_interfaces/protocols/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,14 @@ def config(self) -> Optional[MetricConfig]: # noqa: D
def label(self) -> Optional[str]:
"""Returns a string representing a human readable label for the metric."""
pass

@property
@abstractmethod
def default_granularity(self) -> Optional[TimeGranularity]:
"""Default grain used for the metric.
This will be used in a couple of circumstances:
- as the default grain for metric_time if no grain is specified
- as the window function order by when reaggregating cumulative metrics for non-default grains
"""
pass
1 change: 1 addition & 0 deletions dbt_semantic_interfaces/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class EntityReference(LinkableElementReference): # noqa: D
class TimeDimensionReference(DimensionReference): # noqa: D
pass

@property
def dimension_reference(self) -> DimensionReference: # noqa: D
return DimensionReference(element_name=self.element_name)

Expand Down
4 changes: 3 additions & 1 deletion dbt_semantic_interfaces/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
PydanticSemanticModel,
)
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -123,6 +123,7 @@ def metric_with_guaranteed_meta(
type_params: PydanticMetricTypeParams,
metadata: PydanticMetadata = default_meta(),
description: str = "adhoc metric",
default_granularity: Optional[TimeGranularity] = None,
) -> PydanticMetric:
"""Creates a metric with the given input.
Expand All @@ -135,6 +136,7 @@ def metric_with_guaranteed_meta(
type_params=type_params,
filter=None,
metadata=metadata,
default_granularity=default_granularity,
)


Expand Down
48 changes: 48 additions & 0 deletions dbt_semantic_interfaces/transformations/default_grain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from typing import Set

from typing_extensions import override

from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.protocols import ProtocolHint
from dbt_semantic_interfaces.references import (
DimensionReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.transformations.transform_rule import (
SemanticManifestTransformRule,
)
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity


class SetDefaultGrainRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]):
"""If default_granularity is not set for a metric, set it to DAY if available, else the smallest available grain."""

@override
def _implements_protocol(self) -> SemanticManifestTransformRule[PydanticSemanticManifest]: # noqa: D
return self

@staticmethod
def transform_model(semantic_manifest: PydanticSemanticManifest) -> PydanticSemanticManifest:
"""For each metric, set default_granularity to DAY or smallest granularity supported by all agg_time_dims."""
for metric in semantic_manifest.metrics:
if metric.default_granularity:
continue

default_granularity = TimeGranularity.DAY
seen_agg_time_dimensions: Set[TimeDimensionReference] = set()
for semantic_model in semantic_manifest.semantic_models:
for measure_ref in set(metric.measure_references).intersection(semantic_model.measure_references):
agg_time_dimension_ref = semantic_model.checked_agg_time_dimension_for_measure(measure_ref)
if agg_time_dimension_ref in seen_agg_time_dimensions:
continue
seen_agg_time_dimensions.add(agg_time_dimension_ref)
dimension = semantic_model.get_dimension(DimensionReference(agg_time_dimension_ref.element_name))
if (
dimension.type_params
and dimension.type_params.time_granularity.to_int() > default_granularity.to_int()
):
default_granularity = dimension.type_params.time_granularity

return semantic_manifest
118 changes: 114 additions & 4 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,30 @@
import traceback
from typing import Generic, List, Optional, Sequence
from typing import Dict, Generic, List, Optional, Sequence

from dbt_semantic_interfaces.errors import ParsingException
from dbt_semantic_interfaces.implementations.metric import PydanticMetricTimeWindow
from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
PydanticMetricTimeWindow,
)
from dbt_semantic_interfaces.protocols import (
ConversionTypeParams,
Dimension,
Metric,
SemanticManifest,
SemanticManifestT,
SemanticModel,
)
from dbt_semantic_interfaces.references import MeasureReference, MetricModelReference
from dbt_semantic_interfaces.type_enums import AggregationType, MetricType
from dbt_semantic_interfaces.references import (
DimensionReference,
MeasureReference,
MetricModelReference,
MetricReference,
)
from dbt_semantic_interfaces.type_enums import (
AggregationType,
MetricType,
TimeGranularity,
)
from dbt_semantic_interfaces.validations.unique_valid_name import UniqueAndValidNameRule
from dbt_semantic_interfaces.validations.validator_helpers import (
FileContext,
Expand Down Expand Up @@ -562,3 +575,100 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
conversion_semantic_model=conversion_semantic_model,
)
return issues


class DefaultGrainRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that default_granularity set for metric is queryable for that metric."""

@staticmethod
def _min_queryable_granularity_for_metric(
metric: Metric,
metric_index: Dict[MetricReference, Metric],
measure_to_agg_time_dimension: Dict[MeasureReference, Dimension],
) -> TimeGranularity:
"""Get the minimum time granularity this metric is allowed to be queried with.
This should be the largest granularity that any of the metric's agg_time_dimensions is defined at.
Defaults to DAY in the
"""
min_queryable_granularity: Optional[TimeGranularity] = None
for measure_reference in PydanticMetric.all_input_measures_for_metric(metric=metric, metric_index=metric_index):
agg_time_dimension = measure_to_agg_time_dimension.get(measure_reference)
assert agg_time_dimension, f"Measure '{measure_reference.element_name}' not found in semantic manifest."
if not agg_time_dimension.type_params:
continue
defined_time_granularity = agg_time_dimension.type_params.time_granularity
if not min_queryable_granularity or defined_time_granularity.to_int() > min_queryable_granularity.to_int():
min_queryable_granularity = defined_time_granularity

return min_queryable_granularity or TimeGranularity.DAY

@staticmethod
@validate_safely(
whats_being_done="running model validation ensuring a metric's default_granularity is valid for the metric"
)
def _validate_metric(
metric: Metric,
metric_index: Dict[MetricReference, Metric],
measure_to_agg_time_dimension: Dict[MeasureReference, Dimension],
) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []
context = MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)

if metric.default_granularity:
min_queryable_granularity = DefaultGrainRule._min_queryable_granularity_for_metric(
metric=metric, metric_index=metric_index, measure_to_agg_time_dimension=measure_to_agg_time_dimension
)
valid_granularities = [
granularity.name
for granularity in TimeGranularity
if granularity.to_int() >= min_queryable_granularity.to_int()
]
if metric.default_granularity.name not in valid_granularities:
issues.append(
ValidationError(
context=context,
message=(
f"`default_granularity` for metric '{metric.name}' must be >= "
f"{min_queryable_granularity.name}. Valid options are those that are >= the largest "
f"granularity defined for the metric's measures' agg_time_dimensions. Got: "
f"{metric.default_granularity.name}. Valid options: {valid_granularities}"
),
)
)

return issues

@staticmethod
@validate_safely(whats_being_done="running manifest validation ensuring metric default_granularitys are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:
"""Validate that the default_granularity for each metric is queryable for that metric.
TODO: figure out a more efficient way to reference other aspects of the model. This validation essentially
requires parsing the entire model, which could be slow and likely is repeated work. The blocker is that the
inputs to validations are protocols, which don't easily store parsed metadata.
"""
issues: List[ValidationIssue] = []

measure_to_agg_time_dimension: Dict[MeasureReference, Dimension] = {}
for semantic_model in semantic_manifest.semantic_models:
dimension_index = {DimensionReference(dimension.name): dimension for dimension in semantic_model.dimensions}
for measure in semantic_model.measures:
agg_time_dimension_ref = semantic_model.checked_agg_time_dimension_for_measure(measure.reference)
agg_time_dimension = dimension_index.get(agg_time_dimension_ref.dimension_reference)
assert (
agg_time_dimension
), f"Dimension '{agg_time_dimension_ref.element_name}' not found in semantic manifest."
measure_to_agg_time_dimension[measure.reference] = agg_time_dimension

metric_index = {MetricReference(metric.name): metric for metric in semantic_manifest.metrics}
for metric in semantic_manifest.metrics or []:
issues += DefaultGrainRule._validate_metric(
metric=metric,
metric_index=metric_index,
measure_to_agg_time_dimension=measure_to_agg_time_dimension,
)
return issues
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dbt-semantic-interfaces"
version = "0.6.1"
version = "0.6.2.dev0"
description = 'The shared semantic layer definitions that dbt-core and MetricFlow use'
readme = "README.md"
requires-python = ">=3.8"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ metric:
name: "trailing_2_months_revenue"
description: "trailing_2_months_revenue"
type: cumulative
default_granularity: month
type_params:
measure:
name: txn_revenue
Expand Down
Loading

0 comments on commit 1eb584b

Please sign in to comment.