Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add default_granularity to metric spec #291

Merged
merged 6 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240614-140515.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add default_granularity to metric spec.
time: 2024-06-14T14:05:15.355931-07:00
custom:
Author: courtneyholcomb
Issue: "290"
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ test:
export FORMAT_JSON_LOGS="1" && hatch -v run dev-env:pytest -n auto tests

lint:
hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files
hatch run dev-env:pre-commit run --color=always --all-files

json_schema:
hatch run dev-env:python dbt_semantic_interfaces/parsing/generate_json_schema_file.py
44 changes: 41 additions & 3 deletions dbt_semantic_interfaces/implementations/metric.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence, Set

from typing_extensions import override

Expand All @@ -16,7 +16,12 @@
PydanticWhereFilterIntersection,
)
from dbt_semantic_interfaces.implementations.metadata import PydanticMetadata
from dbt_semantic_interfaces.protocols import MetricConfig, ProtocolHint
from dbt_semantic_interfaces.protocols import (
Metric,
MetricConfig,
MetricInputMeasure,
ProtocolHint,
)
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
Expand Down Expand Up @@ -191,9 +196,13 @@ def _implements_protocol(self) -> MetricConfig: # noqa: D
meta: Dict[str, Any] = Field(default_factory=dict)


class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing):
class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing, ProtocolHint[Metric]):
"""Describes a metric."""

@override
def _implements_protocol(self) -> Metric: # noqa: D
return self

name: str
description: Optional[str]
type: MetricType
Expand All @@ -202,6 +211,7 @@ class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing):
metadata: Optional[PydanticMetadata]
label: Optional[str] = None
config: Optional[PydanticMetricConfig]
default_granularity: Optional[TimeGranularity] = None

@property
def input_measures(self) -> Sequence[PydanticMetricInputMeasure]:
Expand All @@ -228,3 +238,31 @@ def input_metrics(self) -> Sequence[PydanticMetricInput]:
return (self.type_params.numerator, self.type_params.denominator)
else:
assert_values_exhausted(self.type)

@staticmethod
def all_input_measures_for_metric(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this method be in the protocol?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think not. I don't think we want to require implementing this logic any place that implements a metric.

metric: Metric, metric_index: Dict[MetricReference, Metric]
) -> Set[MetricInputMeasure]:
"""Gets all input measures for the metric, including those defined on input metrics (recursively)."""
measures = set()
if metric.type is MetricType.SIMPLE or metric.type is MetricType.CUMULATIVE:
assert (
metric.type_params.measure is not None
), f"Metric {metric.name} should have a measure defined, but it does not."
measures.add(metric.type_params.measure)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not blocking, but heads up that this could end up with duplicate measures.

Ie.,
if a metric that has 2 measures is the same but has a different input filter, then the set() wouldn't count them as duplicates as the MetricInputMeasure will have a different filter field even tho it's referencing the same measure.

If you don't want duplicate measures showing up in this, you could return Set[MeasureReference] instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call!!

elif metric.type is MetricType.DERIVED or metric.type is MetricType.RATIO:
for input_metric in metric.input_metrics:
nested_metric = metric_index.get(MetricReference(input_metric.name))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could just use the input_metric.as_reference property

assert nested_metric, f"Could not find metric {input_metric.name} in semantic manifest."
measures.update(
PydanticMetric.all_input_measures_for_metric(metric=nested_metric, metric_index=metric_index)
)
elif metric.type is MetricType.CONVERSION:
conversion_type_params = metric.type_params.conversion_type_params
assert conversion_type_params, "Conversion metric should have conversion_type_params."
measures.add(conversion_type_params.base_measure)
measures.add(conversion_type_params.conversion_measure)
else:
assert_values_exhausted(metric.type)

return measures
3 changes: 2 additions & 1 deletion dbt_semantic_interfaces/implementations/semantic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
SemanticModelDefaults,
)
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
LinkableElementReference,
MeasureReference,
Expand Down Expand Up @@ -168,7 +169,7 @@ def get_measure(self, measure_reference: MeasureReference) -> PydanticMeasure:
f"No dimension with name ({measure_reference.element_name}) in semantic_model with name ({self.name})"
)

def get_dimension(self, dimension_reference: LinkableElementReference) -> PydanticDimension: # noqa: D
def get_dimension(self, dimension_reference: DimensionReference) -> PydanticDimension: # noqa: D
for dim in self.dimensions:
if dim.reference == dimension_reference:
return dim
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,32 @@
"config": {
"$ref": "#/definitions/metric_config_schema"
},
"default_granularity": {
"enum": [
"NANOSECOND",
"MICROSECOND",
"MILLISECOND",
"SECOND",
"MINUTE",
"HOUR",
"DAY",
"WEEK",
"MONTH",
"QUARTER",
"YEAR",
"nanosecond",
"microsecond",
"millisecond",
"second",
"minute",
"hour",
"day",
"week",
"month",
"quarter",
"year"
]
},
"description": {
"type": "string"
},
Expand Down
1 change: 1 addition & 0 deletions dbt_semantic_interfaces/parsing/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@
"description": {"type": "string"},
"label": {"type": "string"},
"config": {"$ref": "metric_config_schema"},
"default_granularity": {"enum": time_granularity_values},
},
"additionalProperties": False,
"required": ["name", "type", "type_params"],
Expand Down
11 changes: 11 additions & 0 deletions dbt_semantic_interfaces/protocols/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,14 @@ def config(self) -> Optional[MetricConfig]: # noqa: D
def label(self) -> Optional[str]:
"""Returns a string representing a human readable label for the metric."""
pass

@property
@abstractmethod
def default_granularity(self) -> Optional[TimeGranularity]:
"""Default grain used for the metric.

This will be used in a couple of circumstances:
- as the default grain for metric_time if no grain is specified
- as the window function order by when reaggregating cumulative metrics for non-default grains
"""
pass
1 change: 1 addition & 0 deletions dbt_semantic_interfaces/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class EntityReference(LinkableElementReference): # noqa: D
class TimeDimensionReference(DimensionReference): # noqa: D
pass

@property
def dimension_reference(self) -> DimensionReference: # noqa: D
return DimensionReference(element_name=self.element_name)

Expand Down
4 changes: 3 additions & 1 deletion dbt_semantic_interfaces/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
PydanticSemanticModel,
)
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -123,6 +123,7 @@ def metric_with_guaranteed_meta(
type_params: PydanticMetricTypeParams,
metadata: PydanticMetadata = default_meta(),
description: str = "adhoc metric",
default_granularity: Optional[TimeGranularity] = None,
) -> PydanticMetric:
"""Creates a metric with the given input.

Expand All @@ -135,6 +136,7 @@ def metric_with_guaranteed_meta(
type_params=type_params,
filter=None,
metadata=metadata,
default_granularity=default_granularity,
)


Expand Down
48 changes: 48 additions & 0 deletions dbt_semantic_interfaces/transformations/default_grain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from typing import Set

from typing_extensions import override

from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.protocols import ProtocolHint
from dbt_semantic_interfaces.references import (
DimensionReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.transformations.transform_rule import (
SemanticManifestTransformRule,
)
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity


class SetDefaultGrainRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]):
"""If default_granularity is not set for a metric, set it to DAY if available, else the smallest available grain."""

@override
def _implements_protocol(self) -> SemanticManifestTransformRule[PydanticSemanticManifest]: # noqa: D
return self

@staticmethod
def transform_model(semantic_manifest: PydanticSemanticManifest) -> PydanticSemanticManifest:
"""For each metric, set default_granularity to DAY or smallest granularity supported by all agg_time_dims."""
for metric in semantic_manifest.metrics:
if metric.default_granularity:
continue

default_granularity = TimeGranularity.DAY
seen_agg_time_dimensions: Set[TimeDimensionReference] = set()
for semantic_model in semantic_manifest.semantic_models:
for measure_ref in set(metric.measure_references).intersection(semantic_model.measure_references):
agg_time_dimension_ref = semantic_model.checked_agg_time_dimension_for_measure(measure_ref)
if agg_time_dimension_ref in seen_agg_time_dimensions:
continue
seen_agg_time_dimensions.add(agg_time_dimension_ref)
dimension = semantic_model.get_dimension(DimensionReference(agg_time_dimension_ref.element_name))
if (
dimension.type_params
and dimension.type_params.time_granularity.to_int() > default_granularity.to_int()
):
default_granularity = dimension.type_params.time_granularity

return semantic_manifest
118 changes: 114 additions & 4 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,30 @@
import traceback
from typing import Generic, List, Optional, Sequence
from typing import Dict, Generic, List, Optional, Sequence

from dbt_semantic_interfaces.errors import ParsingException
from dbt_semantic_interfaces.implementations.metric import PydanticMetricTimeWindow
from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
PydanticMetricTimeWindow,
)
from dbt_semantic_interfaces.protocols import (
ConversionTypeParams,
Dimension,
Metric,
SemanticManifest,
SemanticManifestT,
SemanticModel,
)
from dbt_semantic_interfaces.references import MeasureReference, MetricModelReference
from dbt_semantic_interfaces.type_enums import AggregationType, MetricType
from dbt_semantic_interfaces.references import (
DimensionReference,
MeasureReference,
MetricModelReference,
MetricReference,
)
from dbt_semantic_interfaces.type_enums import (
AggregationType,
MetricType,
TimeGranularity,
)
from dbt_semantic_interfaces.validations.unique_valid_name import UniqueAndValidNameRule
from dbt_semantic_interfaces.validations.validator_helpers import (
FileContext,
Expand Down Expand Up @@ -562,3 +575,100 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
conversion_semantic_model=conversion_semantic_model,
)
return issues


class DefaultGrainRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that default_granularity set for metric is queryable for that metric."""

@staticmethod
def _min_queryable_granularity_for_metric(
metric: Metric,
metric_index: Dict[MetricReference, Metric],
measure_to_agg_time_dimension: Dict[MeasureReference, Dimension],
) -> TimeGranularity:
"""Get the minimum time granularity this metric is allowed to be queried with.

This should be the largest granularity that any of the metric's agg_time_dimensions is defined at.
Defaults to DAY in the
"""
min_queryable_granularity: Optional[TimeGranularity] = None
for input_measure in PydanticMetric.all_input_measures_for_metric(metric=metric, metric_index=metric_index):
agg_time_dimension = measure_to_agg_time_dimension.get(input_measure.measure_reference)
assert agg_time_dimension, f"Measure '{input_measure.name}' not found in semantic manifest."
if not agg_time_dimension.type_params:
continue
defined_time_granularity = agg_time_dimension.type_params.time_granularity
if not min_queryable_granularity or defined_time_granularity.to_int() > min_queryable_granularity.to_int():
min_queryable_granularity = defined_time_granularity

return min_queryable_granularity or TimeGranularity.DAY

@staticmethod
@validate_safely(
whats_being_done="running model validation ensuring a metric's default_granularity is valid for the metric"
)
def _validate_metric(
metric: Metric,
metric_index: Dict[MetricReference, Metric],
measure_to_agg_time_dimension: Dict[MeasureReference, Dimension],
) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []
context = MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)

if metric.default_granularity:
min_queryable_granularity = DefaultGrainRule._min_queryable_granularity_for_metric(
metric=metric, metric_index=metric_index, measure_to_agg_time_dimension=measure_to_agg_time_dimension
)
valid_granularities = [
granularity.name
for granularity in TimeGranularity
if granularity.to_int() >= min_queryable_granularity.to_int()
]
if metric.default_granularity.name not in valid_granularities:
issues.append(
ValidationError(
context=context,
message=(
f"`default_granularity` for metric '{metric.name}' must be >= "
f"{min_queryable_granularity.name}. Valid options are those that are >= the largest "
f"granularity defined for the metric's measures' agg_time_dimensions. Got: "
f"{metric.default_granularity.name}. Valid options: {valid_granularities}"
),
)
)

return issues

@staticmethod
@validate_safely(whats_being_done="running manifest validation ensuring metric default_granularitys are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:
"""Validate that the default_granularity for each metric is queryable for that metric.

TODO: figure out a more efficient way to reference other aspects of the model. This validation essentially
requires parsing the entire model, which could be slow and likely is repeated work. The blocker is that the
inputs to validations are protocols, which don't easily store parsed metadata.
"""
issues: List[ValidationIssue] = []

measure_to_agg_time_dimension: Dict[MeasureReference, Dimension] = {}
for semantic_model in semantic_manifest.semantic_models:
dimension_index = {DimensionReference(dimension.name): dimension for dimension in semantic_model.dimensions}
for measure in semantic_model.measures:
agg_time_dimension_ref = semantic_model.checked_agg_time_dimension_for_measure(measure.reference)
agg_time_dimension = dimension_index.get(agg_time_dimension_ref.dimension_reference)
assert (
agg_time_dimension
), f"Dimension '{agg_time_dimension_ref.element_name}' not found in semantic manifest."
measure_to_agg_time_dimension[measure.reference] = agg_time_dimension

metric_index = {MetricReference(metric.name): metric for metric in semantic_manifest.metrics}
for metric in semantic_manifest.metrics or []:
issues += DefaultGrainRule._validate_metric(
metric=metric,
metric_index=metric_index,
measure_to_agg_time_dimension=measure_to_agg_time_dimension,
)
return issues
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ metric:
name: "trailing_2_months_revenue"
description: "trailing_2_months_revenue"
type: cumulative
default_granularity: month
type_params:
measure:
name: txn_revenue
Expand Down
Loading
Loading