Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set max_metric_default_time_granularity property on PushDownResult #1331

Merged
merged 2 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion extra-hatch-configuration/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Jinja2>=3.1.3
dbt-semantic-interfaces==0.6.5
dbt-semantic-interfaces==0.6.6
more-itertools>=8.10.0, <10.2.0
pydantic>=1.10.0, <3.0
tabulate>=0.8.9
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# dbt Cloud depends on metricflow-semantics (dependency set in dbt-mantle), so DSI must always point to a production version here.
dbt-semantic-interfaces>=0.6.5, <2.0.0
dbt-semantic-interfaces>=0.6.6, <2.0.0
graphviz>=0.18.2, <0.21
python-dateutil>=2.9.0, <2.10.0
rapidfuzz>=3.0, <4.0
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from typing_extensions import override

from metricflow_semantics.mf_logging.formatting import indent
Expand Down Expand Up @@ -60,6 +61,8 @@ class PushDownResult:
candidate_set: GroupByItemCandidateSet
# The issues seen so far while pushing down the result / resolving the ambiguity.
issue_set: MetricFlowQueryResolutionIssueSet
# The largest default time granularity of the metrics seen in the DAG so far. Used to resolve metric_time.
max_metric_default_time_granularity: Optional[TimeGranularity] = None

def __post_init__(self) -> None: # noqa: D105
# If there are no errors, there should be a candidate spec in each candidate set.
Expand All @@ -75,6 +78,7 @@ def filter_candidates_by_pattern(self, spec_pattern: SpecPattern) -> PushDownRes
return PushDownResult(
candidate_set=self.candidate_set.filter_candidates_by_pattern(spec_pattern),
issue_set=self.issue_set,
max_metric_default_time_granularity=self.max_metric_default_time_granularity,
)

def filter_candidates_by_patterns(self, spec_patterns: Sequence[SpecPattern]) -> PushDownResult:
Expand All @@ -89,6 +93,7 @@ def filter_candidates_by_patterns(self, spec_patterns: Sequence[SpecPattern]) ->
return PushDownResult(
candidate_set=candidate_set,
issue_set=self.issue_set,
max_metric_default_time_granularity=self.max_metric_default_time_granularity,
)


Expand Down Expand Up @@ -248,10 +253,19 @@ def _merge_push_down_results_from_parents(
NoParentCandidates.from_parameters(query_resolution_path=current_traversal_path)
)

metric_default_time_granularities = {
parent_candidate_set.max_metric_default_time_granularity
for parent_candidate_set in push_down_results_from_parents.values()
if parent_candidate_set.max_metric_default_time_granularity
}
max_metric_default_time_granularity = (
max(metric_default_time_granularities) if metric_default_time_granularities else None
)
if merged_issue_set.has_errors:
return PushDownResult(
candidate_set=GroupByItemCandidateSet.empty_instance(),
issue_set=merged_issue_set,
max_metric_default_time_granularity=max_metric_default_time_granularity,
)

parent_candidate_sets = tuple(
Expand All @@ -274,10 +288,12 @@ def _merge_push_down_results_from_parents(
parent_issues=(),
)
),
max_metric_default_time_granularity=max_metric_default_time_granularity,
)
return PushDownResult(
candidate_set=intersected_candidate_set,
issue_set=merged_issue_set,
max_metric_default_time_granularity=max_metric_default_time_granularity,
)

@override
Expand Down Expand Up @@ -353,10 +369,18 @@ def visit_metric_node(self, node: MetricGroupByItemResolutionNode) -> PushDownRe
)
)

# If time granularity is not set for the metric, defaults to DAY if available, else the smallest available granularity.
# Note: ignores any granularity set on input metrics.
metric_default_time_granularity = metric.time_granularity or max(
TimeGranularity.DAY,
self._semantic_manifest_lookup.metric_lookup.get_min_queryable_time_granularity(node.metric_reference),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double checking that we want the default to be the min. queryable time granularity if it's not set in the configs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The product decision was to default to:

  1. DAY, if available (i.e., for dimensions with DAY granularity or smaller).
  2. The minimum queryable granularity, if DAY is not available (i.e., for dimensions with WEEK granularity or larger).

And this change should implement that behavior! So it won't change the behavior for existing dimensions — it will mainly become relevant once people start adding sub-daily dimensions.

)

if matched_items.spec_count == 0:
return PushDownResult(
candidate_set=GroupByItemCandidateSet.empty_instance(),
issue_set=MetricFlowQueryResolutionIssueSet.merge_iterable(issue_sets_to_merge),
max_metric_default_time_granularity=metric_default_time_granularity,
)

return PushDownResult(
Expand All @@ -366,6 +390,7 @@ def visit_metric_node(self, node: MetricGroupByItemResolutionNode) -> PushDownRe
path_from_leaf_node=current_traversal_path,
),
issue_set=MetricFlowQueryResolutionIssueSet.merge_iterable(issue_sets_to_merge),
max_metric_default_time_granularity=metric_default_time_granularity,
)

@override
Expand Down
Loading