Skip to content

Commit

Permalink
Remove default time constraint for queries with cumulative metrics.
Browse files Browse the repository at this point in the history
A previous change (#918) updated
the logic to apply a default time constraint for queries with cumulative metrics
if one was not supplied. However, the fix revealed that the feature was not
working for some time, possibly since the public release. I also realized that
the original logic (and also the fix) is not correct with derived metrics that
use cumulative metrics. There is future work to better handle time constraints /
predicate push down, so removing this for now.
  • Loading branch information
plypaul committed Dec 9, 2023
1 parent 457deb8 commit c814d43
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 54 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231207-170704.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Remove default time constraint for queries with cumulative metrics.
time: 2023-12-07T17:07:04.040509-08:00
custom:
Author: plypaul
Issue: "917"
53 changes: 0 additions & 53 deletions metricflow/engine/metricflow_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
from metricflow.sql.optimizer.optimization_levels import SqlQueryOptimizationLevel
from metricflow.telemetry.models import TelemetryLevel
from metricflow.telemetry.reporter import TelemetryReporter, log_call
from metricflow.time.time_granularity import TimeGranularity
from metricflow.time.time_source import TimeSource

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -320,19 +319,13 @@ def __init__(
sql_client: SqlClient,
time_source: TimeSource = ServerTimeSource(),
column_association_resolver: Optional[ColumnAssociationResolver] = None,
enable_default_time_constraint: bool = True,
) -> None:
"""Initializer for MetricFlowEngine.
For direct calls to construct MetricFlowEngine, do not pass the following parameters,
- time_source
- column_association_resolver
- time_spine_source
- enable_default_time_constraint: In cases where the time constraint is not specified for cumulative
metrics, the time constraint is adjusted to reduce the number of rows produced as cumulative metrics can
produce rows for all time values in the time spine. This was added to avoid adding time constraints
automatically for tests. This could be removed if automatic adjustment is removed, or when the tests are
updated.
These parameters are mainly there to be overridden during tests.
"""
Expand All @@ -343,7 +336,6 @@ def __init__(
)
self._time_source = time_source
self._time_spine_source = semantic_manifest_lookup.time_spine_source
self._enable_default_time_constraint = enable_default_time_constraint
self._source_data_sets: List[SemanticModelDataSet] = []
converter = SemanticModelToDataSetConverter(column_association_resolver=self._column_association_resolver)
for semantic_model in sorted(
Expand Down Expand Up @@ -453,51 +445,6 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
)
logger.info(f"Query spec is:\n{pformat_big_objects(query_spec)}")

if (
self._semantic_manifest_lookup.metric_lookup.contains_cumulative_or_time_offset_metric(
tuple(metric_spec.reference for metric_spec in query_spec.metric_specs)
)
and self._enable_default_time_constraint
):
if self._time_spine_source.time_column_granularity != TimeGranularity.DAY:
raise RuntimeError(
f"A time spine source with a granularity {self._time_spine_source.time_column_granularity} is not "
f"yet supported."
)
logger.warning(
f"Query spec requires a time spine dataset conforming to the following spec: {self._time_spine_source}. "
)
time_constraint_updated = False

if mf_query_request.time_constraint_start is None:
time_constraint_start = self._time_source.get_time() - datetime.timedelta(days=365)
logger.warning(
"A start time has not be supplied while querying for cumulative metrics. To avoid an excessive "
f"number of rows, the start time will be changed to {time_constraint_start.isoformat()}"
)
time_constraint_updated = True
else:
time_constraint_start = mf_query_request.time_constraint_start

if not mf_query_request.time_constraint_end:
time_constraint_end = self._time_source.get_time()
logger.warning(
"A end time has not be supplied while querying for cumulative metrics. To avoid an excessive "
f"number of rows, the end time will be changed to {time_constraint_end.isoformat()}"
)
time_constraint_updated = True
else:
time_constraint_end = mf_query_request.time_constraint_end

if time_constraint_updated:
query_spec = query_spec.with_time_range_constraint(
TimeRangeConstraint(
start_time=time_constraint_start,
end_time=time_constraint_end,
)
)
logger.warning(f"Query spec updated to:\n{pformat_big_objects(query_spec)}")

output_table: Optional[SqlTable] = None
if mf_query_request.output_table is not None:
output_table = SqlTable.from_string(mf_query_request.output_table)
Expand Down
1 change: 0 additions & 1 deletion metricflow/test/integration/test_configured_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ def test_case(
sql_client=sql_client,
column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup),
time_source=ConfigurableTimeSource(as_datetime("2021-01-04")),
enable_default_time_constraint=False,
)

check_query_helpers = CheckQueryHelpers(sql_client)
Expand Down

0 comments on commit c814d43

Please sign in to comment.