Skip to content

Commit

Permalink
Merge pull request #871 from dbt-labs/fix-incorrect-cumulative-metric…
Browse files Browse the repository at this point in the history
…-output
  • Loading branch information
tlento authored Nov 15, 2023
2 parents 0d24aee + e628d6e commit 8e69cda
Show file tree
Hide file tree
Showing 46 changed files with 1,201 additions and 39 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231114-171137.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix error in cumulative metric output when start-time and end-time are specified
time: 2023-11-14T17:11:37.462241-08:00
custom:
Author: tlento
Issue: "869"
4 changes: 2 additions & 2 deletions metricflow/filters/time_constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _adjust_time_constraint_start_by_window(
time_granularity: TimeGranularity,
time_unit_count: int,
) -> TimeRangeConstraint:
"""Moves the start of the time constraint back by 1 window.
"""Moves the start of the time constraint back by <time_unit_count> windows.
if the metric is weekly-active-users (ie window = 1 week) it moves time_constraint.start one week earlier
"""
Expand All @@ -80,7 +80,7 @@ def adjust_time_constraint_for_cumulative_metric(
) -> TimeRangeConstraint:
"""Given a time constraint for the overall query, adjust it to cover the time range for this metric."""
if granularity is not None:
return self._adjust_time_constraint_start_by_window(granularity, count - 1)
return self._adjust_time_constraint_start_by_window(granularity, count)

# if no window is specified we want to accumulate from the beginning of time
return TimeRangeConstraint(
Expand Down
154 changes: 154 additions & 0 deletions metricflow/test/integration/query_output/test_cumulative_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from __future__ import annotations

import datetime

import pytest
from _pytest.fixtures import FixtureRequest

from metricflow.engine.metricflow_engine import MetricFlowQueryRequest
from metricflow.protocols.sql_client import SqlClient
from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState
from metricflow.test.integration.conftest import IntegrationTestHelpers
from metricflow.test.snapshot_utils import assert_object_snapshot_equal


@pytest.mark.sql_engine_snapshot
def test_simple_cumulative_metric(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a query of a cumulative metric with a monthly window and a time constraint adjustment."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["trailing_2_months_revenue"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2020, 2, 1),
time_constraint_end=datetime.datetime(2020, 4, 30),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_multiple_cumulative_metrics(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a query with multiple cumulative metrics to ensure date selections align."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["revenue_all_time", "trailing_2_months_revenue"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2020, 3, 31),
time_constraint_end=datetime.datetime(2020, 5, 31),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_non_additive_cumulative_metric(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a query with a non-additive cumulative metric to ensure the non-additive constraint is applied."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["every_two_days_bookers"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2019, 12, 31),
time_constraint_end=datetime.datetime(2020, 1, 3),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_grain_to_date_cumulative_metric(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a month to date cumulative metric with a constraint to ensure all necessary input data is included."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["revenue_mtd"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2021, 1, 3),
time_constraint_end=datetime.datetime(2021, 1, 6),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_cumulative_metric_with_non_adjustable_filter(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a cumulative metric with a filter that cannot be adjusted to ensure all data is included."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["trailing_2_months_revenue"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
where_constraint=(
"{{ TimeDimension('metric_time', 'day') }} = '2020-03-15' or "
"{{ TimeDimension('metric_time', 'day') }} = '2020-04-30'"
),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)
32 changes: 19 additions & 13 deletions metricflow/test/integration/test_cases/itest_cumulative_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(b.txn_revenue) as trailing_2_months_revenue
Expand All @@ -23,6 +23,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand All @@ -33,7 +34,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time", "user__home_state_latest"]
order_bys: ["metric_time", "user__home_state_latest"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(revenue) as trailing_2_months_revenue
Expand All @@ -55,6 +56,7 @@ integration_test:
GROUP BY m.created_at, m.revenue, u.home_state_latest
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds, user__home_state_latest
ORDER by a.ds, user__home_state_latest
---
Expand All @@ -65,15 +67,15 @@ integration_test:
metrics: ["revenue_all_time"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-01", "2021-01-05"]
time_constraint: ["2020-03-01", "2021-01-05"]
check_query: |
SELECT
SUM(revenue) AS revenue_all_time
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-03-01", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand Down Expand Up @@ -102,7 +104,7 @@ integration_test:
metrics: ["revenue_all_time", "trailing_2_months_revenue"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2019-12-31", "2021-01-05"]
time_constraint: ["2020-03-31", "2021-01-05"]
check_query: |
SELECT revenue_all_time, trailing_2_months_revenue, a.ds AS metric_time__day
FROM (
Expand All @@ -116,7 +118,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-03-31", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -135,7 +137,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-01-31", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -147,6 +149,7 @@ integration_test:
GROUP BY a.ds
) b
ON a.ds = b.ds
WHERE {{ render_time_constraint("a.ds", "2020-03-31", "2021-01-05") }}
ORDER BY a.ds
---
integration_test:
Expand All @@ -156,7 +159,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time__day"]
order_bys: ["metric_time__day"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(b.txn_revenue) as trailing_2_months_revenue
Expand All @@ -173,6 +176,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand Down Expand Up @@ -218,7 +222,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2020-01-04") }}
WHERE {{ render_time_constraint("ds", "2019-12-30", "2020-01-04") }}
) a
INNER JOIN (
SELECT
Expand All @@ -237,15 +241,15 @@ integration_test:
metrics: ["every_two_days_bookers"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-04", "2020-01-04"]
time_constraint: ["2020-01-03", "2020-01-03"]
check_query: |
SELECT
COUNT (DISTINCT(b.guest_id)) as every_two_days_bookers
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-04", "2020-01-04") }}
WHERE {{ render_time_constraint("ds", "2020-01-02", "2020-01-03") }}
) a
INNER JOIN (
SELECT
Expand All @@ -254,6 +258,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.DAY) }}
WHERE {{ render_time_constraint("a.ds", "2020-01-03", "2020-01-03") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand All @@ -264,15 +269,15 @@ integration_test:
metrics: ["revenue_mtd"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-01", "2021-01-05"]
time_constraint: ["2021-01-04", "2021-01-05"]
check_query: |
SELECT
SUM(b.txn_revenue) as revenue_mtd
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2021-01-01", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -281,6 +286,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds >= {{ render_date_trunc("a.ds", TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2021-01-04", "2021-01-05") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC(created_at, day) AS metric_time__day
, DATE_TRUNC(created_at, month) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC(created_at, day) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC(created_at, day) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Loading

0 comments on commit 8e69cda

Please sign in to comment.