Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect cumulative metric output when start-time and end-time are used #871

Merged
merged 2 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231114-171137.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix error in cumulative metric output when start-time and end-time are specified
time: 2023-11-14T17:11:37.462241-08:00
custom:
Author: tlento
Issue: "869"
4 changes: 2 additions & 2 deletions metricflow/filters/time_constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _adjust_time_constraint_start_by_window(
time_granularity: TimeGranularity,
time_unit_count: int,
) -> TimeRangeConstraint:
"""Moves the start of the time constraint back by 1 window.
"""Moves the start of the time constraint back by <time_unit_count> windows.

if the metric is weekly-active-users (ie window = 1 week) it moves time_constraint.start one week earlier
"""
Expand All @@ -80,7 +80,7 @@ def adjust_time_constraint_for_cumulative_metric(
) -> TimeRangeConstraint:
"""Given a time constraint for the overall query, adjust it to cover the time range for this metric."""
if granularity is not None:
return self._adjust_time_constraint_start_by_window(granularity, count - 1)
return self._adjust_time_constraint_start_by_window(granularity, count)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the actual fix.


# if no window is specified we want to accumulate from the beginning of time
return TimeRangeConstraint(
Expand Down
154 changes: 154 additions & 0 deletions metricflow/test/integration/query_output/test_cumulative_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from __future__ import annotations

import datetime

import pytest
from _pytest.fixtures import FixtureRequest

from metricflow.engine.metricflow_engine import MetricFlowQueryRequest
from metricflow.protocols.sql_client import SqlClient
from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState
from metricflow.test.integration.conftest import IntegrationTestHelpers
from metricflow.test.snapshot_utils import assert_object_snapshot_equal


@pytest.mark.sql_engine_snapshot
def test_simple_cumulative_metric(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a query of a cumulative metric with a monthly window and a time constraint adjustment."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["trailing_2_months_revenue"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2020, 2, 1),
time_constraint_end=datetime.datetime(2020, 4, 30),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_multiple_cumulative_metrics(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a query with multiple cumulative metrics to ensure date selections align."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["revenue_all_time", "trailing_2_months_revenue"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2020, 3, 31),
time_constraint_end=datetime.datetime(2020, 5, 31),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_non_additive_cumulative_metric(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a query with a non-additive cumulative metric to ensure the non-additive constraint is applied."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["every_two_days_bookers"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2019, 12, 31),
time_constraint_end=datetime.datetime(2020, 1, 3),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_grain_to_date_cumulative_metric(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a month to date cumulative metric with a constraint to ensure all necessary input data is included."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["revenue_mtd"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
time_constraint_start=datetime.datetime(2021, 1, 3),
time_constraint_end=datetime.datetime(2021, 1, 6),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)


@pytest.mark.sql_engine_snapshot
def test_cumulative_metric_with_non_adjustable_filter(
request: FixtureRequest,
mf_test_session_state: MetricFlowTestSessionState,
sql_client: SqlClient,
it_helpers: IntegrationTestHelpers,
) -> None:
"""Tests a cumulative metric with a filter that cannot be adjusted to ensure all data is included."""
query_result = it_helpers.mf_engine.query(
MetricFlowQueryRequest.create_with_random_request_id(
metric_names=["trailing_2_months_revenue"],
group_by_names=["metric_time"],
order_by_names=["metric_time"],
where_constraint=(
"{{ TimeDimension('metric_time', 'day') }} = '2020-03-15' or "
"{{ TimeDimension('metric_time', 'day') }} = '2020-04-30'"
),
)
)
assert query_result.result_df is not None, "Unexpected empty result."

assert_object_snapshot_equal(
request=request,
mf_test_session_state=mf_test_session_state,
obj_id="query_output",
obj=query_result.result_df.to_string(),
sql_client=sql_client,
)
32 changes: 19 additions & 13 deletions metricflow/test/integration/test_cases/itest_cumulative_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left these tests in place for a couple of reasons, but one is to illustrate how the constraint adjustments need to be applied here vs in the where clause.

A number of these were producing correct results on inspection of input data only because the input data didn't start until within the too-short filter window.

check_query: |
SELECT
SUM(b.txn_revenue) as trailing_2_months_revenue
Expand All @@ -23,6 +23,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand All @@ -33,7 +34,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time", "user__home_state_latest"]
order_bys: ["metric_time", "user__home_state_latest"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(revenue) as trailing_2_months_revenue
Expand All @@ -55,6 +56,7 @@ integration_test:
GROUP BY m.created_at, m.revenue, u.home_state_latest
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds, user__home_state_latest
ORDER by a.ds, user__home_state_latest
---
Expand All @@ -65,15 +67,15 @@ integration_test:
metrics: ["revenue_all_time"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-01", "2021-01-05"]
time_constraint: ["2020-03-01", "2021-01-05"]
check_query: |
SELECT
SUM(revenue) AS revenue_all_time
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-03-01", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand Down Expand Up @@ -102,7 +104,7 @@ integration_test:
metrics: ["revenue_all_time", "trailing_2_months_revenue"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2019-12-31", "2021-01-05"]
time_constraint: ["2020-03-31", "2021-01-05"]
check_query: |
SELECT revenue_all_time, trailing_2_months_revenue, a.ds AS metric_time__day
FROM (
Expand All @@ -116,7 +118,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-03-31", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -135,7 +137,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-01-31", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -147,6 +149,7 @@ integration_test:
GROUP BY a.ds
) b
ON a.ds = b.ds
WHERE {{ render_time_constraint("a.ds", "2020-03-31", "2021-01-05") }}
ORDER BY a.ds
---
integration_test:
Expand All @@ -156,7 +159,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time__day"]
order_bys: ["metric_time__day"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(b.txn_revenue) as trailing_2_months_revenue
Expand All @@ -173,6 +176,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand Down Expand Up @@ -218,7 +222,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2020-01-04") }}
WHERE {{ render_time_constraint("ds", "2019-12-30", "2020-01-04") }}
) a
INNER JOIN (
SELECT
Expand All @@ -237,15 +241,15 @@ integration_test:
metrics: ["every_two_days_bookers"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-04", "2020-01-04"]
time_constraint: ["2020-01-03", "2020-01-03"]
check_query: |
SELECT
COUNT (DISTINCT(b.guest_id)) as every_two_days_bookers
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-04", "2020-01-04") }}
WHERE {{ render_time_constraint("ds", "2020-01-02", "2020-01-03") }}
) a
INNER JOIN (
SELECT
Expand All @@ -254,6 +258,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.DAY) }}
WHERE {{ render_time_constraint("a.ds", "2020-01-03", "2020-01-03") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand All @@ -264,15 +269,15 @@ integration_test:
metrics: ["revenue_mtd"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-01", "2021-01-05"]
time_constraint: ["2021-01-04", "2021-01-05"]
check_query: |
SELECT
SUM(b.txn_revenue) as revenue_mtd
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2021-01-01", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -281,6 +286,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds >= {{ render_date_trunc("a.ds", TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2021-01-04", "2021-01-05") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC(created_at, day) AS metric_time__day
, DATE_TRUNC(created_at, month) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC(created_at, day) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC(created_at, day) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Loading