-
Notifications
You must be signed in to change notification settings - Fork 99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix incorrect cumulative metric output when start-time and end-time are used #871
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Fixes | ||
body: Fix error in cumulative metric output when start-time and end-time are specified | ||
time: 2023-11-14T17:11:37.462241-08:00 | ||
custom: | ||
Author: tlento | ||
Issue: "869" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
from __future__ import annotations | ||
|
||
import datetime | ||
|
||
import pytest | ||
from _pytest.fixtures import FixtureRequest | ||
|
||
from metricflow.engine.metricflow_engine import MetricFlowQueryRequest | ||
from metricflow.protocols.sql_client import SqlClient | ||
from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState | ||
from metricflow.test.integration.conftest import IntegrationTestHelpers | ||
from metricflow.test.snapshot_utils import assert_object_snapshot_equal | ||
|
||
|
||
@pytest.mark.sql_engine_snapshot | ||
def test_simple_cumulative_metric( | ||
request: FixtureRequest, | ||
mf_test_session_state: MetricFlowTestSessionState, | ||
sql_client: SqlClient, | ||
it_helpers: IntegrationTestHelpers, | ||
) -> None: | ||
"""Tests a query of a cumulative metric with a monthly window and a time constraint adjustment.""" | ||
query_result = it_helpers.mf_engine.query( | ||
MetricFlowQueryRequest.create_with_random_request_id( | ||
metric_names=["trailing_2_months_revenue"], | ||
group_by_names=["metric_time"], | ||
order_by_names=["metric_time"], | ||
time_constraint_start=datetime.datetime(2020, 2, 1), | ||
time_constraint_end=datetime.datetime(2020, 4, 30), | ||
) | ||
) | ||
assert query_result.result_df is not None, "Unexpected empty result." | ||
|
||
assert_object_snapshot_equal( | ||
request=request, | ||
mf_test_session_state=mf_test_session_state, | ||
obj_id="query_output", | ||
obj=query_result.result_df.to_string(), | ||
sql_client=sql_client, | ||
) | ||
|
||
|
||
@pytest.mark.sql_engine_snapshot | ||
def test_multiple_cumulative_metrics( | ||
request: FixtureRequest, | ||
mf_test_session_state: MetricFlowTestSessionState, | ||
sql_client: SqlClient, | ||
it_helpers: IntegrationTestHelpers, | ||
) -> None: | ||
"""Tests a query with multiple cumulative metrics to ensure date selections align.""" | ||
query_result = it_helpers.mf_engine.query( | ||
MetricFlowQueryRequest.create_with_random_request_id( | ||
metric_names=["revenue_all_time", "trailing_2_months_revenue"], | ||
group_by_names=["metric_time"], | ||
order_by_names=["metric_time"], | ||
time_constraint_start=datetime.datetime(2020, 3, 31), | ||
time_constraint_end=datetime.datetime(2020, 5, 31), | ||
) | ||
) | ||
assert query_result.result_df is not None, "Unexpected empty result." | ||
|
||
assert_object_snapshot_equal( | ||
request=request, | ||
mf_test_session_state=mf_test_session_state, | ||
obj_id="query_output", | ||
obj=query_result.result_df.to_string(), | ||
sql_client=sql_client, | ||
) | ||
|
||
|
||
@pytest.mark.sql_engine_snapshot | ||
def test_non_additive_cumulative_metric( | ||
request: FixtureRequest, | ||
mf_test_session_state: MetricFlowTestSessionState, | ||
sql_client: SqlClient, | ||
it_helpers: IntegrationTestHelpers, | ||
) -> None: | ||
"""Tests a query with a non-additive cumulative metric to ensure the non-additive constraint is applied.""" | ||
query_result = it_helpers.mf_engine.query( | ||
MetricFlowQueryRequest.create_with_random_request_id( | ||
metric_names=["every_two_days_bookers"], | ||
group_by_names=["metric_time"], | ||
order_by_names=["metric_time"], | ||
time_constraint_start=datetime.datetime(2019, 12, 31), | ||
time_constraint_end=datetime.datetime(2020, 1, 3), | ||
) | ||
) | ||
assert query_result.result_df is not None, "Unexpected empty result." | ||
|
||
assert_object_snapshot_equal( | ||
request=request, | ||
mf_test_session_state=mf_test_session_state, | ||
obj_id="query_output", | ||
obj=query_result.result_df.to_string(), | ||
sql_client=sql_client, | ||
) | ||
|
||
|
||
@pytest.mark.sql_engine_snapshot | ||
def test_grain_to_date_cumulative_metric( | ||
request: FixtureRequest, | ||
mf_test_session_state: MetricFlowTestSessionState, | ||
sql_client: SqlClient, | ||
it_helpers: IntegrationTestHelpers, | ||
) -> None: | ||
"""Tests a month to date cumulative metric with a constraint to ensure all necessary input data is included.""" | ||
query_result = it_helpers.mf_engine.query( | ||
MetricFlowQueryRequest.create_with_random_request_id( | ||
metric_names=["revenue_mtd"], | ||
group_by_names=["metric_time"], | ||
order_by_names=["metric_time"], | ||
time_constraint_start=datetime.datetime(2021, 1, 3), | ||
time_constraint_end=datetime.datetime(2021, 1, 6), | ||
) | ||
) | ||
assert query_result.result_df is not None, "Unexpected empty result." | ||
|
||
assert_object_snapshot_equal( | ||
request=request, | ||
mf_test_session_state=mf_test_session_state, | ||
obj_id="query_output", | ||
obj=query_result.result_df.to_string(), | ||
sql_client=sql_client, | ||
) | ||
|
||
|
||
@pytest.mark.sql_engine_snapshot | ||
def test_cumulative_metric_with_non_adjustable_filter( | ||
request: FixtureRequest, | ||
mf_test_session_state: MetricFlowTestSessionState, | ||
sql_client: SqlClient, | ||
it_helpers: IntegrationTestHelpers, | ||
) -> None: | ||
"""Tests a cumulative metric with a filter that cannot be adjusted to ensure all data is included.""" | ||
query_result = it_helpers.mf_engine.query( | ||
MetricFlowQueryRequest.create_with_random_request_id( | ||
metric_names=["trailing_2_months_revenue"], | ||
group_by_names=["metric_time"], | ||
order_by_names=["metric_time"], | ||
where_constraint=( | ||
"{{ TimeDimension('metric_time', 'day') }} = '2020-03-15' or " | ||
"{{ TimeDimension('metric_time', 'day') }} = '2020-04-30'" | ||
), | ||
) | ||
) | ||
assert query_result.result_df is not None, "Unexpected empty result." | ||
|
||
assert_object_snapshot_equal( | ||
request=request, | ||
mf_test_session_state=mf_test_session_state, | ||
obj_id="query_output", | ||
obj=query_result.result_df.to_string(), | ||
sql_client=sql_client, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ integration_test: | |
metrics: ["trailing_2_months_revenue"] | ||
group_bys: ["metric_time"] | ||
order_bys: ["metric_time"] | ||
time_constraint: ["2020-01-05", "2021-01-04"] | ||
time_constraint: ["2020-03-05", "2021-01-04"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left these tests in place for a couple of reasons, but one is to illustrate how the constraint adjustments need to be applied here vs in the where clause. A number of these were producing correct results on inspection of input data only because the input data didn't start until within the too-short filter window. |
||
check_query: | | ||
SELECT | ||
SUM(b.txn_revenue) as trailing_2_months_revenue | ||
|
@@ -23,6 +23,7 @@ integration_test: | |
FROM {{ source_schema }}.fct_revenue | ||
) b | ||
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }} | ||
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }} | ||
GROUP BY a.ds | ||
ORDER BY a.ds | ||
--- | ||
|
@@ -33,7 +34,7 @@ integration_test: | |
metrics: ["trailing_2_months_revenue"] | ||
group_bys: ["metric_time", "user__home_state_latest"] | ||
order_bys: ["metric_time", "user__home_state_latest"] | ||
time_constraint: ["2020-01-05", "2021-01-04"] | ||
time_constraint: ["2020-03-05", "2021-01-04"] | ||
check_query: | | ||
SELECT | ||
SUM(revenue) as trailing_2_months_revenue | ||
|
@@ -55,6 +56,7 @@ integration_test: | |
GROUP BY m.created_at, m.revenue, u.home_state_latest | ||
) b | ||
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }} | ||
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }} | ||
GROUP BY a.ds, user__home_state_latest | ||
ORDER by a.ds, user__home_state_latest | ||
--- | ||
|
@@ -65,15 +67,15 @@ integration_test: | |
metrics: ["revenue_all_time"] | ||
group_bys: ["metric_time"] | ||
order_bys: ["metric_time"] | ||
time_constraint: ["2020-01-01", "2021-01-05"] | ||
time_constraint: ["2020-03-01", "2021-01-05"] | ||
check_query: | | ||
SELECT | ||
SUM(revenue) AS revenue_all_time | ||
, a.ds AS metric_time__day | ||
FROM ( | ||
SELECT ds | ||
FROM {{ mf_time_spine_source }} | ||
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }} | ||
WHERE {{ render_time_constraint("ds", "2020-03-01", "2021-01-05") }} | ||
) a | ||
INNER JOIN ( | ||
SELECT | ||
|
@@ -102,7 +104,7 @@ integration_test: | |
metrics: ["revenue_all_time", "trailing_2_months_revenue"] | ||
group_bys: ["metric_time"] | ||
order_bys: ["metric_time"] | ||
time_constraint: ["2019-12-31", "2021-01-05"] | ||
time_constraint: ["2020-03-31", "2021-01-05"] | ||
check_query: | | ||
SELECT revenue_all_time, trailing_2_months_revenue, a.ds AS metric_time__day | ||
FROM ( | ||
|
@@ -116,7 +118,7 @@ integration_test: | |
FROM ( | ||
SELECT ds | ||
FROM {{ mf_time_spine_source }} | ||
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }} | ||
WHERE {{ render_time_constraint("ds", "2020-03-31", "2021-01-05") }} | ||
) a | ||
INNER JOIN ( | ||
SELECT | ||
|
@@ -135,7 +137,7 @@ integration_test: | |
FROM ( | ||
SELECT ds | ||
FROM {{ mf_time_spine_source }} | ||
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }} | ||
WHERE {{ render_time_constraint("ds", "2020-01-31", "2021-01-05") }} | ||
) a | ||
INNER JOIN ( | ||
SELECT | ||
|
@@ -147,6 +149,7 @@ integration_test: | |
GROUP BY a.ds | ||
) b | ||
ON a.ds = b.ds | ||
WHERE {{ render_time_constraint("a.ds", "2020-03-31", "2021-01-05") }} | ||
ORDER BY a.ds | ||
--- | ||
integration_test: | ||
|
@@ -156,7 +159,7 @@ integration_test: | |
metrics: ["trailing_2_months_revenue"] | ||
group_bys: ["metric_time__day"] | ||
order_bys: ["metric_time__day"] | ||
time_constraint: ["2020-01-05", "2021-01-04"] | ||
time_constraint: ["2020-03-05", "2021-01-04"] | ||
check_query: | | ||
SELECT | ||
SUM(b.txn_revenue) as trailing_2_months_revenue | ||
|
@@ -173,6 +176,7 @@ integration_test: | |
FROM {{ source_schema }}.fct_revenue | ||
) b | ||
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }} | ||
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }} | ||
GROUP BY a.ds | ||
ORDER BY a.ds | ||
--- | ||
|
@@ -218,7 +222,7 @@ integration_test: | |
FROM ( | ||
SELECT ds | ||
FROM {{ mf_time_spine_source }} | ||
WHERE {{ render_time_constraint("ds", "2019-12-31", "2020-01-04") }} | ||
WHERE {{ render_time_constraint("ds", "2019-12-30", "2020-01-04") }} | ||
) a | ||
INNER JOIN ( | ||
SELECT | ||
|
@@ -237,15 +241,15 @@ integration_test: | |
metrics: ["every_two_days_bookers"] | ||
group_bys: ["metric_time"] | ||
order_bys: ["metric_time"] | ||
time_constraint: ["2020-01-04", "2020-01-04"] | ||
time_constraint: ["2020-01-03", "2020-01-03"] | ||
check_query: | | ||
SELECT | ||
COUNT (DISTINCT(b.guest_id)) as every_two_days_bookers | ||
, a.ds AS metric_time__day | ||
FROM ( | ||
SELECT ds | ||
FROM {{ mf_time_spine_source }} | ||
WHERE {{ render_time_constraint("ds", "2020-01-04", "2020-01-04") }} | ||
WHERE {{ render_time_constraint("ds", "2020-01-02", "2020-01-03") }} | ||
) a | ||
INNER JOIN ( | ||
SELECT | ||
|
@@ -254,6 +258,7 @@ integration_test: | |
FROM {{ source_schema }}.fct_bookings | ||
) b | ||
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.DAY) }} | ||
WHERE {{ render_time_constraint("a.ds", "2020-01-03", "2020-01-03") }} | ||
GROUP BY a.ds | ||
ORDER BY a.ds | ||
--- | ||
|
@@ -264,15 +269,15 @@ integration_test: | |
metrics: ["revenue_mtd"] | ||
group_bys: ["metric_time"] | ||
order_bys: ["metric_time"] | ||
time_constraint: ["2020-01-01", "2021-01-05"] | ||
time_constraint: ["2021-01-04", "2021-01-05"] | ||
check_query: | | ||
SELECT | ||
SUM(b.txn_revenue) as revenue_mtd | ||
, a.ds AS metric_time__day | ||
FROM ( | ||
SELECT ds | ||
FROM {{ mf_time_spine_source }} | ||
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }} | ||
WHERE {{ render_time_constraint("ds", "2021-01-01", "2021-01-05") }} | ||
) a | ||
INNER JOIN ( | ||
SELECT | ||
|
@@ -281,6 +286,7 @@ integration_test: | |
FROM {{ source_schema }}.fct_revenue | ||
) b | ||
ON b.ds <= a.ds AND b.ds >= {{ render_date_trunc("a.ds", TimeGranularity.MONTH) }} | ||
WHERE {{ render_time_constraint("a.ds", "2021-01-04", "2021-01-05") }} | ||
GROUP BY a.ds | ||
ORDER BY a.ds | ||
--- | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the actual fix.