Skip to content

Commit

Permalink
Fix incorrect time constraint adjustment for cumulative metrics
Browse files Browse the repository at this point in the history
When users submit CLI queries with the --start-time and --end-time
parameters, MetricFlow will automatically adjust the time constraint
window to include all relevant cumulative metric input.

Unfortunately, we got over-zealous with fencepost adjustments and
introduced an off-by-one error that meant cumulative metric
queries would only be correct when the specified granularity was
DAILY.

We never caught this issue because the error in the rendered SQL is
subtle, and all of our test input data on the integration tests
happened to fit within the artificially shortened constraint windows
in all of the cases where we were testing non-daily granularity
queries.

This change updates the integration test configurations to ensure
boundary conditions will be caught in the future. In addition, the
fix alters the values of the query output snapshot tests, which,
when compared with the relevant input test data, will reveal the
nature of the original bug and the effects of this fix.
  • Loading branch information
tlento committed Nov 15, 2023
1 parent 9ffa83b commit e628d6e
Show file tree
Hide file tree
Showing 27 changed files with 81 additions and 69 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231114-171137.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix error in cumulative metric output when start-time and end-time are specified
time: 2023-11-14T17:11:37.462241-08:00
custom:
Author: tlento
Issue: "869"
4 changes: 2 additions & 2 deletions metricflow/filters/time_constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _adjust_time_constraint_start_by_window(
time_granularity: TimeGranularity,
time_unit_count: int,
) -> TimeRangeConstraint:
"""Moves the start of the time constraint back by 1 window.
"""Moves the start of the time constraint back by <time_unit_count> windows.
if the metric is weekly-active-users (ie window = 1 week) it moves time_constraint.start one week earlier
"""
Expand All @@ -80,7 +80,7 @@ def adjust_time_constraint_for_cumulative_metric(
) -> TimeRangeConstraint:
"""Given a time constraint for the overall query, adjust it to cover the time range for this metric."""
if granularity is not None:
return self._adjust_time_constraint_start_by_window(granularity, count - 1)
return self._adjust_time_constraint_start_by_window(granularity, count)

# if no window is specified we want to accumulate from the beginning of time
return TimeRangeConstraint(
Expand Down
32 changes: 19 additions & 13 deletions metricflow/test/integration/test_cases/itest_cumulative_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(b.txn_revenue) as trailing_2_months_revenue
Expand All @@ -23,6 +23,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand All @@ -33,7 +34,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time", "user__home_state_latest"]
order_bys: ["metric_time", "user__home_state_latest"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(revenue) as trailing_2_months_revenue
Expand All @@ -55,6 +56,7 @@ integration_test:
GROUP BY m.created_at, m.revenue, u.home_state_latest
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds, user__home_state_latest
ORDER by a.ds, user__home_state_latest
---
Expand All @@ -65,15 +67,15 @@ integration_test:
metrics: ["revenue_all_time"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-01", "2021-01-05"]
time_constraint: ["2020-03-01", "2021-01-05"]
check_query: |
SELECT
SUM(revenue) AS revenue_all_time
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-03-01", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand Down Expand Up @@ -102,7 +104,7 @@ integration_test:
metrics: ["revenue_all_time", "trailing_2_months_revenue"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2019-12-31", "2021-01-05"]
time_constraint: ["2020-03-31", "2021-01-05"]
check_query: |
SELECT revenue_all_time, trailing_2_months_revenue, a.ds AS metric_time__day
FROM (
Expand All @@ -116,7 +118,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-03-31", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -135,7 +137,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2020-01-31", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -147,6 +149,7 @@ integration_test:
GROUP BY a.ds
) b
ON a.ds = b.ds
WHERE {{ render_time_constraint("a.ds", "2020-03-31", "2021-01-05") }}
ORDER BY a.ds
---
integration_test:
Expand All @@ -156,7 +159,7 @@ integration_test:
metrics: ["trailing_2_months_revenue"]
group_bys: ["metric_time__day"]
order_bys: ["metric_time__day"]
time_constraint: ["2020-01-05", "2021-01-04"]
time_constraint: ["2020-03-05", "2021-01-04"]
check_query: |
SELECT
SUM(b.txn_revenue) as trailing_2_months_revenue
Expand All @@ -173,6 +176,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand Down Expand Up @@ -218,7 +222,7 @@ integration_test:
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2019-12-31", "2020-01-04") }}
WHERE {{ render_time_constraint("ds", "2019-12-30", "2020-01-04") }}
) a
INNER JOIN (
SELECT
Expand All @@ -237,15 +241,15 @@ integration_test:
metrics: ["every_two_days_bookers"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-04", "2020-01-04"]
time_constraint: ["2020-01-03", "2020-01-03"]
check_query: |
SELECT
COUNT (DISTINCT(b.guest_id)) as every_two_days_bookers
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-04", "2020-01-04") }}
WHERE {{ render_time_constraint("ds", "2020-01-02", "2020-01-03") }}
) a
INNER JOIN (
SELECT
Expand All @@ -254,6 +258,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings
) b
ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.DAY) }}
WHERE {{ render_time_constraint("a.ds", "2020-01-03", "2020-01-03") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand All @@ -264,15 +269,15 @@ integration_test:
metrics: ["revenue_mtd"]
group_bys: ["metric_time"]
order_bys: ["metric_time"]
time_constraint: ["2020-01-01", "2021-01-05"]
time_constraint: ["2021-01-04", "2021-01-05"]
check_query: |
SELECT
SUM(b.txn_revenue) as revenue_mtd
, a.ds AS metric_time__day
FROM (
SELECT ds
FROM {{ mf_time_spine_source }}
WHERE {{ render_time_constraint("ds", "2020-01-01", "2021-01-05") }}
WHERE {{ render_time_constraint("ds", "2021-01-01", "2021-01-05") }}
) a
INNER JOIN (
SELECT
Expand All @@ -281,6 +286,7 @@ integration_test:
FROM {{ source_schema }}.fct_revenue
) b
ON b.ds <= a.ds AND b.ds >= {{ render_date_trunc("a.ds", TimeGranularity.MONTH) }}
WHERE {{ render_time_constraint("a.ds", "2021-01-04", "2021-01-05") }}
GROUP BY a.ds
ORDER BY a.ds
---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC(created_at, day) AS metric_time__day
, DATE_TRUNC(created_at, month) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC(created_at, day) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC(created_at, day) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC('day', created_at) AS metric_time__day
, DATE_TRUNC('month', created_at) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC('day', created_at) AS metric_time__day
, DATE_TRUNC('month', created_at) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC('day', created_at) AS metric_time__day
, DATE_TRUNC('month', created_at) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC('day', created_at) AS metric_time__day
, DATE_TRUNC('month', created_at) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ FROM (
WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01'
) subq_3
INNER JOIN (
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
subq_1.ds__day
, subq_1.ds__week
Expand Down Expand Up @@ -173,7 +173,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN '2019-11-01' AND '2020-01-01'
) subq_2
ON
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ FROM (
INNER JOIN (
-- Read Elements From Semantic Model 'revenue'
-- Metric Time Dimension 'ds'
-- Constrain Time Range to [2019-12-01T00:00:00, 2020-01-01T00:00:00]
-- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00]
SELECT
DATE_TRUNC('day', created_at) AS metric_time__day
, DATE_TRUNC('month', created_at) AS metric_time__month
, revenue AS txn_revenue
FROM ***************************.fct_revenue revenue_src_10006
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-12-01' AND '2020-01-01'
WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-11-01' AND '2020-01-01'
) subq_11
ON
(
Expand Down
Loading

0 comments on commit e628d6e

Please sign in to comment.