Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unneeded group bys from time spine dataset #1453

Merged
merged 3 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20241009-174346.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Remove unnecessary group bys that make queries less efficient.
time: 2024-10-09T17:43:46.011252-07:00
custom:
  Author: courtneyholcomb
  Issue: "1453"
8 changes: 4 additions & 4 deletions metricflow/plan_conversion/dataflow_to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,18 +263,18 @@ def _make_time_spine_data_set(
table_alias=time_spine_table_alias, column_name=time_spine_source.base_column
)
select_columns: Tuple[SqlSelectColumn, ...] = ()
apply_group_by = False
apply_group_by = True
for agg_time_dimension_instance in agg_time_dimension_instances:
column_alias = self.column_association_resolver.resolve_spec(agg_time_dimension_instance.spec).column_name
# If the requested granularity is the same as the granularity of the spine, do a direct select.
# TODO: also handle date part.
agg_time_grain = agg_time_dimension_instance.spec.time_granularity
assert (
not agg_time_grain.is_custom_granularity
), "Custom time granularities are not yet supported for all queries."
if agg_time_grain.base_granularity == time_spine_source.base_granularity:
select_columns += (SqlSelectColumn(expr=column_expr, column_alias=column_alias),)
# If any columns have a different granularity, apply a DATE_TRUNC() and aggregate via group_by.
apply_group_by = False
# If any columns have a different granularity, apply a DATE_TRUNC().
else:
select_columns += (
SqlSelectColumn(
Expand All @@ -284,7 +284,7 @@ def _make_time_spine_data_set(
column_alias=column_alias,
),
)
apply_group_by = True
# TODO: also handle date part.

return SqlDataSet(
instance_set=time_spine_instance_set,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,6 @@ FROM (
, DATETIME_TRUNC(subq_3.ds, isoweek) AS metric_time__week
, DATETIME_TRUNC(subq_3.ds, quarter) AS metric_time__quarter
FROM ***************************.mf_time_spine subq_3
GROUP BY
metric_time__day
, metric_time__week
, metric_time__quarter
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,16 @@ FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_11.metric_time__day AS metric_time__day
, subq_11.metric_time__week AS metric_time__week
, subq_11.metric_time__quarter AS metric_time__quarter
subq_12.ds AS metric_time__day
, DATETIME_TRUNC(subq_12.ds, isoweek) AS metric_time__week
, DATETIME_TRUNC(subq_12.ds, quarter) AS metric_time__quarter
, SUM(revenue_src_28000.revenue) AS revenue_all_time
FROM (
-- Time Spine
SELECT
ds AS metric_time__day
, DATETIME_TRUNC(ds, isoweek) AS metric_time__week
, DATETIME_TRUNC(ds, quarter) AS metric_time__quarter
FROM ***************************.mf_time_spine subq_12
GROUP BY
metric_time__day
, metric_time__week
, metric_time__quarter
) subq_11
FROM ***************************.mf_time_spine subq_12
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_11.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_12.ds
)
GROUP BY
metric_time__day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ FROM (
DATETIME_TRUNC(subq_3.ds, month) AS revenue_instance__ds__month
, subq_3.ds AS metric_time__day
FROM ***************************.mf_time_spine subq_3
GROUP BY
revenue_instance__ds__month
, metric_time__day
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,17 @@
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_9.revenue_instance__ds__month AS revenue_instance__ds__month
, subq_9.metric_time__day AS metric_time__day
DATETIME_TRUNC(subq_10.ds, month) AS revenue_instance__ds__month
, subq_10.ds AS metric_time__day
, SUM(revenue_src_28000.revenue) AS trailing_2_months_revenue
FROM (
-- Time Spine
SELECT
DATETIME_TRUNC(ds, month) AS revenue_instance__ds__month
, ds AS metric_time__day
FROM ***************************.mf_time_spine subq_10
GROUP BY
revenue_instance__ds__month
, metric_time__day
) subq_9
FROM ***************************.mf_time_spine subq_10
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_9.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_10.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_9.metric_time__day AS DATETIME), INTERVAL 2 month)
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_10.ds AS DATETIME), INTERVAL 2 month)
)
GROUP BY
revenue_instance__ds__month
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ FROM (
subq_3.ds AS revenue_instance__ds__day
, DATETIME_TRUNC(subq_3.ds, month) AS revenue_instance__ds__month
FROM ***************************.mf_time_spine subq_3
GROUP BY
revenue_instance__ds__day
, revenue_instance__ds__month
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,17 @@
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_9.revenue_instance__ds__day AS revenue_instance__ds__day
, subq_9.revenue_instance__ds__month AS revenue_instance__ds__month
subq_10.ds AS revenue_instance__ds__day
, DATETIME_TRUNC(subq_10.ds, month) AS revenue_instance__ds__month
, SUM(revenue_src_28000.revenue) AS trailing_2_months_revenue
FROM (
-- Time Spine
SELECT
ds AS revenue_instance__ds__day
, DATETIME_TRUNC(ds, month) AS revenue_instance__ds__month
FROM ***************************.mf_time_spine subq_10
GROUP BY
revenue_instance__ds__day
, revenue_instance__ds__month
) subq_9
FROM ***************************.mf_time_spine subq_10
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_9.revenue_instance__ds__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_10.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_9.revenue_instance__ds__day AS DATETIME), INTERVAL 2 month)
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_10.ds AS DATETIME), INTERVAL 2 month)
)
GROUP BY
revenue_instance__ds__day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ FROM (
subq_3.ds AS metric_time__day
, DATETIME_TRUNC(subq_3.ds, month) AS metric_time__month
FROM ***************************.mf_time_spine subq_3
GROUP BY
metric_time__day
, metric_time__month
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,17 @@
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_9.metric_time__day AS metric_time__day
, subq_9.metric_time__month AS metric_time__month
subq_10.ds AS metric_time__day
, DATETIME_TRUNC(subq_10.ds, month) AS metric_time__month
, SUM(revenue_src_28000.revenue) AS trailing_2_months_revenue
FROM (
-- Time Spine
SELECT
ds AS metric_time__day
, DATETIME_TRUNC(ds, month) AS metric_time__month
FROM ***************************.mf_time_spine subq_10
GROUP BY
metric_time__day
, metric_time__month
) subq_9
FROM ***************************.mf_time_spine subq_10
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_9.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_10.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_9.metric_time__day AS DATETIME), INTERVAL 2 month)
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_10.ds AS DATETIME), INTERVAL 2 month)
)
GROUP BY
metric_time__day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ FROM (
subq_3.ds AS metric_time__day
, DATETIME_TRUNC(subq_3.ds, isoweek) AS metric_time__week
FROM ***************************.mf_time_spine subq_3
GROUP BY
metric_time__day
, metric_time__week
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,15 @@ FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_11.metric_time__day AS metric_time__day
, subq_11.metric_time__week AS metric_time__week
subq_12.ds AS metric_time__day
, DATETIME_TRUNC(subq_12.ds, isoweek) AS metric_time__week
, SUM(revenue_src_28000.revenue) AS revenue_all_time
FROM (
-- Time Spine
SELECT
ds AS metric_time__day
, DATETIME_TRUNC(ds, isoweek) AS metric_time__week
FROM ***************************.mf_time_spine subq_12
GROUP BY
metric_time__day
, metric_time__week
) subq_11
FROM ***************************.mf_time_spine subq_12
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_11.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_12.ds
)
GROUP BY
metric_time__day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,6 @@ FROM (
subq_3.ds AS metric_time__day
, DATETIME_TRUNC(subq_3.ds, isoweek) AS metric_time__week
FROM ***************************.mf_time_spine subq_3
GROUP BY
metric_time__day
, metric_time__week
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,17 @@ FROM (
-- Pass Only Elements: ['txn_revenue', 'metric_time__week', 'metric_time__day']
-- Aggregate Measures
SELECT
subq_12.metric_time__day AS metric_time__day
, subq_12.metric_time__week AS metric_time__week
subq_13.ds AS metric_time__day
, DATETIME_TRUNC(subq_13.ds, isoweek) AS metric_time__week
, SUM(revenue_src_28000.revenue) AS txn_revenue
FROM (
-- Time Spine
SELECT
ds AS metric_time__day
, DATETIME_TRUNC(ds, isoweek) AS metric_time__week
FROM ***************************.mf_time_spine subq_13
GROUP BY
metric_time__day
, metric_time__week
) subq_12
FROM ***************************.mf_time_spine subq_13
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_12.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_13.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_12.metric_time__day AS DATETIME), INTERVAL 2 month)
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_13.ds AS DATETIME), INTERVAL 2 month)
)
GROUP BY
metric_time__day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ FROM (
subq_3.ds AS metric_time__day
, DATETIME_TRUNC(subq_3.ds, month) AS metric_time__month
FROM ***************************.mf_time_spine subq_3
GROUP BY
metric_time__day
, metric_time__month
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,17 @@ FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_11.metric_time__day AS metric_time__day
, subq_11.metric_time__month AS metric_time__month
subq_12.ds AS metric_time__day
, DATETIME_TRUNC(subq_12.ds, month) AS metric_time__month
, SUM(revenue_src_28000.revenue) AS revenue_mtd
FROM (
-- Time Spine
SELECT
ds AS metric_time__day
, DATETIME_TRUNC(ds, month) AS metric_time__month
FROM ***************************.mf_time_spine subq_12
GROUP BY
metric_time__day
, metric_time__month
) subq_11
FROM ***************************.mf_time_spine subq_12
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_11.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_12.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) >= DATETIME_TRUNC(subq_11.metric_time__day, month)
DATETIME_TRUNC(revenue_src_28000.created_at, day) >= DATETIME_TRUNC(subq_12.ds, month)
)
GROUP BY
metric_time__day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,6 @@ FROM (
, DATETIME_TRUNC(subq_3.ds, year) AS revenue_instance__ds__year
, subq_3.ds AS metric_time__day
FROM ***************************.mf_time_spine subq_3
GROUP BY
revenue_instance__ds__quarter
, revenue_instance__ds__year
, metric_time__day
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,18 @@ FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_11.revenue_instance__ds__quarter AS revenue_instance__ds__quarter
, subq_11.revenue_instance__ds__year AS revenue_instance__ds__year
, subq_11.metric_time__day AS metric_time__day
DATETIME_TRUNC(subq_12.ds, quarter) AS revenue_instance__ds__quarter
, DATETIME_TRUNC(subq_12.ds, year) AS revenue_instance__ds__year
, subq_12.ds AS metric_time__day
, SUM(revenue_src_28000.revenue) AS revenue_mtd
FROM (
-- Time Spine
SELECT
DATETIME_TRUNC(ds, quarter) AS revenue_instance__ds__quarter
, DATETIME_TRUNC(ds, year) AS revenue_instance__ds__year
, ds AS metric_time__day
FROM ***************************.mf_time_spine subq_12
GROUP BY
revenue_instance__ds__quarter
, revenue_instance__ds__year
, metric_time__day
) subq_11
FROM ***************************.mf_time_spine subq_12
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_11.metric_time__day
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_12.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) >= DATETIME_TRUNC(subq_11.metric_time__day, month)
DATETIME_TRUNC(revenue_src_28000.created_at, day) >= DATETIME_TRUNC(subq_12.ds, month)
)
GROUP BY
revenue_instance__ds__quarter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ FROM (
subq_3.ds AS metric_time__day
, DATETIME_TRUNC(subq_3.ds, year) AS metric_time__year
FROM ***************************.mf_time_spine subq_3
GROUP BY
metric_time__day
, metric_time__year
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
Expand Down
Loading
Loading