Skip to content

Commit

Permalink
Add tests for derived metrics with shared aliases
Browse files Browse the repository at this point in the history
This commit adds dataflow and SQL snapshot tests that verify we're not
generating duplicated aliases when combining branches.

I created 3 new dummy metrics for these tests: `derived_sharedalias_1a`,
`derived_sharedalias_1b` and `derived_sharedalias_2`. The `1a` and `1b`
variant both make `shared_alias` refer to `bookings`, while the `2`
variant makes it refer to `instant_bookings`. This way we can test the 2
cases:
- If the alias refers to the same thing (`1a` vs `1b`), we can merge
branches safely
- If the alias refers to different things (`1a` vs `2`), we cannot merge
branches safely
  • Loading branch information
serramatutu committed Oct 30, 2024
1 parent b0a54fe commit cac1d59
Show file tree
Hide file tree
Showing 12 changed files with 979 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -805,3 +805,36 @@ metric:
name: listings
filter: "{{ Metric('views', ['listing']) }} > 10"
time_granularity: week
---
# note that in both 1a and 1b "shared_alias" refer to "bookinks", whereas
# in 2 it refers to "instant_bookings", hence the naming difference
metric:
name: derived_sharedalias_1a
description: "Minimal repro case for derived metrics which give the same alias for different underlying metrics"
type: derived
type_params:
expr: shared_alias - 10
metrics:
- name: bookings
alias: shared_alias
---
metric:
name: derived_sharedalias_1b
description: "Minimal repro case for derived metrics which give the same alias for different underlying metrics"
type: derived
type_params:
expr: shared_alias - 100
metrics:
- name: bookings
alias: shared_alias
---
metric:
name: derived_sharedalias_2
description: "Minimal repro case for derived metrics which give the same alias for different underlying metrics"
type: derived
type_params:
expr: shared_alias + 10
metrics:
- name: instant_bookings
alias: shared_alias
---
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
'listing__booking__listing__bookings_per_dollar',
'listing__booking__listing__derived_bookings_0',
'listing__booking__listing__derived_bookings_1',
'listing__booking__listing__derived_sharedalias_1a',
'listing__booking__listing__derived_sharedalias_1b',
'listing__booking__listing__derived_sharedalias_2',
'listing__booking__listing__discrete_booking_value_p99',
'listing__booking__listing__double_counted_delayed_bookings',
'listing__booking__listing__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -90,6 +93,9 @@
'listing__created_at__year',
'listing__derived_bookings_0',
'listing__derived_bookings_1',
'listing__derived_sharedalias_1a',
'listing__derived_sharedalias_1b',
'listing__derived_sharedalias_2',
'listing__discrete_booking_value_p99',
'listing__double_counted_delayed_bookings',
'listing__ds__day',
Expand Down Expand Up @@ -441,6 +447,9 @@
'user__listing__user__bookings_per_view',
'user__listing__user__derived_bookings_0',
'user__listing__user__derived_bookings_1',
'user__listing__user__derived_sharedalias_1a',
'user__listing__user__derived_sharedalias_1b',
'user__listing__user__derived_sharedalias_2',
'user__listing__user__discrete_booking_value_p99',
'user__listing__user__double_counted_delayed_bookings',
'user__listing__user__instant_booking_fraction_of_max_value',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_bookings_0 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_bookings_1 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_sharedalias_1a ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_sharedalias_1b ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_sharedalias_2 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") discrete_booking_value_p99 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") double_counted_delayed_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_booking_fraction_of_max_value ['JOINED', 'METRIC']
Expand Down Expand Up @@ -94,6 +97,9 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_view ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") derived_bookings_0 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") derived_bookings_1 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") derived_sharedalias_1a ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") derived_sharedalias_1b ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") derived_sharedalias_2 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") discrete_booking_value_p99 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") double_counted_delayed_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") instant_booking_fraction_of_max_value ['JOINED', 'METRIC']
Expand Down Expand Up @@ -156,6 +162,9 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_view ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") derived_bookings_0 ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") derived_bookings_1 ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") derived_sharedalias_1a ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") derived_sharedalias_1b ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") derived_sharedalias_2 ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") discrete_booking_value_p99 ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") double_counted_delayed_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") instant_booking_fraction_of_max_value ['JOINED', 'METRIC']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
'company__listing__user__company__bookings_per_view',
'company__listing__user__company__derived_bookings_0',
'company__listing__user__company__derived_bookings_1',
'company__listing__user__company__derived_sharedalias_1a',
'company__listing__user__company__derived_sharedalias_1b',
'company__listing__user__company__derived_sharedalias_2',
'company__listing__user__company__discrete_booking_value_p99',
'company__listing__user__company__double_counted_delayed_bookings',
'company__listing__user__company__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -146,6 +149,9 @@
'guest__booking__guest__bookings_per_dollar',
'guest__booking__guest__derived_bookings_0',
'guest__booking__guest__derived_bookings_1',
'guest__booking__guest__derived_sharedalias_1a',
'guest__booking__guest__derived_sharedalias_1b',
'guest__booking__guest__derived_sharedalias_2',
'guest__booking__guest__discrete_booking_value_p99',
'guest__booking__guest__double_counted_delayed_bookings',
'guest__booking__guest__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -181,6 +187,9 @@
'guest__bookings_per_dollar',
'guest__derived_bookings_0',
'guest__derived_bookings_1',
'guest__derived_sharedalias_1a',
'guest__derived_sharedalias_1b',
'guest__derived_sharedalias_2',
'guest__discrete_booking_value_p99',
'guest__double_counted_delayed_bookings',
'guest__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -227,6 +236,9 @@
'host__booking__host__bookings_per_dollar',
'host__booking__host__derived_bookings_0',
'host__booking__host__derived_bookings_1',
'host__booking__host__derived_sharedalias_1a',
'host__booking__host__derived_sharedalias_1b',
'host__booking__host__derived_sharedalias_2',
'host__booking__host__discrete_booking_value_p99',
'host__booking__host__double_counted_delayed_bookings',
'host__booking__host__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -262,6 +274,9 @@
'host__bookings_per_dollar',
'host__derived_bookings_0',
'host__derived_bookings_1',
'host__derived_sharedalias_1a',
'host__derived_sharedalias_1b',
'host__derived_sharedalias_2',
'host__discrete_booking_value_p99',
'host__double_counted_delayed_bookings',
'host__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -309,6 +324,9 @@
'listing__booking__listing__bookings_per_dollar',
'listing__booking__listing__derived_bookings_0',
'listing__booking__listing__derived_bookings_1',
'listing__booking__listing__derived_sharedalias_1a',
'listing__booking__listing__derived_sharedalias_1b',
'listing__booking__listing__derived_sharedalias_2',
'listing__booking__listing__discrete_booking_value_p99',
'listing__booking__listing__double_counted_delayed_bookings',
'listing__booking__listing__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -357,6 +375,9 @@
'listing__created_at__extract_year',
'listing__derived_bookings_0',
'listing__derived_bookings_1',
'listing__derived_sharedalias_1a',
'listing__derived_sharedalias_1b',
'listing__derived_sharedalias_2',
'listing__discrete_booking_value_p99',
'listing__double_counted_delayed_bookings',
'listing__ds__day',
Expand Down Expand Up @@ -421,6 +442,9 @@
'lux_listing__listing__lux_listing__bookings_per_view',
'lux_listing__listing__lux_listing__derived_bookings_0',
'lux_listing__listing__lux_listing__derived_bookings_1',
'lux_listing__listing__lux_listing__derived_sharedalias_1a',
'lux_listing__listing__lux_listing__derived_sharedalias_1b',
'lux_listing__listing__lux_listing__derived_sharedalias_2',
'lux_listing__listing__lux_listing__discrete_booking_value_p99',
'lux_listing__listing__lux_listing__double_counted_delayed_bookings',
'lux_listing__listing__lux_listing__instant_booking_fraction_of_max_value',
Expand Down Expand Up @@ -564,6 +588,9 @@
'user__listing__user__bookings_per_view',
'user__listing__user__derived_bookings_0',
'user__listing__user__derived_bookings_1',
'user__listing__user__derived_sharedalias_1a',
'user__listing__user__derived_sharedalias_1b',
'user__listing__user__derived_sharedalias_2',
'user__listing__user__discrete_booking_value_p99',
'user__listing__user__double_counted_delayed_bookings',
'user__listing__user__instant_booking_fraction_of_max_value',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,60 @@ def test_derived_metric_with_non_derived_metric(
)


@pytest.mark.sql_engine_snapshot
def test_derived_metric_same_alias_components_combined(
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
dataflow_plan_builder: DataflowPlanBuilder,
) -> None:
"""Tests optimization of querying 2 metrics which give the same alias to the same thing in their components.
In this case we DO combine source nodes, since the components are the same exact thing so we don't need to
scan over it twice
"""
check_optimization(
request=request,
mf_test_configuration=mf_test_configuration,
dataflow_plan_builder=dataflow_plan_builder,
query_spec=MetricFlowQuerySpec(
metric_specs=(
MetricSpec(element_name="derived_sharedalias_1a"),
MetricSpec(element_name="derived_sharedalias_1b"),
),
dimension_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),),
),
expected_num_sources_in_unoptimized=2,
expected_num_sources_in_optimized=1,
)


@pytest.mark.sql_engine_snapshot
def test_derived_metric_same_alias_components_not_combined(
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
dataflow_plan_builder: DataflowPlanBuilder,
) -> None:
"""Tests optimization of querying 2 metrics which give the same alias different things in their components.
In this case we should NOT combine source nodes, since this would generate two columns with
the same alias.
"""
check_optimization(
request=request,
mf_test_configuration=mf_test_configuration,
dataflow_plan_builder=dataflow_plan_builder,
query_spec=MetricFlowQuerySpec(
metric_specs=(
MetricSpec(element_name="derived_sharedalias_1a"),
MetricSpec(element_name="derived_sharedalias_2"),
),
dimension_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),),
),
expected_num_sources_in_unoptimized=2,
expected_num_sources_in_optimized=2,
)


@pytest.mark.sql_engine_snapshot
def test_2_ratio_metrics_from_1_semantic_model(
request: FixtureRequest,
Expand Down
26 changes: 26 additions & 0 deletions tests_metricflow/query_rendering/test_derived_metric_rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,3 +795,29 @@ def test_offset_to_grain_metric_filter_and_query_have_different_granularities(
dataflow_plan_builder=dataflow_plan_builder,
query_spec=query_spec,
)


@pytest.mark.sql_engine_snapshot
def test_derived_metric_that_defines_the_same_alias_in_different_components(
request: FixtureRequest,
mf_test_configuration: MetricFlowTestConfiguration,
dataflow_plan_builder: DataflowPlanBuilder,
query_parser: MetricFlowQueryParser,
sql_client: SqlClient,
dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
) -> None:
"""Tests querying a cumulative metric which give the same alias to its components."""
query_spec = query_parser.parse_and_validate_query(
metric_names=("derived_sharedalias_1a", "derived_sharedalias_2"),
group_by_names=("booking__is_instant",),
limit=1,
).query_spec

render_and_check(
request=request,
mf_test_configuration=mf_test_configuration,
dataflow_to_sql_converter=dataflow_to_sql_converter,
sql_client=sql_client,
dataflow_plan_builder=dataflow_plan_builder,
query_spec=query_spec,
)
Loading

0 comments on commit cac1d59

Please sign in to comment.