Skip to content

Commit

Permalink
Merge pull request #128 from dbt-labs/plypaul--44--primary-entity-in-…
Browse files Browse the repository at this point in the history
…where

Add Primary Entity Prefix When Specifying Dimensions in the WhereFilter
  • Loading branch information
plypaul authored Jul 28, 2023
2 parents f36dbe1 + f322968 commit cc7a031
Show file tree
Hide file tree
Showing 7 changed files with 253 additions and 21 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Breaking Changes-20230727-165005.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Breaking Changes
body: Require primary entities when specifying dimensions in the WhereFilter.
time: 2023-07-27T16:50:05.653782-07:00
custom:
Author: plypaul
Issue: "123"
Empty file.
138 changes: 138 additions & 0 deletions dbt_semantic_interfaces/naming/dundered.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Optional, Tuple

from dbt_semantic_interfaces.naming.keywords import DUNDER
from dbt_semantic_interfaces.references import EntityReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class StructuredDunderedName:
"""Group by items (e.g. dimensions / entities) in a query that are named using a double underscore as a seperator.
e.g. listing__ds__week ->
entity_links: ["listing"]
element_name: "ds"
granularity: TimeGranularity.WEEK
The time granularity is part of legacy query syntax and there are plans to migrate away from this format.
"""

entity_links: Tuple[EntityReference, ...]
element_name: str
time_granularity: Optional[TimeGranularity] = None

@staticmethod
def parse_name(name: str) -> StructuredDunderedName:
"""Construct from a string like 'listing__ds__month'."""
name_parts = name.split(DUNDER)

# No dunder, e.g. "ds"
if len(name_parts) == 1:
return StructuredDunderedName((), name_parts[0])

associated_granularity = None
granularity: TimeGranularity
for granularity in TimeGranularity:
if name_parts[-1] == granularity.value:
associated_granularity = granularity

# Has a time granularity
if associated_granularity:
# e.g. "ds__month"
if len(name_parts) == 2:
return StructuredDunderedName((), name_parts[0], associated_granularity)
# e.g. "messages__ds__month"
return StructuredDunderedName(
entity_links=tuple(EntityReference(element_name=entity_name) for entity_name in name_parts[:-2]),
element_name=name_parts[-2],
time_granularity=associated_granularity,
)
# e.g. "messages__ds"
else:
return StructuredDunderedName(
entity_links=tuple(EntityReference(element_name=entity_name) for entity_name in name_parts[:-1]),
element_name=name_parts[-1],
)

@property
def dundered_name(self) -> str:
"""Return the full name form. e.g. ds or listing__ds__month."""
items = [entity_reference.element_name for entity_reference in self.entity_links] + [self.element_name]
if self.time_granularity and self.time_granularity != TimeGranularity.DAY:
items.append(self.time_granularity.value)
return DUNDER.join(items)

@property
def dundered_name_without_granularity(self) -> str:
"""Return the name without the time granularity. e.g. listing__ds__month -> listing__ds."""
return DUNDER.join(
tuple(entity_reference.element_name for entity_reference in self.entity_links) + (self.element_name,)
)

@property
def dundered_name_without_entity(self) -> str:
"""Return the name without the entity. e.g. listing__ds__month -> ds__month."""
return DUNDER.join((self.element_name,) + ((self.time_granularity.value,) if self.time_granularity else ()))

@property
def entity_prefix(self) -> Optional[str]:
"""Return the entity prefix. e.g. listing__ds__month -> listing."""
if len(self.entity_links) > 0:
return DUNDER.join(tuple(entity_reference.element_name for entity_reference in self.entity_links))

return None


class DunderedNameFormatter:
"""Helps to parse names into StructuredDunderedName and vice versa."""

@staticmethod
def parse_name(name: str) -> StructuredDunderedName:
"""Construct from a string like 'listing__ds__month'."""
name_parts = name.split(DUNDER)

# No dunder, e.g. "ds"
if len(name_parts) == 1:
return StructuredDunderedName((), name_parts[0])

associated_granularity = None
granularity: TimeGranularity
for granularity in TimeGranularity:
if name_parts[-1] == granularity.value:
associated_granularity = granularity

# Has a time granularity
if associated_granularity:
# e.g. "ds__month"
if len(name_parts) == 2:
return StructuredDunderedName((), name_parts[0], associated_granularity)
# e.g. "messages__ds__month"
return StructuredDunderedName(
entity_links=tuple(EntityReference(element_name=entity_name) for entity_name in name_parts[:-2]),
element_name=name_parts[-2],
time_granularity=associated_granularity,
)
# e.g. "messages__ds"
else:
return StructuredDunderedName(
entity_links=tuple(EntityReference(element_name=entity_name) for entity_name in name_parts[:-1]),
element_name=name_parts[-1],
)

@staticmethod
def create_structured_name( # noqa: D
element_name: str,
entity_links: Tuple[EntityReference, ...] = (),
time_granularity: Optional[TimeGranularity] = None,
) -> StructuredDunderedName:
return StructuredDunderedName(
entity_links=entity_links,
element_name=element_name,
time_granularity=time_granularity,
)
11 changes: 11 additions & 0 deletions dbt_semantic_interfaces/naming/keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# A double underscore used as a seperator in group by item names.
# e.g. user__country
DUNDER = "__"

# The name for the time dimension used to tabulate / plot metrics.
METRIC_TIME_ELEMENT_NAME = "metric_time"


def is_metric_time_name(element_name: str) -> bool:
"""Returns True if the given element name corresponds to metric time."""
return element_name == METRIC_TIME_ELEMENT_NAME
58 changes: 54 additions & 4 deletions dbt_semantic_interfaces/parsing/where_filter_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
ParseWhereFilterException,
TimeDimensionCallParameterSet,
)
from dbt_semantic_interfaces.naming.dundered import DunderedNameFormatter
from dbt_semantic_interfaces.naming.keywords import (
METRIC_TIME_ELEMENT_NAME,
is_metric_time_name,
)
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
Expand All @@ -24,6 +29,13 @@
class WhereFilterParser:
"""Parses the template in the WhereFilter into FilterCallParameterSets."""

@staticmethod
def _exception_message_for_incorrect_format(element_name: str) -> str:
return (
f"Name is in an incorrect format: '{element_name}'. It should be of the form: "
f"<primary entity name>__<dimension_name>"
)

@staticmethod
def parse_call_parameter_sets(where_sql_template: str) -> FilterCallParameterSets:
"""Return the result of extracting the semantic objects referenced in the where SQL template string."""
Expand All @@ -38,10 +50,19 @@ def parse_call_parameter_sets(where_sql_template: str) -> FilterCallParameterSet

def _dimension_call(dimension_name: str, entity_path: Sequence[str] = ()) -> str:
"""Gets called by Jinja when rendering {{ dimension(...) }}."""
group_by_item_name = DunderedNameFormatter.parse_name(dimension_name)
if len(group_by_item_name.entity_links) != 1:
raise ParseWhereFilterException(
WhereFilterParser._exception_message_for_incorrect_format(dimension_name)
)

dimension_call_parameter_sets.append(
DimensionCallParameterSet(
dimension_reference=DimensionReference(element_name=dimension_name),
entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
dimension_reference=DimensionReference(element_name=group_by_item_name.element_name),
entity_path=(
tuple(EntityReference(element_name=arg) for arg in entity_path)
+ group_by_item_name.entity_links
),
)
)
return _DUMMY_PLACEHOLDER
Expand All @@ -50,17 +71,46 @@ def _time_dimension_call(
time_dimension_name: str, time_granularity_name: str, entity_path: Sequence[str] = ()
) -> str:
"""Gets called by Jinja when rendering {{ time_dimension(...) }}."""
group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name)

# metric_time is the only time dimension that does not have an associated primary entity, so the
# GroupByItemName would not have any entity links.
if is_metric_time_name(group_by_item_name.element_name):
if len(group_by_item_name.entity_links) != 0 or group_by_item_name.time_granularity is not None:
raise ParseWhereFilterException(
WhereFilterParser._exception_message_for_incorrect_format(
f"Name is in an incorrect format: {time_dimension_name} "
f"When referencing {METRIC_TIME_ELEMENT_NAME}, the name should not have any dunders."
)
)

else:
if len(group_by_item_name.entity_links) != 1 or group_by_item_name.time_granularity is not None:
raise ParseWhereFilterException(
WhereFilterParser._exception_message_for_incorrect_format(time_dimension_name)
)

time_dimension_call_parameter_sets.append(
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name=time_dimension_name),
entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name),
entity_path=(
tuple(EntityReference(element_name=arg) for arg in entity_path)
+ group_by_item_name.entity_links
),
time_granularity=TimeGranularity(time_granularity_name),
)
)
return _DUMMY_PLACEHOLDER

def _entity_call(entity_name: str, entity_path: Sequence[str] = ()) -> str:
"""Gets called by Jinja when rendering {{ entity(...) }}."""
group_by_item_name = DunderedNameFormatter.parse_name(entity_name)
if len(group_by_item_name.entity_links) > 0 or group_by_item_name.time_granularity is not None:
WhereFilterParser._exception_message_for_incorrect_format(
f"Name is in an incorrect format: {entity_name} "
f"When referencing entities, the name should not have any dunders."
)

entity_call_parameter_sets.append(
EntityCallParameterSet(
entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ metric:
type_params:
measure:
name: booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
---
metric:
name: "average_instant_booking_value"
Expand All @@ -63,7 +63,7 @@ metric:
type_params:
measure:
name: average_booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
---
metric:
name: "booking_value_for_non_null_listing_id"
Expand Down Expand Up @@ -113,7 +113,7 @@ metric:
type_params:
measure:
name: listings
filter: "{{ dimension('is_lux_latest') }}"
filter: "{{ dimension('listing__is_lux_latest') }}"
---
metric:
name: "smallest_listing"
Expand Down Expand Up @@ -276,7 +276,7 @@ metric:
type_params:
numerator:
name: average_booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
denominator:
name: max_booking_value
---
Expand All @@ -289,7 +289,7 @@ metric:
type_params:
numerator:
name: average_booking_value
filter: "{{ dimension('is_lux_latest', entity_path=['listing']) }}"
filter: "{{ dimension('listing__is_lux_latest', entity_path=['listing']) }}"
denominator:
name: max_booking_value
---
Expand All @@ -303,9 +303,9 @@ metric:
expr: "average_booking_value * bookings / NULLIF(booking_value, 0)"
metrics:
- name: average_booking_value
filter: "{{ dimension('is_lux_latest', entity_path=['listing']) }}"
filter: "{{ dimension('listing__is_lux_latest', entity_path=['listing']) }}"
- name: bookings
filter: "{{ dimension('is_lux_latest', entity_path=['listing']) }}"
filter: "{{ dimension('listing__is_lux_latest', entity_path=['listing']) }}"
- name: booking_value
---
metric:
Expand All @@ -317,7 +317,7 @@ metric:
type_params:
numerator:
name: booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
alias: booking_value_with_is_instant_constraint
denominator:
name: booking_value
Expand All @@ -331,11 +331,11 @@ metric:
type_params:
numerator:
name: total_account_balance_first_day
filter: "{{ dimension('home_state_latest', entity_path=['user']) }} IN ('CA', 'HI', 'WA')"
filter: "{{ dimension('user__home_state_latest') }} IN ('CA', 'HI', 'WA')"
alias: west_coast_balance_first_day
denominator:
name: total_account_balance_first_day
filter: "{{ dimension('home_state_latest', entity_path=['user']) }} IN ('MD', 'NY', 'TX')"
filter: "{{ dimension('user__home_state_latest') }} IN ('MD', 'NY', 'TX')"
alias: east_coast_balance_first_dat
---
metric:
Expand All @@ -347,7 +347,7 @@ metric:
expr: delayed_bookings * 2
metrics:
- name: bookings
filter: "NOT {{ dimension('is_instant') }}"
filter: "NOT {{ dimension('booking__is_instant') }}"
alias: delayed_bookings
---
metric:
Expand Down Expand Up @@ -398,7 +398,7 @@ metric:
- name: bookings
- name: listings
alias: lux_listing
filter: "{{ dimension('is_lux_latest') }}"
filter: "{{ dimension('listing__is_lux_latest') }}"
---
metric:
name: "instant_plus_non_referred_bookings_pct"
Expand Down
Loading

0 comments on commit cc7a031

Please sign in to comment.