Skip to content

Commit

Permalink
Add Primary Entity Prefix When Specifying Dimensions in the WhereFilter
Browse files Browse the repository at this point in the history
With the invariant that all dimensions are associated with a primary entity,
require the specification of the entity when using dimensions in the where
filter.

e.g.

```
dimension('capacity_latest') > 10

->

dimension('listing__capacity_latest') > 10
```
  • Loading branch information
plypaul committed Jul 28, 2023
1 parent f36dbe1 commit 906b587
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 21 deletions.
Empty file.
138 changes: 138 additions & 0 deletions dbt_semantic_interfaces/naming/dundered.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Optional, Tuple

from dbt_semantic_interfaces.naming.keywords import DUNDER
from dbt_semantic_interfaces.references import EntityReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class StructuredDunderedName:
    """A group-by item name (e.g. a dimension or entity) broken apart on the double-underscore separator.

    Example: "listing__ds__week" is represented as

        entity_links: (EntityReference(element_name="listing"),)
        element_name: "ds"
        time_granularity: TimeGranularity.WEEK

    The trailing time granularity is part of the legacy query syntax and there are plans to migrate away from
    this format.
    """

    # Entity names that precede the element name, in the order they appear in the dundered string.
    entity_links: Tuple[EntityReference, ...]
    # Name of the element itself (the part after the entity links and before any granularity suffix).
    element_name: str
    # Granularity parsed from the last part of the name, or None when the name has no such suffix.
    time_granularity: Optional[TimeGranularity] = None

    @staticmethod
    def parse_name(name: str) -> StructuredDunderedName:
        """Build an instance from a dundered string such as 'listing__ds__month'.

        A name without any dunder is always treated as a bare element name, even if it happens to equal a
        granularity value. Otherwise the last part is interpreted as a time granularity when it equals the
        value of a TimeGranularity member, and everything before the element name becomes the entity links.
        """
        pieces = name.split(DUNDER)

        # No dunder at all, e.g. "ds".
        if len(pieces) == 1:
            return StructuredDunderedName((), pieces[0])

        # Map each granularity value to its member and look up the final part of the name.
        granularity_by_value = {member.value: member for member in TimeGranularity}
        matched_granularity = granularity_by_value.get(pieces[-1])

        # No granularity suffix, e.g. "messages__ds": the last part is the element name.
        if not matched_granularity:
            return StructuredDunderedName(
                entity_links=tuple(EntityReference(element_name=link_name) for link_name in pieces[:-1]),
                element_name=pieces[-1],
            )

        # Granularity suffix present, e.g. "ds__month" or "messages__ds__month". For the two-part form the
        # slice below is empty, which yields an empty tuple of entity links.
        return StructuredDunderedName(
            entity_links=tuple(EntityReference(element_name=link_name) for link_name in pieces[:-2]),
            element_name=pieces[-2],
            time_granularity=matched_granularity,
        )

    @property
    def dundered_name(self) -> str:
        """Return the full dundered form, e.g. ds or listing__ds__month.

        The granularity suffix is left out when the granularity is unset or is TimeGranularity.DAY.
        """
        parts = [link.element_name for link in self.entity_links]
        parts.append(self.element_name)
        if self.time_granularity and self.time_granularity != TimeGranularity.DAY:
            parts.append(self.time_granularity.value)
        return DUNDER.join(parts)

    @property
    def dundered_name_without_granularity(self) -> str:
        """Return the entity links and element name only. e.g. listing__ds__month -> listing__ds."""
        parts = [link.element_name for link in self.entity_links]
        parts.append(self.element_name)
        return DUNDER.join(parts)

    @property
    def dundered_name_without_entity(self) -> str:
        """Return the element name plus any granularity suffix. e.g. listing__ds__month -> ds__month."""
        parts = [self.element_name]
        if self.time_granularity:
            parts.append(self.time_granularity.value)
        return DUNDER.join(parts)

    @property
    def entity_prefix(self) -> Optional[str]:
        """Return the dunder-joined entity links, or None if there are none. e.g. listing__ds__month -> listing."""
        if not self.entity_links:
            return None
        return DUNDER.join(link.element_name for link in self.entity_links)


class DunderedNameFormatter:
    """Helps to parse names into StructuredDunderedName and vice versa."""

    @staticmethod
    def parse_name(name: str) -> StructuredDunderedName:
        """Construct a StructuredDunderedName from a string like 'listing__ds__month'.

        The parsing rules live in StructuredDunderedName.parse_name; delegating keeps a single
        implementation instead of two copies that can drift apart.
        """
        return StructuredDunderedName.parse_name(name)

    @staticmethod
    def create_structured_name(
        element_name: str,
        entity_links: Tuple[EntityReference, ...] = (),
        time_granularity: Optional[TimeGranularity] = None,
    ) -> StructuredDunderedName:
        """Assemble a StructuredDunderedName from its already-separated parts.

        Args:
            element_name: name of the element itself.
            entity_links: entity references that prefix the element name; defaults to none.
            time_granularity: optional granularity to attach; defaults to None.

        Returns:
            A StructuredDunderedName holding the given values unchanged.
        """
        return StructuredDunderedName(
            entity_links=entity_links,
            element_name=element_name,
            time_granularity=time_granularity,
        )
11 changes: 11 additions & 0 deletions dbt_semantic_interfaces/naming/keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# The double underscore that separates the parts of a group-by item name.
# e.g. user__country
DUNDER = "__"

# Element name of the time dimension used to tabulate / plot metrics.
METRIC_TIME_ELEMENT_NAME = "metric_time"


def is_metric_time_name(element_name: str) -> bool:
    """Return True only when the element name is exactly the metric time element name."""
    matches_metric_time = element_name == METRIC_TIME_ELEMENT_NAME
    return matches_metric_time
58 changes: 54 additions & 4 deletions dbt_semantic_interfaces/parsing/where_filter_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
ParseWhereFilterException,
TimeDimensionCallParameterSet,
)
from dbt_semantic_interfaces.naming.dundered import DunderedNameFormatter
from dbt_semantic_interfaces.naming.keywords import (
METRIC_TIME_ELEMENT_NAME,
is_metric_time_name,
)
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
Expand All @@ -24,6 +29,13 @@
class WhereFilterParser:
"""Parses the template in the WhereFilter into FilterCallParameterSets."""

@staticmethod
def _exception_message_for_incorrect_format(element_name: str) -> str:
    """Build the error text for a name that is not in the expected dundered form.

    Args:
        element_name: the offending name, quoted verbatim in the message.

    Returns:
        A message that quotes the name and states the expected
        <primary entity name>__<dimension_name> form.
    """
    problem = f"Name is in an incorrect format: '{element_name}'. It should be of the form: "
    expected_form = "<primary entity name>__<dimension_name>"
    return problem + expected_form

@staticmethod
def parse_call_parameter_sets(where_sql_template: str) -> FilterCallParameterSets:
"""Return the result of extracting the semantic objects referenced in the where SQL template string."""
Expand All @@ -38,10 +50,19 @@ def parse_call_parameter_sets(where_sql_template: str) -> FilterCallParameterSet

def _dimension_call(dimension_name: str, entity_path: Sequence[str] = ()) -> str:
"""Gets called by Jinja when rendering {{ dimension(...) }}."""
group_by_item_name = DunderedNameFormatter.parse_name(dimension_name)
if len(group_by_item_name.entity_links) != 1:
raise ParseWhereFilterException(
WhereFilterParser._exception_message_for_incorrect_format(dimension_name)
)

dimension_call_parameter_sets.append(
DimensionCallParameterSet(
dimension_reference=DimensionReference(element_name=dimension_name),
entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
dimension_reference=DimensionReference(element_name=group_by_item_name.element_name),
entity_path=(
tuple(EntityReference(element_name=arg) for arg in entity_path)
+ group_by_item_name.entity_links
),
)
)
return _DUMMY_PLACEHOLDER
Expand All @@ -50,17 +71,46 @@ def _time_dimension_call(
time_dimension_name: str, time_granularity_name: str, entity_path: Sequence[str] = ()
) -> str:
"""Gets called by Jinja when rendering {{ time_dimension(...) }}."""
group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name)

# metric_time is the only time dimension that does not have an associated primary entity, so the
# GroupByItemName would not have any entity links.
if is_metric_time_name(group_by_item_name.element_name):
if len(group_by_item_name.entity_links) != 0 or group_by_item_name.time_granularity is not None:
raise ParseWhereFilterException(
WhereFilterParser._exception_message_for_incorrect_format(
f"Name is in an incorrect format: {time_dimension_name} "
f"When referencing {METRIC_TIME_ELEMENT_NAME}, the name should not have any dunders."
)
)

else:
if len(group_by_item_name.entity_links) != 1 or group_by_item_name.time_granularity is not None:
raise ParseWhereFilterException(
WhereFilterParser._exception_message_for_incorrect_format(time_dimension_name)
)

time_dimension_call_parameter_sets.append(
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name=time_dimension_name),
entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name),
entity_path=(
tuple(EntityReference(element_name=arg) for arg in entity_path)
+ group_by_item_name.entity_links
),
time_granularity=TimeGranularity(time_granularity_name),
)
)
return _DUMMY_PLACEHOLDER

def _entity_call(entity_name: str, entity_path: Sequence[str] = ()) -> str:
"""Gets called by Jinja when rendering {{ entity(...) }}."""
group_by_item_name = DunderedNameFormatter.parse_name(entity_name)
if len(group_by_item_name.entity_links) > 0 or group_by_item_name.time_granularity is not None:
WhereFilterParser._exception_message_for_incorrect_format(
f"Name is in an incorrect format: {entity_name} "
f"When referencing entities, the name should not have any dunders."
)

entity_call_parameter_sets.append(
EntityCallParameterSet(
entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ metric:
type_params:
measure:
name: booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
---
metric:
name: "average_instant_booking_value"
Expand All @@ -63,7 +63,7 @@ metric:
type_params:
measure:
name: average_booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
---
metric:
name: "booking_value_for_non_null_listing_id"
Expand Down Expand Up @@ -113,7 +113,7 @@ metric:
type_params:
measure:
name: listings
filter: "{{ dimension('is_lux_latest') }}"
filter: "{{ dimension('listing__is_lux_latest') }}"
---
metric:
name: "smallest_listing"
Expand Down Expand Up @@ -276,7 +276,7 @@ metric:
type_params:
numerator:
name: average_booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
denominator:
name: max_booking_value
---
Expand All @@ -289,7 +289,7 @@ metric:
type_params:
numerator:
name: average_booking_value
filter: "{{ dimension('is_lux_latest', entity_path=['listing']) }}"
filter: "{{ dimension('listing__is_lux_latest', entity_path=['listing']) }}"
denominator:
name: max_booking_value
---
Expand All @@ -303,9 +303,9 @@ metric:
expr: "average_booking_value * bookings / NULLIF(booking_value, 0)"
metrics:
- name: average_booking_value
filter: "{{ dimension('is_lux_latest', entity_path=['listing']) }}"
filter: "{{ dimension('listing__is_lux_latest', entity_path=['listing']) }}"
- name: bookings
filter: "{{ dimension('is_lux_latest', entity_path=['listing']) }}"
filter: "{{ dimension('listing__is_lux_latest', entity_path=['listing']) }}"
- name: booking_value
---
metric:
Expand All @@ -317,7 +317,7 @@ metric:
type_params:
numerator:
name: booking_value
filter: "{{ dimension('is_instant') }}"
filter: "{{ dimension('booking__is_instant') }}"
alias: booking_value_with_is_instant_constraint
denominator:
name: booking_value
Expand All @@ -331,11 +331,11 @@ metric:
type_params:
numerator:
name: total_account_balance_first_day
filter: "{{ dimension('home_state_latest', entity_path=['user']) }} IN ('CA', 'HI', 'WA')"
filter: "{{ dimension('user__home_state_latest') }} IN ('CA', 'HI', 'WA')"
alias: west_coast_balance_first_day
denominator:
name: total_account_balance_first_day
filter: "{{ dimension('home_state_latest', entity_path=['user']) }} IN ('MD', 'NY', 'TX')"
filter: "{{ dimension('user__home_state_latest') }} IN ('MD', 'NY', 'TX')"
alias: east_coast_balance_first_dat
---
metric:
Expand All @@ -347,7 +347,7 @@ metric:
expr: delayed_bookings * 2
metrics:
- name: bookings
filter: "NOT {{ dimension('is_instant') }}"
filter: "NOT {{ dimension('booking__is_instant') }}"
alias: delayed_bookings
---
metric:
Expand Down Expand Up @@ -398,7 +398,7 @@ metric:
- name: bookings
- name: listings
alias: lux_listing
filter: "{{ dimension('is_lux_latest') }}"
filter: "{{ dimension('listing__is_lux_latest') }}"
---
metric:
name: "instant_plus_non_referred_bookings_pct"
Expand Down
Loading

0 comments on commit 906b587

Please sign in to comment.