Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for Dimension.grain(...) in where/filter #152

Merged
merged 17 commits into from
Sep 20, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
added support for Dimension(...).grain
DevonFulcher committed Sep 15, 2023
commit 7faca7078511fc720a3137844c81cf8d238b799c
6 changes: 3 additions & 3 deletions dbt_semantic_interfaces/call_parameter_sets.py
Original file line number Diff line number Diff line change
@@ -13,15 +13,15 @@

@dataclass(frozen=True)
class DimensionCallParameterSet:
    """When 'Dimension(...)' is used in the Jinja template of the where filter, the parameters to that call.

    The dataclass is frozen, so instances cannot be modified after construction.
    """

    # Entity references that accompany the dimension in the call.
    entity_path: Tuple[EntityReference, ...]
    # Reference to the dimension named in the call.
    dimension_reference: DimensionReference


@dataclass(frozen=True)
class TimeDimensionCallParameterSet:
"""When 'time_dimension(...)' is used in the Jinja template of the where filter, the parameters to that call."""
"""When 'TimeDimension(...)' is used in the Jinja template of the where filter, the parameters to that call."""

entity_path: Tuple[EntityReference, ...]
time_dimension_reference: TimeDimensionReference
@@ -30,7 +30,7 @@ class TimeDimensionCallParameterSet:

@dataclass(frozen=True)
class EntityCallParameterSet:
"""When 'entity(...)' is used in the Jinja template of the where filter, the parameters to that call."""
"""When 'Entity(...)' is used in the Jinja template of the where filter, the parameters to that call."""

entity_path: Tuple[EntityReference, ...]
entity_reference: EntityReference
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
PydanticCustomInputParser,
PydanticParseableValueType,
)
from dbt_semantic_interfaces.parsing.where_filter_parser import WhereFilterParser
from dbt_semantic_interfaces.parsing.where_filter.where_filter_parser import WhereFilterParser


class PydanticWhereFilter(PydanticCustomInputParser, HashableBaseModel):
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from typing import Sequence

from dbt_semantic_interfaces.call_parameter_sets import (
DimensionCallParameterSet,
EntityCallParameterSet,
ParseWhereFilterException,
TimeDimensionCallParameterSet,
)
from dbt_semantic_interfaces.naming.dundered import DunderedNameFormatter
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME, is_metric_time_name
from dbt_semantic_interfaces.references import DimensionReference, EntityReference, TimeDimensionReference
from dbt_semantic_interfaces.type_enums import TimeGranularity


class ParameterSetFactory:
    """Builds call parameter sets from the arguments passed to the where-filter Jinja calls.

    All methods are static; the class only acts as a namespace.
    """

    @staticmethod
    def _exception_message_for_incorrect_format(element_name: str) -> str:
        """Return the error message for a name that is not `<primary entity name>__<dimension_name>`."""
        return (
            f"Name is in an incorrect format: '{element_name}'. It should be of the form: "
            f"<primary entity name>__<dimension_name>"
        )

    @staticmethod
    def create_time_dimension(
        time_dimension_name: str, time_granularity_name: str, entity_path: Sequence[str] = ()
    ) -> TimeDimensionCallParameterSet:
        """Gets called by Jinja when rendering {{ TimeDimension(...) }}.

        Args:
            time_dimension_name: Dundered name of the time dimension.
            time_granularity_name: Value used to construct the `TimeGranularity`.
            entity_path: Entity names that are placed before the entity links parsed from the name.

        Raises:
            ParseWhereFilterException: If the name carries a time granularity, if a metric-time name has
                entity links, or if any other name does not have exactly one entity link.
        """
        group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name)

        # metric_time is the only time dimension that does not have an associated primary entity, so the
        # GroupByItemName would not have any entity links.
        if is_metric_time_name(group_by_item_name.element_name):
            if len(group_by_item_name.entity_links) != 0 or group_by_item_name.time_granularity is not None:
                raise ParseWhereFilterException(
                    f"Name is in an incorrect format: {time_dimension_name} "
                    f"When referencing {METRIC_TIME_ELEMENT_NAME}, the name should not have any dunders."
                )
        else:
            if len(group_by_item_name.entity_links) != 1 or group_by_item_name.time_granularity is not None:
                raise ParseWhereFilterException(
                    ParameterSetFactory._exception_message_for_incorrect_format(time_dimension_name)
                )

        return TimeDimensionCallParameterSet(
            time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name),
            entity_path=(
                tuple(EntityReference(element_name=arg) for arg in entity_path) + group_by_item_name.entity_links
            ),
            time_granularity=TimeGranularity(time_granularity_name),
        )

    @staticmethod
    def create_dimension(dimension_name: str, entity_path: Sequence[str] = ()) -> DimensionCallParameterSet:
        """Gets called by Jinja when rendering {{ Dimension(...) }}.

        Args:
            dimension_name: Dundered name of the dimension.
            entity_path: Entity names that are placed before the entity links parsed from the name.

        Raises:
            ParseWhereFilterException: If the name refers to metric time, or if it does not have exactly
                one entity link.
        """
        group_by_item_name = DunderedNameFormatter.parse_name(dimension_name)
        if is_metric_time_name(group_by_item_name.element_name):
            raise ParseWhereFilterException(
                f"{METRIC_TIME_ELEMENT_NAME} is a time dimension, so it should be referenced using "
                f"TimeDimension(...)"
            )

        if len(group_by_item_name.entity_links) != 1:
            raise ParseWhereFilterException(ParameterSetFactory._exception_message_for_incorrect_format(dimension_name))

        return DimensionCallParameterSet(
            dimension_reference=DimensionReference(element_name=group_by_item_name.element_name),
            entity_path=(
                tuple(EntityReference(element_name=arg) for arg in entity_path) + group_by_item_name.entity_links
            ),
        )

    @staticmethod
    def create_entity(entity_name: str, entity_path: Sequence[str] = ()) -> EntityCallParameterSet:
        """Gets called by Jinja when rendering {{ Entity(...) }}.

        Args:
            entity_name: Name of the entity; it must not contain entity links or a time granularity.
            entity_path: Entity names that make up the entity path of the result.

        Raises:
            ParseWhereFilterException: If the parsed name has entity links or a time granularity.
        """
        group_by_item_name = DunderedNameFormatter.parse_name(entity_name)
        if len(group_by_item_name.entity_links) > 0 or group_by_item_name.time_granularity is not None:
            # The message used to be built and then discarded, so malformed names slipped through.
            # Raise it, matching how the other factory methods report malformed names.
            raise ParseWhereFilterException(
                f"Name is in an incorrect format: {entity_name} "
                f"When referencing entities, the name should not have any dunders."
            )

        return EntityCallParameterSet(
            entity_path=tuple(EntityReference(element_name=arg) for arg in entity_path),
            entity_reference=EntityReference(element_name=entity_name),
        )
65 changes: 65 additions & 0 deletions dbt_semantic_interfaces/parsing/where_filter/query_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from __future__ import annotations

from typing import Protocol, Sequence


DevonFulcher marked this conversation as resolved.
Show resolved Hide resolved
class QueryInterfaceDimension(Protocol):
    """Represents the interface for Dimension in the query interface.

    Both methods are annotated as returning a QueryInterfaceDimension. The bodies here only raise
    NotImplementedError, so implementations must supply their own.
    """

    def grain(self, _grain: str) -> QueryInterfaceDimension:
        """The time granularity."""
        raise NotImplementedError

    def alias(self, _alias: str) -> QueryInterfaceDimension:
        """Renaming the column."""
        raise NotImplementedError


class QueryInterfaceDimensionFactory(Protocol):
    """Creates a Dimension for the query interface.

    Represented as the Dimension constructor in the Jinja sandbox.
    """

    def create(self, name: str, entity_path: Sequence[str] = ()) -> QueryInterfaceDimension:
        """Create a QueryInterfaceDimension.

        The body here only raises NotImplementedError; implementations must supply their own.
        """
        raise NotImplementedError


class QueryInterfaceTimeDimension(Protocol):
    """Represents the interface for TimeDimension in the query interface.

    The protocol declares no members.
    """

    pass


class QueryInterfaceTimeDimensionFactory(Protocol):
    """Creates a TimeDimension for the query interface.

    Represented as the TimeDimension constructor in the Jinja sandbox.
    """

    def create(
        self,
        time_dimension_name: str,
        time_granularity_name: str,
        entity_path: Sequence[str] = (),
    ) -> QueryInterfaceTimeDimension:
        """Create a TimeDimension.

        The body here only raises NotImplementedError; implementations must supply their own.
        """
        raise NotImplementedError


class QueryInterfaceEntity(Protocol):
    """Represents the interface for Entity in the query interface.

    The protocol declares no members.
    """

    pass


class QueryInterfaceEntityFactory(Protocol):
    """Creates an Entity for the query interface.

    Represented as the Entity constructor in the Jinja sandbox.
    """

    def create(self, entity_name: str, entity_path: Sequence[str] = ()) -> QueryInterfaceEntity:
        """Create an Entity.

        The body here only raises NotImplementedError; implementations must supply their own.
        """
        raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from __future__ import annotations

from typing import List, Optional, Sequence

from dbt_semantic_interfaces.call_parameter_sets import (
DimensionCallParameterSet,
ParseWhereFilterException,
TimeDimensionCallParameterSet,
)
from dbt_semantic_interfaces.naming.dundered import DunderedNameFormatter
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME, is_metric_time_name
from dbt_semantic_interfaces.parsing.where_filter.parameter_set_factory import ParameterSetFactory
from dbt_semantic_interfaces.parsing.where_filter.query_interface import (
QueryInterfaceDimension,
QueryInterfaceDimensionFactory,
)
from dbt_semantic_interfaces.parsing.where_filter.where_filter_error import WhereFilterError
from dbt_semantic_interfaces.protocols.protocol_hint import ProtocolHint
from dbt_semantic_interfaces.references import DimensionReference, EntityReference
from typing_extensions import override

from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity


class WhereFilterDimension(ProtocolHint[QueryInterfaceDimension]):
    """A dimension that is passed in through the where filter parameter.

    The instance holds a reference to a list of TimeDimensionCallParameterSet that is owned by the
    caller. Calling `grain` registers this dimension in that list as a time dimension.
    """

    @override
    def _implements_protocol(self) -> QueryInterfaceDimension:
        return self

    def __init__(  # noqa
        self,
        name: str,
        entity_path: Sequence[str],
        time_dimension_call_parameter_sets: List[TimeDimensionCallParameterSet],
    ):
        self.name = name
        self.entity_path = entity_path
        # Shared list supplied by the caller; `grain` mutates it in place.
        self._time_dimension_call_parameter_sets = time_dimension_call_parameter_sets
        # Stays None until `grain` is called.
        self.time_granularity: Optional[TimeGranularity] = None
        # The parameter set this instance last added to the shared list, if any.
        self._registered_parameter_set: Optional[TimeDimensionCallParameterSet] = None

    def grain(self, time_granularity: str) -> QueryInterfaceDimension:
        """Set the time granularity and register this dimension as a time dimension.

        Args:
            time_granularity: Value used to construct the `TimeGranularity`.

        Returns:
            This instance, so that calls can be chained.

        Raises:
            ParseWhereFilterException: Propagated from `ParameterSetFactory.create_time_dimension`
                when the dimension name is not in the expected format.
        """
        granularity = TimeGranularity(time_granularity)
        # Keyword arguments pin each value to the intended parameter of create_time_dimension, whose
        # positional order is (name, granularity name, entity path).
        parameter_set = ParameterSetFactory.create_time_dimension(
            time_dimension_name=self.name,
            time_granularity_name=time_granularity,
            entity_path=self.entity_path,
        )

        shared_sets = self._time_dimension_call_parameter_sets
        previous = self._registered_parameter_set
        if previous is not None:
            # A chained call such as .grain('day').grain('week') should leave only the latest grain
            # registered. Match by identity so an equal set added by another call is left alone.
            for position, existing in enumerate(shared_sets):
                if existing is previous:
                    del shared_sets[position]
                    break

        shared_sets.append(parameter_set)
        self._registered_parameter_set = parameter_set
        self.time_granularity = granularity
        return self

    def alias(self, _alias: str) -> QueryInterfaceDimension:
        """Renaming the column."""
        raise NotImplementedError
DevonFulcher marked this conversation as resolved.
Show resolved Hide resolved


class WhereFilterDimensionFactory(ProtocolHint[QueryInterfaceDimensionFactory]):
    """Factory for WhereFilterDimension objects.

    Every dimension produced by `create` is also recorded in `created`, in call order.
    """

    def __init__(self, time_dimension_call_parameter_sets: List[TimeDimensionCallParameterSet]):  # noqa
        # The same list object is handed to every dimension this factory creates.
        self._time_dimension_call_parameter_sets = time_dimension_call_parameter_sets
        self.created: List[WhereFilterDimension] = []
        self.dimension_call_parameter_sets: List[DimensionCallParameterSet] = []

    @override
    def _implements_protocol(self) -> QueryInterfaceDimensionFactory:
        return self

    def create(self, dimension_name: str, entity_path: Sequence[str] = ()) -> WhereFilterDimension:
        """Gets called by Jinja when rendering {{ Dimension(...) }}."""
        new_dimension = WhereFilterDimension(
            name=dimension_name,
            entity_path=entity_path,
            time_dimension_call_parameter_sets=self._time_dimension_call_parameter_sets,
        )
        self.created.append(new_dimension)
        return new_dimension
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations

from typing import List, Sequence

from dbt_semantic_interfaces.call_parameter_sets import EntityCallParameterSet
from dbt_semantic_interfaces.parsing.where_filter.parameter_set_factory import ParameterSetFactory
from dbt_semantic_interfaces.parsing.where_filter.query_interface import (
QueryInterfaceEntity,
QueryInterfaceEntityFactory,
)
from dbt_semantic_interfaces.protocols.protocol_hint import ProtocolHint
from typing_extensions import override


class EntityStub(ProtocolHint[QueryInterfaceEntity]):
    """Placeholder object returned by WhereFilterEntityFactory.create; it carries no state."""

    @override
    def _implements_protocol(self) -> QueryInterfaceEntity:
        return self


class WhereFilterEntityFactory(ProtocolHint[QueryInterfaceEntityFactory]):
    """Records an EntityCallParameterSet for every Entity(...) call made during rendering."""

    def __init__(self):  # noqa
        self.entity_call_parameter_sets: List[EntityCallParameterSet] = []

    @override
    def _implements_protocol(self) -> QueryInterfaceEntityFactory:
        return self

    def create(self, entity_name: str, entity_path: Sequence[str] = ()) -> EntityStub:
        """Gets called by Jinja when rendering {{ Entity(...) }}."""
        parameter_set = ParameterSetFactory.create_entity(entity_name, entity_path)
        self.entity_call_parameter_sets.append(parameter_set)
        return EntityStub()
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import annotations
from dbt_semantic_interfaces.parsing.where_filter.parameter_set_factory import ParameterSetFactory

from dbt_semantic_interfaces.parsing.where_filter.where_filter_dimension import WhereFilterDimensionFactory
from dbt_semantic_interfaces.parsing.where_filter.where_filter_entity import WhereFilterEntityFactory
from dbt_semantic_interfaces.parsing.where_filter.where_filter_time_dimension import WhereFilterTimeDimensionFactory

from jinja2 import StrictUndefined
from jinja2.exceptions import SecurityError, TemplateSyntaxError, UndefinedError
from jinja2.sandbox import SandboxedEnvironment

from dbt_semantic_interfaces.call_parameter_sets import (
FilterCallParameterSets,
ParseWhereFilterException,
)


class WhereFilterParser:
    """Parses the template in the WhereFilter into FilterCallParameterSets."""

    @staticmethod
    def parse_call_parameter_sets(where_sql_template: str) -> FilterCallParameterSets:
        """Return the result of extracting the semantic objects referenced in the where SQL template string.

        Raises:
            ParseWhereFilterException: If Jinja reports an undefined name, a template syntax error or a
                sandbox security error while rendering the template.
        """
        time_dimension_factory = WhereFilterTimeDimensionFactory()
        # The dimension factory is given the time dimension factory's list, so that dimensions which
        # receive a grain are recorded alongside those created through TimeDimension(...).
        dimension_factory = WhereFilterDimensionFactory(time_dimension_factory.time_dimension_call_parameter_sets)
        entity_factory = WhereFilterEntityFactory()

        jinja_callables = {
            "Dimension": dimension_factory.create,
            "TimeDimension": time_dimension_factory.create,
            "Entity": entity_factory.create,
        }
        try:
            # Rendering is done only for the calls it triggers on the factories; the rendered string
            # is discarded.
            template = SandboxedEnvironment(undefined=StrictUndefined).from_string(where_sql_template)
            template.render(**jinja_callables)
        except (UndefinedError, TemplateSyntaxError, SecurityError) as e:
            raise ParseWhereFilterException(f"Error while parsing Jinja template:\n{where_sql_template}") from e

        # Dimensions with a time granularity are not dropped: calling .grain(...) on them already added
        # a TimeDimensionCallParameterSet to the shared list. Only the remaining dimensions are turned
        # into DimensionCallParameterSets here.
        dimension_parameter_sets = tuple(
            ParameterSetFactory.create_dimension(created.name, created.entity_path)
            for created in dimension_factory.created
            if not created.time_granularity
        )

        return FilterCallParameterSets(
            dimension_call_parameter_sets=dimension_parameter_sets,
            time_dimension_call_parameter_sets=tuple(time_dimension_factory.time_dimension_call_parameter_sets),
            entity_call_parameter_sets=tuple(entity_factory.entity_call_parameter_sets),
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from __future__ import annotations

from typing import List, Sequence

from dbt_semantic_interfaces.call_parameter_sets import (
TimeDimensionCallParameterSet,
)
from dbt_semantic_interfaces.parsing.where_filter.parameter_set_factory import ParameterSetFactory
from dbt_semantic_interfaces.parsing.where_filter.query_interface import (
QueryInterfaceTimeDimension,
QueryInterfaceTimeDimensionFactory,
)
from dbt_semantic_interfaces.protocols.protocol_hint import ProtocolHint
from typing_extensions import override


class TimeDimensionStub(ProtocolHint[QueryInterfaceTimeDimension]):
    """Placeholder object returned by WhereFilterTimeDimensionFactory.create; it carries no state."""

    @override
    def _implements_protocol(self) -> QueryInterfaceTimeDimension:
        return self


class WhereFilterTimeDimensionFactory(ProtocolHint[QueryInterfaceTimeDimensionFactory]):
    """Records a TimeDimensionCallParameterSet for every TimeDimension(...) call made during rendering."""

    def __init__(self):  # noqa
        self.time_dimension_call_parameter_sets: List[TimeDimensionCallParameterSet] = []

    @override
    def _implements_protocol(self) -> QueryInterfaceTimeDimensionFactory:
        return self

    def create(
        self, time_dimension_name: str, time_granularity_name: str, entity_path: Sequence[str] = ()
    ) -> TimeDimensionStub:
        """Gets called by Jinja when rendering {{ TimeDimension(...) }}."""
        parameter_set = ParameterSetFactory.create_time_dimension(
            time_dimension_name, time_granularity_name, entity_path
        )
        self.time_dimension_call_parameter_sets.append(parameter_set)
        return TimeDimensionStub()
138 changes: 0 additions & 138 deletions dbt_semantic_interfaces/parsing/where_filter_parser.py

This file was deleted.