Skip to content

Commit

Permalink
Enable custom granularity inputs for filters (#346)
Browse files Browse the repository at this point in the history
We are enabling support for custom granularity, which means
users will need to be able to reference them in where filters and
other contexts where their input representations of their manifest
elements will need to be parsed.

Historically, we have restricted time granularities to an enumerated
set of standard supported grains. With the addition of custom
granularities we must now admit the possibility of arbitrary string
names representing valid time granularities.

This change updates the interfaces for parsing time dimensions out
of filter expressions to allow for arbitrary string inputs representing
custom granularity names.

For various reasons - mainly having to do with the filter parser
not having access to the semantic manifest - we will NOT be able
to support custom granularity references via dundered string names. So
something like `metric_time__martian_year` will not work, but
the explicit syntax of `Dimension('metric_time').grain('martian_year')`
will be accepted and processed.

Resolves #345
  • Loading branch information
tlento authored Sep 18, 2024
1 parent 47f831e commit ff37d9f
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 32 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240908-221757.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Allow filter parameters to accept custom granularities
time: 2024-09-08T22:17:57.865557-07:00
custom:
Author: tlento
Issue: "345"
3 changes: 1 addition & 2 deletions dbt_semantic_interfaces/call_parameter_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
MetricReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart


Expand All @@ -29,7 +28,7 @@ class TimeDimensionCallParameterSet:

entity_path: Tuple[EntityReference, ...]
time_dimension_reference: TimeDimensionReference
time_granularity: Optional[TimeGranularity] = None
time_granularity_name: Optional[str] = None
date_part: Optional[DatePart] = None


Expand Down
6 changes: 5 additions & 1 deletion dbt_semantic_interfaces/naming/dundered.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ class StructuredDunderedName:
element_name: "ds"
granularity: TimeGranularity.WEEK
The time granularity is part of legacy query syntax and there are plans to migrate away from this format.
The time granularity is part of legacy query syntax and there are plans to migrate away from this format. As such,
this will not be updated to allow for custom granularity values. This implies that any query paths that push named
parameters through this class will not support a custom grain reference of the form `metric_time__martian_year`,
and users wishing to use their martian year grain will have to explicitly reference it via a separate parameter
instead of gluing it onto the end of the name.
"""

entity_links: Tuple[EntityReference, ...]
Expand Down
13 changes: 2 additions & 11 deletions dbt_semantic_interfaces/parsing/text_input/ti_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.errors import InvalidQuerySyntax
from dbt_semantic_interfaces.naming.dundered import StructuredDunderedName
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart


Expand Down Expand Up @@ -65,21 +64,13 @@ def __post_init__(self) -> None: # noqa: D105
raise InvalidQuerySyntax("The entity path should not be specified for a metric.")
if len(structured_item_name.entity_links) > 0:
raise InvalidQuerySyntax("The name of the metric should not have entity links.")
# Check that dimensions / time dimensions have a valid time granularity / date part.
# Check that dimensions / time dimensions have a valid date part.
elif item_type is QueryItemType.DIMENSION or item_type is QueryItemType.TIME_DIMENSION:
if self.time_granularity_name is not None:
valid_time_granularity_names = set(time_granularity.value for time_granularity in TimeGranularity)
if self.time_granularity_name.lower() not in valid_time_granularity_names:
raise InvalidQuerySyntax(
f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are"
f" {valid_time_granularity_names}"
)

if self.date_part_name is not None:
valid_date_part_names = set(date_part.value for date_part in DatePart)
if self.date_part_name.lower() not in set(date_part.value for date_part in DatePart):
raise InvalidQuerySyntax(
f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are"
f"{self.date_part_name!r} is not a valid date part. Valid values are"
f" {valid_date_part_names}"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,25 @@
MetricReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart


class ParameterSetFactory:
"""Creates parameter sets for use in the Jinja sandbox."""
"""Creates parameter sets for use in the Jinja sandbox.
This class does the following:
1. Parses element references (e.g., "{{ Dimension('listing__is_lux') }}") out of where filter expressions
2. Extracts reference attributes (e.g., grain for "{{ TimeDimension('metric_time', grain='martian_year') }}")
3. Allows use of standard time granularities in name strings, (e.g., "{{ Dimension('metric_time__year') }}")
This class does not do direct validation of any custom granularity values, nor does it allow for use of custom
granularities as parts of element reference names. So we can parse "{{ Dimension('shuttle__launch_time__year') }}"
into a valid TimeDimension object with yearly granularity, but we will not correctly parse something like
"{{Dimension('shuttle__launch_time__martian_year')}}" - this will return a dimension named `martian_year` with the
entity link path of ['shuttle', 'launch_time']. Since custom granularity names will not be allowed to be re-used
as dimension names this will fail to match anything defined in the semantic manifest, but the error management
experience will not be as clean and direct as it was for standard granularities.
"""

@staticmethod
def _exception_message_for_incorrect_format(element_name: str) -> str:
Expand All @@ -37,26 +50,51 @@ def create_time_dimension(
entity_path: Sequence[str] = (),
date_part_name: Optional[str] = None,
) -> TimeDimensionCallParameterSet:
"""Gets called by Jinja when rendering {{ TimeDimension(...) }}."""
"""Gets called by Jinja when rendering {{ TimeDimension(...) }}.
There is a lot of strangeness around the time granularity specification here. Historically,
we accepted time dimension names of the form `metric_time__week` or `customer__registration_date__month`
in this interface. We have not yet fully deprecated this, and it's unclear if we ever will.
Key points to note:
1. The time dimension name parsing only accepts standard time granularities. This will not change.
2. The time granularity parameter is what we want everybody to use because it's more explicit.
3. The time granularity parameter will support custom granularities, so that's nice
While this all may seem pretty bad it's not as terrible as all that - this class is only used
for parsing where filters. When we solve the problems with our current where filter spec this will
persist as a backwards compatibility model, but nothing more.
"""
group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name)
if len(group_by_item_name.entity_links) != 1 and not is_metric_time_name(group_by_item_name.element_name):
raise ParseWhereFilterException(
ParameterSetFactory._exception_message_for_incorrect_format(time_dimension_name)
)
grain_parsed_from_name = group_by_item_name.time_granularity
grain_from_param = TimeGranularity(time_granularity_name) if time_granularity_name else None
if grain_parsed_from_name and grain_from_param and grain_from_param != grain_parsed_from_name:
grain_parsed_from_name = (
group_by_item_name.time_granularity.value if group_by_item_name.time_granularity else None
)
inputs_are_mismatched = (
grain_parsed_from_name is not None
and time_granularity_name is not None
and time_granularity_name != grain_parsed_from_name
)

if inputs_are_mismatched:
raise ParseWhereFilterException(
f"Received different grains in `time_dimension_name` parameter ('{time_dimension_name}') "
f"and `time_granularity_name` parameter ('{time_granularity_name}')."
f"and `time_granularity_name` parameter ('{time_granularity_name}'). Remove the grain suffix "
f"(`{grain_parsed_from_name}`) from the time dimension name and use the `time_granularity_name` "
"parameter to specify the intendend grain."
)

time_granularity_name = grain_parsed_from_name or time_granularity_name

return TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name),
entity_path=(
tuple(EntityReference(element_name=arg) for arg in entity_path) + group_by_item_name.entity_links
),
time_granularity=grain_parsed_from_name or grain_from_param,
time_granularity_name=time_granularity_name.lower() if time_granularity_name else None,
date_part=DatePart(date_part_name.lower()) if date_part_name else None,
)

Expand Down
12 changes: 6 additions & 6 deletions tests/implementations/where_filter/test_parse_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_extract_dimension_with_grain_call_parameter_sets() -> None: # noqa: D
TimeDimensionCallParameterSet(
entity_path=(),
time_dimension_reference=TimeDimensionReference(element_name="metric_time"),
time_granularity=TimeGranularity.WEEK,
time_granularity_name=TimeGranularity.WEEK.value,
),
),
entity_call_parameter_sets=(),
Expand All @@ -91,7 +91,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D
EntityReference("listing"),
EntityReference("user"),
),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand All @@ -110,7 +110,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D
EntityReference("listing"),
EntityReference("user"),
),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand All @@ -126,7 +126,7 @@ def test_extract_metric_time_dimension_call_parameter_sets() -> None: # noqa: D
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name="metric_time"),
entity_path=(),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand Down Expand Up @@ -216,7 +216,7 @@ def test_where_filter_interesection_extract_call_parameter_sets() -> None:
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name="metric_time"),
entity_path=(),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand Down Expand Up @@ -269,7 +269,7 @@ def test_time_dimension_without_granularity() -> None: # noqa: D
TimeDimensionCallParameterSet(
entity_path=(EntityReference("booking"),),
time_dimension_reference=TimeDimensionReference(element_name="created_at"),
time_granularity=None,
time_granularity_name=None,
),
),
entity_call_parameter_sets=(),
Expand Down
4 changes: 2 additions & 2 deletions tests/parsing/test_object_builder_item_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ def test_valid_object_builder_items() -> None: # noqa: D
valid_items = (
"Dimension('listing__created_at', entity_path=['host'])",
"Dimension('listing__created_at', entity_path=['host']).grain('day').date_part('day')",
"Dimension('metric_time').grain('martian_year')",
"TimeDimension('listing__created_at', time_granularity_name='day', entity_path=['host'], date_part_name='day')",
"Entity('listing__created_at', entity_path=['host'])",
"Metric('bookings', group_by=['listing__created_at'])",
"TimeDimension('metric_time', time_granularity_name='martian_year')",
)
for valid_item in valid_items:
logger.info(f"Checking {valid_item=}")
Expand All @@ -32,9 +34,7 @@ def test_invalid_object_builder_items() -> None: # noqa: D
text_processor = ObjectBuilderTextProcessor()

invalid_items = (
"Dimension('listing__created_at').grain('invalid')",
"Dimension('listing__created_at').date_part('invalid')",
"TimeDimension('listing__created_at', 'invalid', 'day')",
"TimeDimension('listing__created_at', 'day', date_part_name='invalid')",
"TimeDimension('listing__created_at', 'day', date_part_name='month').grain('month')",
"TimeDimension('listing__created_at', 'day', date_part_name='month').date_part('month')",
Expand Down
4 changes: 2 additions & 2 deletions tests/parsing/test_where_filter_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,15 @@ def test_dimension_date_part() -> None: # noqa
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference("metric_time"),
entity_path=(),
time_granularity=TimeGranularity.WEEK,
time_granularity_name=TimeGranularity.WEEK.value,
),
),
(
"{{ TimeDimension('metric_time', time_granularity_name='week') }} > '2023-01-01'",
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference("metric_time"),
entity_path=(),
time_granularity=TimeGranularity.WEEK,
time_granularity_name=TimeGranularity.WEEK.value,
),
),
],
Expand Down

0 comments on commit ff37d9f

Please sign in to comment.