From 88f38738297544863375fa74cdff926e34e1caa7 Mon Sep 17 00:00:00 2001 From: tlento Date: Sun, 8 Sep 2024 22:10:02 -0700 Subject: [PATCH 1/3] Enable custom granularity inputs for filters We are enabling support for custom granularity, which means users will need to be able to reference them in where filters and other contexts where their input representations of their manifest elements will need to be parsed. Historically, we have restricted time granularities to an enumerated set of standard supported grains. With the addition of custom granularities we must now admit the possibility of arbitrary string names representing valid time granularities. This change updates the interfaces for parsing time dimensions out of filter expressions to allow for arbitrary string inputs representing custom granularity names. For various reasons - mainly having to do with the filter parser not having access to the semantic manifest - we will NOT be able to support custom granularity references via dundered string names. So something like `metric_time__martian_year` will not work, but the explicit syntax of `Dimension('metric_time').grain('martian_year')` will be accepted and processed. --- .../unreleased/Features-20240908-221757.yaml | 6 +++ .../call_parameter_sets.py | 3 +- dbt_semantic_interfaces/naming/dundered.py | 6 ++- .../parsing/text_input/ti_description.py | 13 +----- .../where_filter/parameter_set_factory.py | 41 +++++++++++++++---- .../where_filter/test_parse_calls.py | 12 +++--- .../test_object_builder_item_description.py | 4 +- tests/parsing/test_where_filter_parsing.py | 4 +- 8 files changed, 58 insertions(+), 31 deletions(-) create mode 100644 .changes/unreleased/Features-20240908-221757.yaml diff --git a/.changes/unreleased/Features-20240908-221757.yaml b/.changes/unreleased/Features-20240908-221757.yaml new file mode 100644 index 00000000..b277a0c8 --- /dev/null +++ b/.changes/unreleased/Features-20240908-221757.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Allow filter parameters to accept custom granularities +time: 2024-09-08T22:17:57.865557-07:00 +custom: + Author: tlento + Issue: "345" diff --git a/dbt_semantic_interfaces/call_parameter_sets.py b/dbt_semantic_interfaces/call_parameter_sets.py index 0520ac46..a018e447 100644 --- a/dbt_semantic_interfaces/call_parameter_sets.py +++ b/dbt_semantic_interfaces/call_parameter_sets.py @@ -10,7 +10,6 @@ MetricReference, TimeDimensionReference, ) -from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart @@ -29,7 +28,7 @@ class TimeDimensionCallParameterSet: entity_path: Tuple[EntityReference, ...] time_dimension_reference: TimeDimensionReference - time_granularity: Optional[TimeGranularity] = None + time_granularity_name: Optional[str] = None date_part: Optional[DatePart] = None diff --git a/dbt_semantic_interfaces/naming/dundered.py b/dbt_semantic_interfaces/naming/dundered.py index 31ff2193..1095d3a3 100644 --- a/dbt_semantic_interfaces/naming/dundered.py +++ b/dbt_semantic_interfaces/naming/dundered.py @@ -20,7 +20,11 @@ class StructuredDunderedName: element_name: "ds" granularity: TimeGranularity.WEEK - The time granularity is part of legacy query syntax and there are plans to migrate away from this format. + The time granularity is part of legacy query syntax and there are plans to migrate away from this format. As such, + this will not be updated to allow for custom granularity values. This implies that any query paths that push named + parameters through this class will not support a custom grain reference of the form `metric_time__martian_year`, + and users wishing to use their martian year grain will have to explicitly reference it via a separate parameter + instead of gluing it onto the end of the name. """ entity_links: Tuple[EntityReference, ...] diff --git a/dbt_semantic_interfaces/parsing/text_input/ti_description.py b/dbt_semantic_interfaces/parsing/text_input/ti_description.py index 6477b4f1..62fff662 100644 --- a/dbt_semantic_interfaces/parsing/text_input/ti_description.py +++ b/dbt_semantic_interfaces/parsing/text_input/ti_description.py @@ -7,7 +7,6 @@ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.errors import InvalidQuerySyntax from dbt_semantic_interfaces.naming.dundered import StructuredDunderedName -from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart @@ -65,21 +64,13 @@ def __post_init__(self) -> None: # noqa: D105 raise InvalidQuerySyntax("The entity path should not be specified for a metric.") if len(structured_item_name.entity_links) > 0: raise InvalidQuerySyntax("The name of the metric should not have entity links.") - # Check that dimensions / time dimensions have a valid time granularity / date part. + # Check that dimensions / time dimensions have a valid date part. elif item_type is QueryItemType.DIMENSION or item_type is QueryItemType.TIME_DIMENSION: - if self.time_granularity_name is not None: - valid_time_granularity_names = set(time_granularity.value for time_granularity in TimeGranularity) - if self.time_granularity_name.lower() not in valid_time_granularity_names: - raise InvalidQuerySyntax( - f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are" - f" {valid_time_granularity_names}" - ) - if self.date_part_name is not None: valid_date_part_names = set(date_part.value for date_part in DatePart) if self.date_part_name.lower() not in set(date_part.value for date_part in DatePart): raise InvalidQuerySyntax( - f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are" + f"{self.date_part_name!r} is not a valid date part. Valid values are" f" {valid_date_part_names}" ) diff --git a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py index c1e84e60..b1fd25d6 100644 --- a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py +++ b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py @@ -16,12 +16,19 @@ MetricReference, TimeDimensionReference, ) -from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart class ParameterSetFactory: - """Creates parameter sets for use in the Jinja sandbox.""" + """Creates parameter sets for use in the Jinja sandbox. + + This is ONLY to be used for parsing where filters, and then only for the purposes of extracting + some basic information about which elements are being accessed in the filter expression in the + small number of contexts where a more complete semantic layer implementation is not available. + + In practice, today, this is used by the dbt core parser, which cannot take on a MetricFlow + dependency, in order to provide some DAG annotations around elements referenced in where filters. + """ @staticmethod def _exception_message_for_incorrect_format(element_name: str) -> str: @@ -37,26 +44,46 @@ def create_time_dimension( entity_path: Sequence[str] = (), date_part_name: Optional[str] = None, ) -> TimeDimensionCallParameterSet: - """Gets called by Jinja when rendering {{ TimeDimension(...) }}.""" + """Gets called by Jinja when rendering {{ TimeDimension(...) }}. + + There is a lot of strangeness around the time granularity specification here. Historically, + we accepted time dimension names of the form `metric_time__week` or `customer__registration_date__month` + in this interface. We have not yet fully deprecated this, and it's unclear if we ever will. + + Key points to note: + 1. The time dimension name parsing only accepts standard time granularities. This will not change. + 2. The time granularity parameter is what we want everybody to use because it's more explicit. + 3. The time granularity parameter will support custom granularities, so that's nice + + While this all may seem pretty bad it's not as terrible as all that - this class is only used + for parsing where filters. When we solve the problems with our current where filter spec this will + persist as a backwards compatibility model, but nothing more. + """ group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name) if len(group_by_item_name.entity_links) != 1 and not is_metric_time_name(group_by_item_name.element_name): raise ParseWhereFilterException( ParameterSetFactory._exception_message_for_incorrect_format(time_dimension_name) ) - grain_parsed_from_name = group_by_item_name.time_granularity - grain_from_param = TimeGranularity(time_granularity_name) if time_granularity_name else None + grain_parsed_from_name = ( + group_by_item_name.time_granularity.value if group_by_item_name.time_granularity else None + ) + grain_from_param = time_granularity_name if grain_parsed_from_name and grain_from_param and grain_from_param != grain_parsed_from_name: raise ParseWhereFilterException( f"Received different grains in `time_dimension_name` parameter ('{time_dimension_name}') " - f"and `time_granularity_name` parameter ('{time_granularity_name}')." + f"and `time_granularity_name` parameter ('{time_granularity_name}'). Remove the grain suffix " + f"(`{grain_parsed_from_name}`) from the time dimension name and use the `time_granularity_name` " + "parameter to specify the intendend grain." ) + time_granularity_name = grain_parsed_from_name or grain_from_param + return TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name), entity_path=( tuple(EntityReference(element_name=arg) for arg in entity_path) + group_by_item_name.entity_links ), - time_granularity=grain_parsed_from_name or grain_from_param, + time_granularity_name=time_granularity_name.lower() if time_granularity_name else None, date_part=DatePart(date_part_name.lower()) if date_part_name else None, ) diff --git a/tests/implementations/where_filter/test_parse_calls.py b/tests/implementations/where_filter/test_parse_calls.py index 0fcad048..2a7f9e89 100644 --- a/tests/implementations/where_filter/test_parse_calls.py +++ b/tests/implementations/where_filter/test_parse_calls.py @@ -69,7 +69,7 @@ def test_extract_dimension_with_grain_call_parameter_sets() -> None: # noqa: D TimeDimensionCallParameterSet( entity_path=(), time_dimension_reference=TimeDimensionReference(element_name="metric_time"), - time_granularity=TimeGranularity.WEEK, + time_granularity_name=TimeGranularity.WEEK.value, ), ), entity_call_parameter_sets=(), @@ -91,7 +91,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D EntityReference("listing"), EntityReference("user"), ), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -110,7 +110,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D EntityReference("listing"), EntityReference("user"), ), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -126,7 +126,7 @@ def test_extract_metric_time_dimension_call_parameter_sets() -> None: # noqa: D TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name="metric_time"), entity_path=(), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -216,7 +216,7 @@ def test_where_filter_interesection_extract_call_parameter_sets() -> None: TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name="metric_time"), entity_path=(), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -269,7 +269,7 @@ def test_time_dimension_without_granularity() -> None: # noqa: D TimeDimensionCallParameterSet( entity_path=(EntityReference("booking"),), time_dimension_reference=TimeDimensionReference(element_name="created_at"), - time_granularity=None, + time_granularity_name=None, ), ), entity_call_parameter_sets=(), diff --git a/tests/parsing/test_object_builder_item_description.py b/tests/parsing/test_object_builder_item_description.py index 1b4abba2..10ef925e 100644 --- a/tests/parsing/test_object_builder_item_description.py +++ b/tests/parsing/test_object_builder_item_description.py @@ -19,9 +19,11 @@ def test_valid_object_builder_items() -> None: # noqa: D valid_items = ( "Dimension('listing__created_at', entity_path=['host'])", "Dimension('listing__created_at', entity_path=['host']).grain('day').date_part('day')", + "Dimension('metric_time').grain('martian_year')", "TimeDimension('listing__created_at', time_granularity_name='day', entity_path=['host'], date_part_name='day')", "Entity('listing__created_at', entity_path=['host'])", "Metric('bookings', group_by=['listing__created_at'])", + "TimeDimension('metric_time', time_granularity_name='martian_year')", ) for valid_item in valid_items: logger.info(f"Checking {valid_item=}") @@ -32,9 +34,7 @@ def test_invalid_object_builder_items() -> None: # noqa: D text_processor = ObjectBuilderTextProcessor() invalid_items = ( - "Dimension('listing__created_at').grain('invalid')", "Dimension('listing__created_at').date_part('invalid')", - "TimeDimension('listing__created_at', 'invalid', 'day')", "TimeDimension('listing__created_at', 'day', date_part_name='invalid')", "TimeDimension('listing__created_at', 'day', date_part_name='month').grain('month')", "TimeDimension('listing__created_at', 'day', date_part_name='month').date_part('month')", diff --git a/tests/parsing/test_where_filter_parsing.py b/tests/parsing/test_where_filter_parsing.py index d9ab918c..60a37e0a 100644 --- a/tests/parsing/test_where_filter_parsing.py +++ b/tests/parsing/test_where_filter_parsing.py @@ -185,7 +185,7 @@ def test_dimension_date_part() -> None: # noqa TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference("metric_time"), entity_path=(), - time_granularity=TimeGranularity.WEEK, + time_granularity_name=TimeGranularity.WEEK.value, ), ), ( @@ -193,7 +193,7 @@ def test_dimension_date_part() -> None: # noqa TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference("metric_time"), entity_path=(), - time_granularity=TimeGranularity.WEEK, + time_granularity_name=TimeGranularity.WEEK.value, ), ), ], From c9abc2a37a8f011912ad3a98f434b25c460fc243 Mon Sep 17 00:00:00 2001 From: tlento Date: Fri, 13 Sep 2024 14:42:11 -0700 Subject: [PATCH 2/3] Improve docstring for ParameterSetFactory --- .../where_filter/parameter_set_factory.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py index b1fd25d6..919c5aa8 100644 --- a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py +++ b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py @@ -22,12 +22,18 @@ class ParameterSetFactory: """Creates parameter sets for use in the Jinja sandbox. - This is ONLY to be used for parsing where filters, and then only for the purposes of extracting - some basic information about which elements are being accessed in the filter expression in the - small number of contexts where a more complete semantic layer implementation is not available. - - In practice, today, this is used by the dbt core parser, which cannot take on a MetricFlow - dependency, in order to provide some DAG annotations around elements referenced in where filters. + This class does the following: + 1. Parses element references (e.g., "{{ Dimension('listing__is_lux') }}") out of where filter expressions + 2. Extracts reference attributes (e.g., grain for "{{ TimeDimension('metric_time', grain='martian_year') }}") + 3. Allows use of standard time granularities in name strings, (e.g., "{{ Dimension('metric_time__year') }}") + + This class does not do direct validation of any custom granularity values, nor does it allow for use of custom + granularities as parts of element reference names. So we can parse "{{ Dimension('shuttle__launch_time__year') }}" + into a valid TimeDimension object with yearly granularity, but we will not correctly parse something like + "{{Dimension('shuttle__launch_time__martian_year')}}" - this will return a dimension named `martian_year` with the + entity link path of ['shuttle', 'launch_time']. Since custom granularity names will not be allowed to be re-used + as dimension names this will fail to match anything defined in the semantic manifest, but the error management + experience will not be as clean and direct as it was for standard granularities. """ @staticmethod From a16495a36425a7514987a585e89e337dc946c25c Mon Sep 17 00:00:00 2001 From: tlento Date: Fri, 13 Sep 2024 14:42:37 -0700 Subject: [PATCH 3/3] Clean up granularity input matching logic --- .../parsing/where_filter/parameter_set_factory.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py index 919c5aa8..c59dc016 100644 --- a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py +++ b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py @@ -73,8 +73,13 @@ def create_time_dimension( grain_parsed_from_name = ( group_by_item_name.time_granularity.value if group_by_item_name.time_granularity else None ) - grain_from_param = time_granularity_name - if grain_parsed_from_name and grain_from_param and grain_from_param != grain_parsed_from_name: + inputs_are_mismatched = ( + grain_parsed_from_name is not None + and time_granularity_name is not None + and time_granularity_name != grain_parsed_from_name + ) + + if inputs_are_mismatched: raise ParseWhereFilterException( f"Received different grains in `time_dimension_name` parameter ('{time_dimension_name}') " f"and `time_granularity_name` parameter ('{time_granularity_name}'). Remove the grain suffix " @@ -82,7 +87,7 @@ def create_time_dimension( "parameter to specify the intendend grain." ) - time_granularity_name = grain_parsed_from_name or grain_from_param + time_granularity_name = grain_parsed_from_name or time_granularity_name return TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name),