diff --git a/.changes/unreleased/Features-20240908-221757.yaml b/.changes/unreleased/Features-20240908-221757.yaml new file mode 100644 index 00000000..b277a0c8 --- /dev/null +++ b/.changes/unreleased/Features-20240908-221757.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Allow filter parameters to accept custom granularities +time: 2024-09-08T22:17:57.865557-07:00 +custom: + Author: tlento + Issue: "345" diff --git a/dbt_semantic_interfaces/call_parameter_sets.py b/dbt_semantic_interfaces/call_parameter_sets.py index 0520ac46..a018e447 100644 --- a/dbt_semantic_interfaces/call_parameter_sets.py +++ b/dbt_semantic_interfaces/call_parameter_sets.py @@ -10,7 +10,6 @@ MetricReference, TimeDimensionReference, ) -from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart @@ -29,7 +28,7 @@ class TimeDimensionCallParameterSet: entity_path: Tuple[EntityReference, ...] time_dimension_reference: TimeDimensionReference - time_granularity: Optional[TimeGranularity] = None + time_granularity_name: Optional[str] = None date_part: Optional[DatePart] = None diff --git a/dbt_semantic_interfaces/naming/dundered.py b/dbt_semantic_interfaces/naming/dundered.py index 31ff2193..1095d3a3 100644 --- a/dbt_semantic_interfaces/naming/dundered.py +++ b/dbt_semantic_interfaces/naming/dundered.py @@ -20,7 +20,11 @@ class StructuredDunderedName: element_name: "ds" granularity: TimeGranularity.WEEK - The time granularity is part of legacy query syntax and there are plans to migrate away from this format. + The time granularity is part of legacy query syntax and there are plans to migrate away from this format. As such, + this will not be updated to allow for custom granularity values. This implies that any query paths that push named + parameters through this class will not support a custom grain reference of the form `metric_time__martian_year`, + and users wishing to use their martian year grain will have to explicitly reference it via a separate parameter + instead of gluing it onto the end of the name. """ entity_links: Tuple[EntityReference, ...] diff --git a/dbt_semantic_interfaces/parsing/text_input/ti_description.py b/dbt_semantic_interfaces/parsing/text_input/ti_description.py index 6477b4f1..62fff662 100644 --- a/dbt_semantic_interfaces/parsing/text_input/ti_description.py +++ b/dbt_semantic_interfaces/parsing/text_input/ti_description.py @@ -7,7 +7,6 @@ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.errors import InvalidQuerySyntax from dbt_semantic_interfaces.naming.dundered import StructuredDunderedName -from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart @@ -65,21 +64,13 @@ def __post_init__(self) -> None: # noqa: D105 raise InvalidQuerySyntax("The entity path should not be specified for a metric.") if len(structured_item_name.entity_links) > 0: raise InvalidQuerySyntax("The name of the metric should not have entity links.") - # Check that dimensions / time dimensions have a valid time granularity / date part. + # Check that dimensions / time dimensions have a valid date part. elif item_type is QueryItemType.DIMENSION or item_type is QueryItemType.TIME_DIMENSION: - if self.time_granularity_name is not None: - valid_time_granularity_names = set(time_granularity.value for time_granularity in TimeGranularity) - if self.time_granularity_name.lower() not in valid_time_granularity_names: - raise InvalidQuerySyntax( - f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are" - f" {valid_time_granularity_names}" - ) - if self.date_part_name is not None: valid_date_part_names = set(date_part.value for date_part in DatePart) if self.date_part_name.lower() not in set(date_part.value for date_part in DatePart): raise InvalidQuerySyntax( - f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are" + f"{self.date_part_name!r} is not a valid date part. Valid values are" f" {valid_date_part_names}" ) diff --git a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py index c1e84e60..c59dc016 100644 --- a/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py +++ b/dbt_semantic_interfaces/parsing/where_filter/parameter_set_factory.py @@ -16,12 +16,25 @@ MetricReference, TimeDimensionReference, ) -from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart class ParameterSetFactory: - """Creates parameter sets for use in the Jinja sandbox.""" + """Creates parameter sets for use in the Jinja sandbox. + + This class does the following: + 1. Parses element references (e.g., "{{ Dimension('listing__is_lux') }}") out of where filter expressions + 2. Extracts reference attributes (e.g., grain for "{{ TimeDimension('metric_time', grain='martian_year') }}") + 3. Allows use of standard time granularities in name strings, (e.g., "{{ Dimension('metric_time__year') }}") + + This class does not do direct validation of any custom granularity values, nor does it allow for use of custom + granularities as parts of element reference names. So we can parse "{{ Dimension('shuttle__launch_time__year') }}" + into a valid TimeDimension object with yearly granularity, but we will not correctly parse something like + "{{Dimension('shuttle__launch_time__martian_year')}}" - this will return a dimension named `martian_year` with the + entity link path of ['shuttle', 'launch_time']. Since custom granularity names will not be allowed to be re-used + as dimension names this will fail to match anything defined in the semantic manifest, but the error management + experience will not be as clean and direct as it was for standard granularities. + """ @staticmethod def _exception_message_for_incorrect_format(element_name: str) -> str: @@ -37,26 +50,51 @@ def create_time_dimension( entity_path: Sequence[str] = (), date_part_name: Optional[str] = None, ) -> TimeDimensionCallParameterSet: - """Gets called by Jinja when rendering {{ TimeDimension(...) }}.""" + """Gets called by Jinja when rendering {{ TimeDimension(...) }}. + + There is a lot of strangeness around the time granularity specification here. Historically, + we accepted time dimension names of the form `metric_time__week` or `customer__registration_date__month` + in this interface. We have not yet fully deprecated this, and it's unclear if we ever will. + + Key points to note: + 1. The time dimension name parsing only accepts standard time granularities. This will not change. + 2. The time granularity parameter is what we want everybody to use because it's more explicit. + 3. The time granularity parameter will support custom granularities, so that's nice + + While this all may seem pretty bad it's not as terrible as all that - this class is only used + for parsing where filters. When we solve the problems with our current where filter spec this will + persist as a backwards compatibility model, but nothing more. + """ group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name) if len(group_by_item_name.entity_links) != 1 and not is_metric_time_name(group_by_item_name.element_name): raise ParseWhereFilterException( ParameterSetFactory._exception_message_for_incorrect_format(time_dimension_name) ) - grain_parsed_from_name = group_by_item_name.time_granularity - grain_from_param = TimeGranularity(time_granularity_name) if time_granularity_name else None - if grain_parsed_from_name and grain_from_param and grain_from_param != grain_parsed_from_name: + grain_parsed_from_name = ( + group_by_item_name.time_granularity.value if group_by_item_name.time_granularity else None + ) + inputs_are_mismatched = ( + grain_parsed_from_name is not None + and time_granularity_name is not None + and time_granularity_name != grain_parsed_from_name + ) + + if inputs_are_mismatched: raise ParseWhereFilterException( f"Received different grains in `time_dimension_name` parameter ('{time_dimension_name}') " - f"and `time_granularity_name` parameter ('{time_granularity_name}')." + f"and `time_granularity_name` parameter ('{time_granularity_name}'). Remove the grain suffix " + f"(`{grain_parsed_from_name}`) from the time dimension name and use the `time_granularity_name` " + "parameter to specify the intendend grain." ) + time_granularity_name = grain_parsed_from_name or time_granularity_name + return TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name), entity_path=( tuple(EntityReference(element_name=arg) for arg in entity_path) + group_by_item_name.entity_links ), - time_granularity=grain_parsed_from_name or grain_from_param, + time_granularity_name=time_granularity_name.lower() if time_granularity_name else None, date_part=DatePart(date_part_name.lower()) if date_part_name else None, ) diff --git a/tests/implementations/where_filter/test_parse_calls.py b/tests/implementations/where_filter/test_parse_calls.py index 0fcad048..2a7f9e89 100644 --- a/tests/implementations/where_filter/test_parse_calls.py +++ b/tests/implementations/where_filter/test_parse_calls.py @@ -69,7 +69,7 @@ def test_extract_dimension_with_grain_call_parameter_sets() -> None: # noqa: D TimeDimensionCallParameterSet( entity_path=(), time_dimension_reference=TimeDimensionReference(element_name="metric_time"), - time_granularity=TimeGranularity.WEEK, + time_granularity_name=TimeGranularity.WEEK.value, ), ), entity_call_parameter_sets=(), @@ -91,7 +91,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D EntityReference("listing"), EntityReference("user"), ), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -110,7 +110,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D EntityReference("listing"), EntityReference("user"), ), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -126,7 +126,7 @@ def test_extract_metric_time_dimension_call_parameter_sets() -> None: # noqa: D TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name="metric_time"), entity_path=(), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -216,7 +216,7 @@ def test_where_filter_interesection_extract_call_parameter_sets() -> None: TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference(element_name="metric_time"), entity_path=(), - time_granularity=TimeGranularity.MONTH, + time_granularity_name=TimeGranularity.MONTH.value, ), ) ) @@ -269,7 +269,7 @@ def test_time_dimension_without_granularity() -> None: # noqa: D TimeDimensionCallParameterSet( entity_path=(EntityReference("booking"),), time_dimension_reference=TimeDimensionReference(element_name="created_at"), - time_granularity=None, + time_granularity_name=None, ), ), entity_call_parameter_sets=(), diff --git a/tests/parsing/test_object_builder_item_description.py b/tests/parsing/test_object_builder_item_description.py index 1b4abba2..10ef925e 100644 --- a/tests/parsing/test_object_builder_item_description.py +++ b/tests/parsing/test_object_builder_item_description.py @@ -19,9 +19,11 @@ def test_valid_object_builder_items() -> None: # noqa: D valid_items = ( "Dimension('listing__created_at', entity_path=['host'])", "Dimension('listing__created_at', entity_path=['host']).grain('day').date_part('day')", + "Dimension('metric_time').grain('martian_year')", "TimeDimension('listing__created_at', time_granularity_name='day', entity_path=['host'], date_part_name='day')", "Entity('listing__created_at', entity_path=['host'])", "Metric('bookings', group_by=['listing__created_at'])", + "TimeDimension('metric_time', time_granularity_name='martian_year')", ) for valid_item in valid_items: logger.info(f"Checking {valid_item=}") @@ -32,9 +34,7 @@ def test_invalid_object_builder_items() -> None: # noqa: D text_processor = ObjectBuilderTextProcessor() invalid_items = ( - "Dimension('listing__created_at').grain('invalid')", "Dimension('listing__created_at').date_part('invalid')", - "TimeDimension('listing__created_at', 'invalid', 'day')", "TimeDimension('listing__created_at', 'day', date_part_name='invalid')", "TimeDimension('listing__created_at', 'day', date_part_name='month').grain('month')", "TimeDimension('listing__created_at', 'day', date_part_name='month').date_part('month')", diff --git a/tests/parsing/test_where_filter_parsing.py b/tests/parsing/test_where_filter_parsing.py index d9ab918c..60a37e0a 100644 --- a/tests/parsing/test_where_filter_parsing.py +++ b/tests/parsing/test_where_filter_parsing.py @@ -185,7 +185,7 @@ def test_dimension_date_part() -> None: # noqa TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference("metric_time"), entity_path=(), - time_granularity=TimeGranularity.WEEK, + time_granularity_name=TimeGranularity.WEEK.value, ), ), ( @@ -193,7 +193,7 @@ def test_dimension_date_part() -> None: # noqa TimeDimensionCallParameterSet( time_dimension_reference=TimeDimensionReference("metric_time"), entity_path=(), - time_granularity=TimeGranularity.WEEK, + time_granularity_name=TimeGranularity.WEEK.value, ), ), ],