Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow specification of filters as either lists or strings #171

Merged
merged 6 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20231009-210737.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Allow metric filters and saved query where properties to accept lists of filter
expressions
time: 2023-10-09T21:07:37.978465-07:00
custom:
Author: tlento
Issue: "147"
103 changes: 98 additions & 5 deletions dbt_semantic_interfaces/implementations/filters/where_filter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from __future__ import annotations

from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
from typing import Callable, Generator, List, Tuple

from typing_extensions import Self

from dbt_semantic_interfaces.call_parameter_sets import (
FilterCallParameterSets,
ParseWhereFilterException,
)
from dbt_semantic_interfaces.implementations.base import (
HashableBaseModel,
PydanticCustomInputParser,
Expand All @@ -9,17 +16,21 @@
from dbt_semantic_interfaces.parsing.where_filter.where_filter_parser import (
WhereFilterParser,
)
from dbt_semantic_interfaces.pretty_print import pformat_big_objects


class PydanticWhereFilter(PydanticCustomInputParser, HashableBaseModel):
"""A filter applied to the data set containing measures, dimensions, identifiers relevant to the query.
"""Pydantic implementation of a WhereFilter.

TODO: Clarify whether the filter applies to aggregated or un-aggregated data sets.
This specifies a templated SQL where expression, with templates allowing for extraction of dimensions and
entities (and, eventually, measures and metrics) to include in the filter itself. This filter will then
be applied to an input data set, either from an original input source or an intermediate subquery output.

The data set will contain dimensions as required by the query and the dimensions that a referenced in any of the
filters that are used in the definition of metrics.
The data set will contain entities and dimensions as referenced in the query along with the entities and dimensions
that are referenced in any of these filters, whether they are part of the query request or metric definition.
"""

# The where_sql_template field is used in PydanticWhereFilterIntersection.convert_legacy_input. Remove with caution.
where_sql_template: str

@classmethod
Expand All @@ -40,3 +51,85 @@ def _from_yaml_value(
@property
def call_parameter_sets(self) -> FilterCallParameterSets: # noqa: D
return WhereFilterParser.parse_call_parameter_sets(self.where_sql_template)


class PydanticWhereFilterIntersection(HashableBaseModel):
    """Pydantic implementation of a WhereFilterIntersection.

    Holds a list of PydanticWhereFilter objects and knows how to build itself from the
    older single-filter input shapes as well as from lists and dicts.
    """

    # A field named `where_sql_template` cannot be added to this class unless the input
    # conversion logic below is updated, since that key is used to detect legacy inputs.
    __WHERE_SQL_TEMPLATE_FIELD__ = "where_sql_template"
    __WHERE_FILTERS_FIELD__ = "where_filters"

    where_filters: List[PydanticWhereFilter]

    @classmethod
    def __get_validators__(cls) -> Generator[Callable[[PydanticParseableValueType], Self], None, None]:
        """Pydantic hook that yields the callable used to convert arbitrary input on parse.

        The accepted inputs include dicts that need special-case handling, so this class supplies
        its own converter rather than going through the shared _from_yaml_values interface.
        """
        yield cls._convert_legacy_and_yaml_input

    @classmethod
    def _convert_legacy_and_yaml_input(cls, input: PydanticParseableValueType) -> Self:
        """Coerce raw input into a PydanticWhereFilterIntersection.

        Filters used to be specified as a single WhereFilter, which itself could arrive as a bare string,
        a PydanticWhereFilter instance, or a dict representation of one. The current spec expects an object
        holding a collection of filters, so every older shape must still be accepted here.

        Conversion rules, by input type:

        Wrapped into a single-element filter list:
            - str
            - PydanticWhereFilter
            - dict containing the "where_sql_template" key

        Passed to the standard initializer (and validated by pydantic from there):
            - list -> used as the where_filters value
            - any other dict -> expanded as keyword arguments

        Returned unchanged:
            - PydanticWhereFilterIntersection

        Any other input type raises a ValueError.
        """
        # Legacy single-filter shapes: a bare string or an already-built filter object.
        if isinstance(input, (str, PydanticWhereFilter)):
            return cls(where_filters=[input])

        if isinstance(input, dict):
            # A dict carrying the template key is a serialized legacy filter, not an intersection.
            if cls.__WHERE_SQL_TEMPLATE_FIELD__ in input:
                return cls(where_filters=[input])
            return cls(**input)

        if isinstance(input, list):
            return cls(where_filters=input)

        if isinstance(input, cls):
            return input

        raise ValueError(
            f"Expected input to be of type string, list, PydanticWhereFilter, PydanticWhereFilterIntersection, "
            f"or dict but got {type(input)} with value {input}"
        )

    @property
    def filter_expression_parameter_sets(self) -> List[Tuple[str, FilterCallParameterSets]]:
        """Return (where_sql_template, call_parameter_sets) pairs, one per filter, in order.

        Every filter is attempted before reporting, so that a single ParseWhereFilterException
        can list all of the expressions that failed to parse rather than only the first one.
        """
        parsed_sets: List[Tuple[str, FilterCallParameterSets]] = []
        invalid_filter_expressions: List[Tuple[str, Exception]] = []

        for where_filter in self.where_filters:
            sql_template = where_filter.where_sql_template
            try:
                parameter_sets = where_filter.call_parameter_sets
            except Exception as e:
                invalid_filter_expressions.append((sql_template, e))
            else:
                parsed_sets.append((sql_template, parameter_sets))

        if not invalid_filter_expressions:
            return parsed_sets

        raise ParseWhereFilterException(
            f"Encountered one or more errors when parsing the set of filter expressions "
            f"{pformat_big_objects(self.where_filters)}! Invalid expressions: \n "
            f"{pformat_big_objects(invalid_filter_expressions)}"
        )
8 changes: 4 additions & 4 deletions dbt_semantic_interfaces/implementations/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
PydanticParseableValueType,
)
from dbt_semantic_interfaces.implementations.filters.where_filter import (
PydanticWhereFilter,
PydanticWhereFilterIntersection,
)
from dbt_semantic_interfaces.implementations.metadata import PydanticMetadata
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
Expand All @@ -28,7 +28,7 @@ class PydanticMetricInputMeasure(PydanticCustomInputParser, HashableBaseModel):
"""

name: str
filter: Optional[PydanticWhereFilter]
filter: Optional[PydanticWhereFilterIntersection]
alias: Optional[str]
join_to_timespine: bool = False
fill_nulls_with: Optional[int] = None
Expand Down Expand Up @@ -118,7 +118,7 @@ class PydanticMetricInput(HashableBaseModel):
"""Provides a pointer to a metric along with the additional properties used on that metric."""

name: str
filter: Optional[PydanticWhereFilter]
filter: Optional[PydanticWhereFilterIntersection]
alias: Optional[str]
offset_window: Optional[PydanticMetricTimeWindow]
offset_to_grain: Optional[TimeGranularity]
Expand Down Expand Up @@ -155,7 +155,7 @@ class PydanticMetric(HashableBaseModel, ModelWithMetadataParsing):
description: Optional[str]
type: MetricType
type_params: PydanticMetricTypeParams
filter: Optional[PydanticWhereFilter]
filter: Optional[PydanticWhereFilterIntersection]
metadata: Optional[PydanticMetadata]
label: Optional[str] = None

Expand Down
4 changes: 2 additions & 2 deletions dbt_semantic_interfaces/implementations/saved_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
ModelWithMetadataParsing,
)
from dbt_semantic_interfaces.implementations.filters.where_filter import (
PydanticWhereFilter,
PydanticWhereFilterIntersection,
)
from dbt_semantic_interfaces.implementations.metadata import PydanticMetadata
from dbt_semantic_interfaces.protocols import ProtocolHint
Expand All @@ -26,7 +26,7 @@ def _implements_protocol(self) -> SavedQuery:
name: str
metrics: List[str]
group_bys: List[str] = []
where: List[PydanticWhereFilter] = []
where: Optional[PydanticWhereFilterIntersection] = None

description: Optional[str] = None
metadata: Optional[PydanticMetadata] = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,20 @@
],
"type": "object"
},
"filter_schema": {
"$id": "filter_schema",
"oneOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
}
]
},
"is-time-dimension": {
"properties": {
"type": {
Expand Down Expand Up @@ -234,7 +248,7 @@
"type": "integer"
},
"filter": {
"type": "string"
"$ref": "#/definitions/filter_schema"
},
"join_to_timespine": {
"type": "boolean"
Expand All @@ -255,7 +269,7 @@
"type": "string"
},
"filter": {
"type": "string"
"$ref": "#/definitions/filter_schema"
},
"name": {
"type": "string"
Expand All @@ -277,7 +291,7 @@
"type": "string"
},
"filter": {
"type": "string"
"$ref": "#/definitions/filter_schema"
},
"label": {
"type": "string"
Expand Down Expand Up @@ -435,10 +449,7 @@
"type": "string"
},
"where": {
"items": {
"type": "string"
},
"type": "array"
"$ref": "#/definitions/filter_schema"
}
},
"required": [
Expand Down
23 changes: 16 additions & 7 deletions dbt_semantic_interfaces/parsing/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@

time_dimension_type_values = ["TIME", "time"]

# JSON schema for a filter value: either a single string or an array of strings.
filter_schema = {
    "$id": "filter_schema",
    "oneOf": [
        {"type": "string"},
        {"type": "array", "items": {"type": "string"}},
    ],
}

metric_input_measure_schema = {
"$id": "metric_input_measure_schema",
"oneOf": [
Expand All @@ -47,7 +58,7 @@
"type": "object",
"properties": {
"name": {"type": "string"},
"filter": {"type": "string"},
"filter": {"$ref": "filter_schema"},
"alias": {"type": "string"},
"join_to_timespine": {"type": "boolean"},
"fill_nulls_with": {"type": "integer"},
Expand All @@ -62,7 +73,7 @@
"type": "object",
"properties": {
"name": {"type": "string"},
"filter": {"type": "string"},
"filter": {"$ref": "filter_schema"},
"alias": {"type": "string"},
"offset_window": {"type": "string"},
"offset_to_grain": {"type": "string"},
Expand Down Expand Up @@ -218,7 +229,7 @@
},
"type": {"enum": metric_types_enum_values},
"type_params": {"$ref": "metric_type_params"},
"filter": {"type": "string"},
"filter": {"$ref": "filter_schema"},
"description": {"type": "string"},
"label": {"type": "string"},
},
Expand Down Expand Up @@ -292,10 +303,7 @@
"type": "array",
"items": {"type": "string"},
},
"where": {
"type": "array",
"items": {"type": "string"},
},
"where": {"$ref": "filter_schema"},
"label": {"type": "string"},
},
"required": ["name", "metrics"],
Expand Down Expand Up @@ -333,6 +341,7 @@
project_configuration_schema["$id"]: project_configuration_schema,
saved_query_schema["$id"]: saved_query_schema,
# Sub-object schemas
filter_schema["$id"]: filter_schema,
metric_input_measure_schema["$id"]: metric_input_measure_schema,
metric_type_params_schema["$id"]: metric_type_params_schema,
entity_schema["$id"]: entity_schema,
Expand Down
5 changes: 4 additions & 1 deletion dbt_semantic_interfaces/protocols/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,7 @@
SemanticModelDefaults,
SemanticModelT,
)
from dbt_semantic_interfaces.protocols.where_filter import WhereFilter # noqa:F401
from dbt_semantic_interfaces.protocols.where_filter import ( # noqa:F401
WhereFilter,
WhereFilterIntersection,
)
11 changes: 7 additions & 4 deletions dbt_semantic_interfaces/protocols/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Optional, Protocol, Sequence

from dbt_semantic_interfaces.protocols.metadata import Metadata
from dbt_semantic_interfaces.protocols.where_filter import WhereFilter
from dbt_semantic_interfaces.protocols.where_filter import WhereFilterIntersection
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity

Expand All @@ -23,7 +23,8 @@ def name(self) -> str: # noqa: D

@property
@abstractmethod
def filter(self) -> Optional[WhereFilter]: # noqa: D
def filter(self) -> Optional[WhereFilterIntersection]:
"""Return the set of filters to apply prior to aggregating this input measure."""
pass

@property
Expand Down Expand Up @@ -80,7 +81,8 @@ def name(self) -> str: # noqa: D

@property
@abstractmethod
def filter(self) -> Optional[WhereFilter]: # noqa: D
def filter(self) -> Optional[WhereFilterIntersection]:
"""Return the set of filters to apply prior to calculating this input metric."""
pass

@property
Expand Down Expand Up @@ -181,7 +183,8 @@ def type_params(self) -> MetricTypeParams: # noqa: D

@property
@abstractmethod
def filter(self) -> Optional[WhereFilter]: # noqa: D
def filter(self) -> Optional[WhereFilterIntersection]:
"""Return the set of filters to apply prior to calculating this metric."""
pass

@property
Expand Down
5 changes: 3 additions & 2 deletions dbt_semantic_interfaces/protocols/saved_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional, Protocol, Sequence

from dbt_semantic_interfaces.protocols.metadata import Metadata
from dbt_semantic_interfaces.protocols.where_filter import WhereFilter
from dbt_semantic_interfaces.protocols.where_filter import WhereFilterIntersection


class SavedQuery(Protocol):
Expand Down Expand Up @@ -35,7 +35,8 @@ def group_bys(self) -> Sequence[str]: # noqa: D

@property
@abstractmethod
def where(self) -> Sequence[WhereFilter]: # noqa: D
def where(self) -> Optional[WhereFilterIntersection]:
"""Returns the intersection class containing any where filters specified in the saved query."""
pass

@property
Expand Down
Loading
Loading