Skip to content

Commit

Permalink
Add validation for order-by in saved queries.
Browse files Browse the repository at this point in the history
  • Loading branch information
plypaul committed Aug 5, 2024
1 parent 17aa359 commit 6d7c27a
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 6 deletions.
12 changes: 12 additions & 0 deletions dbt_semantic_interfaces/parsing/text_input/ti_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ def create_modified(
descending=descending or self.descending,
)

def with_descending_unset(self) -> QueryItemDescription:
"""Return this with the `descending` field set to None."""
return QueryItemDescription(
item_type=self.item_type,
item_name=self.item_name,
entity_path=self.entity_path,
time_granularity_name=self.time_granularity_name,
date_part_name=self.date_part_name,
group_by_for_metric_item=self.group_by_for_metric_item,
descending=None,
)


class QueryItemType(Enum):
"""Enumerates the types of items that a used to group items in a filter or a query.
Expand Down
137 changes: 135 additions & 2 deletions dbt_semantic_interfaces/validations/saved_query.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
from __future__ import annotations

import logging
import traceback
from typing import Generic, List, Sequence, Set
from dataclasses import dataclass
from typing import Generic, List, Optional, Sequence, Set

from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME
from dbt_semantic_interfaces.parsing.text_input.ti_description import (
QueryItemDescription,
QueryItemType,
)
from dbt_semantic_interfaces.parsing.text_input.ti_processor import (
QueryItemTextProcessor,
)
from dbt_semantic_interfaces.parsing.text_input.valid_method import (
ConfiguredValidMethodMapping,
ValidMethodMapping,
)
from dbt_semantic_interfaces.parsing.where_filter.where_filter_parser import (
WhereFilterParser,
)
Expand Down Expand Up @@ -125,6 +139,113 @@ def _check_where(saved_query: SavedQuery) -> Sequence[ValidationIssue]:

return issues

@staticmethod
def _parse_query_item(
saved_query: SavedQuery,
text_processor: QueryItemTextProcessor,
query_item_input: str,
element_type: SavedQueryElementType,
valid_method_mapping: ValidMethodMapping,
) -> _ParseQueryItemResult:
try:
item_description = text_processor.get_description(query_item_input, valid_method_mapping)
return _ParseQueryItemResult(item_description=item_description, validation_issue=None)
except Exception as e:
return _ParseQueryItemResult(
item_description=None,
validation_issue=generate_exception_issue(
what_was_being_done=(
f"parsing a field in {saved_query.name!r}."
f" Note that metrics need to be specified using the object-builder syntax"
f" (`Metric('metric_name')`) and if `.descending(...)` is specified, it should be at the"
f" end."
),
e=e,
context=SavedQueryContext(
file_context=FileContext.from_metadata(metadata=saved_query.metadata),
element_type=element_type,
element_value=query_item_input,
),
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
),
)

@staticmethod
@validate_safely("Validate the order-by field in a saved query.")
def _check_order_by(saved_query: SavedQuery) -> Sequence[ValidationIssue]:
"""Check that the order-by items in a saved query are valid.
The order-by item without the `.descending()` should match with one of the metric items or group-by items.
"""
validation_issues: List[ValidationIssue] = []
if len(saved_query.query_params.order_by) == 0:
return validation_issues

valid_query_item_descriptions = set()
text_processor = QueryItemTextProcessor()
for metric in saved_query.query_params.metrics:
# In an order-by, a metric is specified as "Metric('bookings')" while in the metrics section, it's only the
# metric name.
result = SavedQueryRule._parse_query_item(
saved_query=saved_query,
text_processor=text_processor,
query_item_input=f"{QueryItemType.METRIC.value}('{metric}')",
element_type=SavedQueryElementType.METRIC,
valid_method_mapping=ConfiguredValidMethodMapping.DEFAULT_MAPPING,
)
if result.item_description is not None:
valid_query_item_descriptions.add(result.item_description)
if result.validation_issue is not None:
validation_issues.append(result.validation_issue)

for group_by in saved_query.query_params.group_by:
result = SavedQueryRule._parse_query_item(
saved_query=saved_query,
text_processor=text_processor,
query_item_input=group_by,
element_type=SavedQueryElementType.GROUP_BY,
valid_method_mapping=ConfiguredValidMethodMapping.DEFAULT_MAPPING,
)
if result.item_description is not None:
valid_query_item_descriptions.add(result.item_description)
if result.validation_issue is not None:
validation_issues.append(result.validation_issue)

# If there are issues with the metrics or group-by items, checking the order-by may lead to erroneous issues.
if len(validation_issues) > 0:
return validation_issues

for order_by in saved_query.query_params.order_by:
result = SavedQueryRule._parse_query_item(
saved_query=saved_query,
text_processor=text_processor,
query_item_input=order_by,
element_type=SavedQueryElementType.GROUP_BY,
valid_method_mapping=ConfiguredValidMethodMapping.DEFAULT_MAPPING_FOR_ORDER_BY,
)
if result.validation_issue is not None:
validation_issues.append(result.validation_issue)
continue
item_description = result.item_description
assert item_description is not None, "This should have been ensured by the result class."

# The value of `descending` should be unset as only an order-by item would have it set.
if item_description.with_descending_unset() not in valid_query_item_descriptions:
validation_issues.append(
ValidationError(
message=f"{order_by} does not match any of the listed metrics or group-by items. ",
context=SavedQueryContext(
file_context=FileContext.from_metadata(metadata=saved_query.metadata),
element_type=SavedQueryElementType.ORDER_BY,
element_value=order_by,
),
)
)

return validation_issues

@staticmethod
@validate_safely("Validate all saved queries in a semantic manifest.")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
Expand All @@ -146,5 +267,17 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
valid_group_by_element_names=valid_group_by_element_names,
saved_query=saved_query,
)
issues += SavedQueryRule._check_where(saved_query=saved_query)
issues += SavedQueryRule._check_where(saved_query)
issues += SavedQueryRule._check_order_by(saved_query)
return issues


@dataclass(frozen=True)
class _ParseQueryItemResult:
"""Result of parsing a string like `Dimension('listing__country')`."""

item_description: Optional[QueryItemDescription]
validation_issue: Optional[ValidationIssue]

def __post_init__(self) -> None:
assert (self.item_description is not None) ^ (self.validation_issue is not None)
2 changes: 2 additions & 0 deletions dbt_semantic_interfaces/validations/validator_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ class SavedQueryElementType(Enum):
METRIC = "metric"
GROUP_BY = "group by"
WHERE = "where"
ORDER_BY = "order by"
LIMIT = "limit"


class SavedQueryContext(BaseModel):
Expand Down
102 changes: 102 additions & 0 deletions tests/validations/test_saved_query_order_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import copy

from dbt_semantic_interfaces.implementations.saved_query import (
PydanticSavedQuery,
PydanticSavedQueryQueryParams,
)
from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.validations.saved_query import SavedQueryRule
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
SemanticManifestValidator,
)
from tests.validations.test_saved_query import check_only_one_error_with_message


def test_invalid_order_by_descending_arg( # noqa: D
simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
manifest.saved_queries = [
PydanticSavedQuery(
name="Example Saved Query",
description="Example description.",
query_params=PydanticSavedQueryQueryParams(
metrics=["listings"],
group_by=["TimeDimension('metric_time')"],
order_by=["TimeDimension('metric_time').descending(foo)"],
),
),
]

manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
check_only_one_error_with_message(
manifest_validator.validate_semantic_manifest(manifest),
"An error occurred while parsing a field",
)


def test_invalid_order_by_due_to_mismatch( # noqa: D
simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
manifest.saved_queries = [
PydanticSavedQuery(
name="Example Saved Query",
description="Example description.",
query_params=PydanticSavedQueryQueryParams(
metrics=["listings"],
group_by=["TimeDimension('metric_time')"],
order_by=["Dimension('listing__country')"],
),
),
]

manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
check_only_one_error_with_message(
manifest_validator.validate_semantic_manifest(manifest),
"does not match any of the listed metrics or group-by items.",
)


def test_order_by_matches_metric( # noqa: D
simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
manifest.saved_queries = [
PydanticSavedQuery(
name="Example Saved Query",
description="Example description.",
query_params=PydanticSavedQueryQueryParams(
metrics=["listings"],
group_by=["TimeDimension('metric_time')"],
order_by=["Metric('listings').descending(True)"],
),
),
]

manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
results = manifest_validator.validate_semantic_manifest(manifest)
assert results.all_issues == ()


def test_invalid_order_by_due_to_bare_metric( # noqa: D
simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
) -> None:
manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
manifest.saved_queries = [
PydanticSavedQuery(
name="Example Saved Query",
description="Example description.",
query_params=PydanticSavedQueryQueryParams(
metrics=["listings"], group_by=["TimeDimension('metric_time')"], order_by=["listings"]
),
),
]

manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
check_only_one_error_with_message(
manifest_validator.validate_semantic_manifest(manifest),
"An error occurred while parsing a field",
)
11 changes: 7 additions & 4 deletions tests/validations/test_semantic_manifest_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
SemanticManifestValidator,
)

# Note: Using `assert results.errors == ()` instead of `assert not results.has_blocking_issues` as the diff shows up
# better in pytest output.


def test_semantic_manifest_validator_default_success( # noqa:D
simple_semantic_manifest: PydanticSemanticManifest,
) -> None:
semantic_manifest = deepcopy(simple_semantic_manifest)
validator = SemanticManifestValidator[PydanticSemanticManifest]()
results = validator.validate_semantic_manifest(semantic_manifest)
assert not results.has_blocking_issues
assert results.errors == ()


def test_semantic_manifest_validator_default_failure( # noqa:D
Expand All @@ -26,7 +29,7 @@ def test_semantic_manifest_validator_default_failure( # noqa:D

validator = SemanticManifestValidator[PydanticSemanticManifest]()
results = validator.validate_semantic_manifest(semantic_manifest)
assert results.has_blocking_issues
assert results.errors == ()


def test_multi_process_validator_results_same_as_sync( # noqa:D
Expand All @@ -49,6 +52,6 @@ def test_multi_process_validator_results_same_as_sync( # noqa:D
multi_process_results = validator.validate_semantic_manifest(
semantic_manifest=semantic_manifest, multi_process=True
)
assert default_results.has_blocking_issues
assert multi_process_results.has_blocking_issues
assert default_results.errors == ()
assert multi_process_results.errors == ()
assert default_results.all_issues == multi_process_results.all_issues

0 comments on commit 6d7c27a

Please sign in to comment.