Add validation for order-by in saved queries.

dbt-labs · Aug 5, 2024 · 6d7c27a · 6d7c27a
1 parent 17aa359
commit 6d7c27a
Show file tree

Hide file tree

Showing 5 changed files with 258 additions and 6 deletions.
diff --git a/dbt_semantic_interfaces/parsing/text_input/ti_description.py b/dbt_semantic_interfaces/parsing/text_input/ti_description.py
@@ -66,6 +66,18 @@ def create_modified(
             descending=descending or self.descending,
         )
 
+    def with_descending_unset(self) -> QueryItemDescription:
+        """Return this with the `descending` field set to None."""
+        return QueryItemDescription(
+            item_type=self.item_type,
+            item_name=self.item_name,
+            entity_path=self.entity_path,
+            time_granularity_name=self.time_granularity_name,
+            date_part_name=self.date_part_name,
+            group_by_for_metric_item=self.group_by_for_metric_item,
+            descending=None,
+        )
+
 
 class QueryItemType(Enum):
     """Enumerates the types of items that a used to group items in a filter or a query.

diff --git a/dbt_semantic_interfaces/validations/saved_query.py b/dbt_semantic_interfaces/validations/saved_query.py
@@ -1,9 +1,23 @@
+from __future__ import annotations
+
 import logging
 import traceback
-from typing import Generic, List, Sequence, Set
+from dataclasses import dataclass
+from typing import Generic, List, Optional, Sequence, Set
 
 from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
 from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME
+from dbt_semantic_interfaces.parsing.text_input.ti_description import (
+    QueryItemDescription,
+    QueryItemType,
+)
+from dbt_semantic_interfaces.parsing.text_input.ti_processor import (
+    QueryItemTextProcessor,
+)
+from dbt_semantic_interfaces.parsing.text_input.valid_method import (
+    ConfiguredValidMethodMapping,
+    ValidMethodMapping,
+)
 from dbt_semantic_interfaces.parsing.where_filter.where_filter_parser import (
     WhereFilterParser,
 )
@@ -125,6 +139,113 @@ def _check_where(saved_query: SavedQuery) -> Sequence[ValidationIssue]:
 
         return issues
 
+    @staticmethod
+    def _parse_query_item(
+        saved_query: SavedQuery,
+        text_processor: QueryItemTextProcessor,
+        query_item_input: str,
+        element_type: SavedQueryElementType,
+        valid_method_mapping: ValidMethodMapping,
+    ) -> _ParseQueryItemResult:
+        try:
+            item_description = text_processor.get_description(query_item_input, valid_method_mapping)
+            return _ParseQueryItemResult(item_description=item_description, validation_issue=None)
+        except Exception as e:
+            return _ParseQueryItemResult(
+                item_description=None,
+                validation_issue=generate_exception_issue(
+                    what_was_being_done=(
+                        f"parsing a field in {saved_query.name!r}."
+                        f" Note that metrics need to be specified using the object-builder syntax"
+                        f" (`Metric('metric_name')`) and if `.descending(...)` is specified, it should be at the"
+                        f" end."
+                    ),
+                    e=e,
+                    context=SavedQueryContext(
+                        file_context=FileContext.from_metadata(metadata=saved_query.metadata),
+                        element_type=element_type,
+                        element_value=query_item_input,
+                    ),
+                    extras={
+                        "traceback": "".join(traceback.format_tb(e.__traceback__)),
+                    },
+                ),
+            )
+
+    @staticmethod
+    @validate_safely("Validate the order-by field in a saved query.")
+    def _check_order_by(saved_query: SavedQuery) -> Sequence[ValidationIssue]:
+        """Check that the order-by items in a saved query are valid.
+
+        The order-by item without the `.descending()` should match with one of the metric items or group-by items.
+        """
+        validation_issues: List[ValidationIssue] = []
+        if len(saved_query.query_params.order_by) == 0:
+            return validation_issues
+
+        valid_query_item_descriptions = set()
+        text_processor = QueryItemTextProcessor()
+        for metric in saved_query.query_params.metrics:
+            # In an order-by, a metric is specified as "Metric('bookings')" while in the metrics section, it's only the
+            # metric name.
+            result = SavedQueryRule._parse_query_item(
+                saved_query=saved_query,
+                text_processor=text_processor,
+                query_item_input=f"{QueryItemType.METRIC.value}('{metric}')",
+                element_type=SavedQueryElementType.METRIC,
+                valid_method_mapping=ConfiguredValidMethodMapping.DEFAULT_MAPPING,
+            )
+            if result.item_description is not None:
+                valid_query_item_descriptions.add(result.item_description)
+            if result.validation_issue is not None:
+                validation_issues.append(result.validation_issue)
+
+        for group_by in saved_query.query_params.group_by:
+            result = SavedQueryRule._parse_query_item(
+                saved_query=saved_query,
+                text_processor=text_processor,
+                query_item_input=group_by,
+                element_type=SavedQueryElementType.GROUP_BY,
+                valid_method_mapping=ConfiguredValidMethodMapping.DEFAULT_MAPPING,
+            )
+            if result.item_description is not None:
+                valid_query_item_descriptions.add(result.item_description)
+            if result.validation_issue is not None:
+                validation_issues.append(result.validation_issue)
+
+        # If there are issues with the metrics or group-by items, checking the order-by may lead to erroneous issues.
+        if len(validation_issues) > 0:
+            return validation_issues
+
+        for order_by in saved_query.query_params.order_by:
+            result = SavedQueryRule._parse_query_item(
+                saved_query=saved_query,
+                text_processor=text_processor,
+                query_item_input=order_by,
+                element_type=SavedQueryElementType.GROUP_BY,
+                valid_method_mapping=ConfiguredValidMethodMapping.DEFAULT_MAPPING_FOR_ORDER_BY,
+            )
+            if result.validation_issue is not None:
+                validation_issues.append(result.validation_issue)
+                continue
+            item_description = result.item_description
+            assert item_description is not None, "This should have been ensured by the result class."
+
+            # The value of `descending` should be unset as only an order-by item would have it set.
+            if item_description.with_descending_unset() not in valid_query_item_descriptions:
+                validation_issues.append(
+                    ValidationError(
+                        message=f"{order_by} does not match any of the listed metrics or group-by items. ",
+                        context=SavedQueryContext(
+                            file_context=FileContext.from_metadata(metadata=saved_query.metadata),
+                            element_type=SavedQueryElementType.ORDER_BY,
+                            element_value=order_by,
+                        ),
+                    )
+                )
+
+        return validation_issues
+
     @staticmethod
     @validate_safely("Validate all saved queries in a semantic manifest.")
     def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:  # noqa: D
@@ -146,5 +267,17 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
                 valid_group_by_element_names=valid_group_by_element_names,
                 saved_query=saved_query,
             )
-            issues += SavedQueryRule._check_where(saved_query=saved_query)
+            issues += SavedQueryRule._check_where(saved_query)
+            issues += SavedQueryRule._check_order_by(saved_query)
         return issues
+
+
+@dataclass(frozen=True)
+class _ParseQueryItemResult:
+    """Result of parsing a string like `Dimension('listing__country')`."""
+
+    item_description: Optional[QueryItemDescription]
+    validation_issue: Optional[ValidationIssue]
+
+    def __post_init__(self) -> None:
+        assert (self.item_description is not None) ^ (self.validation_issue is not None)
diff --git a/dbt_semantic_interfaces/validations/validator_helpers.py b/dbt_semantic_interfaces/validations/validator_helpers.py
@@ -153,6 +153,8 @@ class SavedQueryElementType(Enum):
     METRIC = "metric"
     GROUP_BY = "group by"
     WHERE = "where"
+    ORDER_BY = "order by"
+    LIMIT = "limit"
 
 
 class SavedQueryContext(BaseModel):

diff --git a/tests/validations/test_saved_query_order_by.py b/tests/validations/test_saved_query_order_by.py
@@ -0,0 +1,102 @@
+import copy
+
+from dbt_semantic_interfaces.implementations.saved_query import (
+    PydanticSavedQuery,
+    PydanticSavedQueryQueryParams,
+)
+from dbt_semantic_interfaces.implementations.semantic_manifest import (
+    PydanticSemanticManifest,
+)
+from dbt_semantic_interfaces.validations.saved_query import SavedQueryRule
+from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
+    SemanticManifestValidator,
+)
+from tests.validations.test_saved_query import check_only_one_error_with_message
+
+
+def test_invalid_order_by_descending_arg(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
+    manifest.saved_queries = [
+        PydanticSavedQuery(
+            name="Example Saved Query",
+            description="Example description.",
+            query_params=PydanticSavedQueryQueryParams(
+                metrics=["listings"],
+                group_by=["TimeDimension('metric_time')"],
+                order_by=["TimeDimension('metric_time').descending(foo)"],
+            ),
+        ),
+    ]
+
+    manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
+    check_only_one_error_with_message(
+        manifest_validator.validate_semantic_manifest(manifest),
+        "An error occurred while parsing a field",
+    )
+
+
+def test_invalid_order_by_due_to_mismatch(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
+    manifest.saved_queries = [
+        PydanticSavedQuery(
+            name="Example Saved Query",
+            description="Example description.",
+            query_params=PydanticSavedQueryQueryParams(
+                metrics=["listings"],
+                group_by=["TimeDimension('metric_time')"],
+                order_by=["Dimension('listing__country')"],
+            ),
+        ),
+    ]
+
+    manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
+    check_only_one_error_with_message(
+        manifest_validator.validate_semantic_manifest(manifest),
+        "does not match any of the listed metrics or group-by items.",
+    )
+
+
+def test_order_by_matches_metric(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
+    manifest.saved_queries = [
+        PydanticSavedQuery(
+            name="Example Saved Query",
+            description="Example description.",
+            query_params=PydanticSavedQueryQueryParams(
+                metrics=["listings"],
+                group_by=["TimeDimension('metric_time')"],
+                order_by=["Metric('listings').descending(True)"],
+            ),
+        ),
+    ]
+
+    manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
+    results = manifest_validator.validate_semantic_manifest(manifest)
+    assert results.all_issues == ()
+
+
+def test_invalid_order_by_due_to_bare_metric(  # noqa: D
+    simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest,
+) -> None:
+    manifest = copy.deepcopy(simple_semantic_manifest__with_primary_transforms)
+    manifest.saved_queries = [
+        PydanticSavedQuery(
+            name="Example Saved Query",
+            description="Example description.",
+            query_params=PydanticSavedQueryQueryParams(
+                metrics=["listings"], group_by=["TimeDimension('metric_time')"], order_by=["listings"]
+            ),
+        ),
+    ]
+
+    manifest_validator = SemanticManifestValidator[PydanticSemanticManifest]([SavedQueryRule()])
+    check_only_one_error_with_message(
+        manifest_validator.validate_semantic_manifest(manifest),
+        "An error occurred while parsing a field",
+    )
diff --git a/tests/validations/test_semantic_manifest_validator.py b/tests/validations/test_semantic_manifest_validator.py
@@ -7,14 +7,17 @@
     SemanticManifestValidator,
 )
 
+# Note: Using `assert results.errors == ()` instead of `assert not results.has_blocking_issues` as the diff shows up
+# better in pytest output.
+
 
 def test_semantic_manifest_validator_default_success(  # noqa:D
     simple_semantic_manifest: PydanticSemanticManifest,
 ) -> None:
     semantic_manifest = deepcopy(simple_semantic_manifest)
     validator = SemanticManifestValidator[PydanticSemanticManifest]()
     results = validator.validate_semantic_manifest(semantic_manifest)
-    assert not results.has_blocking_issues
+    assert results.errors == ()
 
 
 def test_semantic_manifest_validator_default_failure(  # noqa:D
@@ -26,7 +29,7 @@ def test_semantic_manifest_validator_default_failure(  # noqa:D
 
     validator = SemanticManifestValidator[PydanticSemanticManifest]()
     results = validator.validate_semantic_manifest(semantic_manifest)
-    assert results.has_blocking_issues
+    assert results.errors == ()
 
 
 def test_multi_process_validator_results_same_as_sync(  # noqa:D
@@ -49,6 +52,6 @@ def test_multi_process_validator_results_same_as_sync(  # noqa:D
     multi_process_results = validator.validate_semantic_manifest(
         semantic_manifest=semantic_manifest, multi_process=True
     )
-    assert default_results.has_blocking_issues
-    assert multi_process_results.has_blocking_issues
+    assert default_results.errors == ()
+    assert multi_process_results.errors == ()
     assert default_results.all_issues == multi_process_results.all_issues