From 313c957882fc358da425a0a6ca12cdc4c341232b Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 14 Nov 2023 11:13:41 -0800 Subject: [PATCH] Support hierarchical config setting for SavedQueryExport configs (#9065) * Add test asserting `SavedQuery` configs can be set from `dbt_project.yml` * Allow extraneous properties in Export configs This brings the Export config object more in line with how other config objects are specified in the unparsed definition. It allows for specifying extra configs, although they won't get propagated to the final config. * Add `ExportConfig` options to `SavedQueryConfig` options This allows for specifying `ExportConfig` options at the `SavedQueryConfig` level. This also therefore allows these options to be specified in the dbt_project.yml config. The plan in the follow-up commit is to merge the `SavedQueryConfig` options into all configs of `Exports` belonging to the saved query. There are a couple of caveats to call out: 1. We've used `schema` instead of `schema_name` on the `SavedQueryConfig` despite it being called `schema_name` on the `ExportConfig`. This is because we need `schema_name` to be the name of the property on the `ExportConfig`, but `schema` is the user-facing specification. 2. We didn't add the `ExportConfig` `alias` property to the `SavedQueryConfig`. This is because `alias` will always be specific to a single export, and thus it doesn't make sense to allow defining it on the `SavedQueryConfig` to then apply to all `Exports` belonging to the `SavedQuery` * Begin inheriting configs from saved query config, and transitively from project config Export configs will now inherit from saved query configs, with a preference for export config specifications. That is to say an export config will inherit a config attr from the saved query config only if a value hasn't been supplied on the export config directly. 
Additionally, because the saved query config has a similar relationship with the project config, export configs can inherit from the project config (again with a preference for export config specifications). * Correct conditional in export config building for mapping schema to schema_name I somehow wrote a really weird, but also valid, conditional statement. Previously the conditional was ``` if combined.get("schema") is not combined.get("schema_name") is None: ``` which basically checked whether `schema` was a boolean that didn't match the boolean of whether `schema_name` was None. This would pretty much always evaluate to True because `schema` should be a string or none, not a bool, and thus would never match the right hand side. Crazy. It has now been fixed to do the thing we want it to do. If `schema` isn't `None`, and `schema_name` is `None`, then set `schema_name` to have the value of `schema`. * Update parameter names in `_get_export_config` to be more verbose (cherry picked from commit c2f7d75e9ee96b7ae9de0af24feb3d346ad741b7) --- .../unreleased/Features-20231110-154255.yaml | 6 + core/dbt/contracts/graph/model_config.py | 3 + core/dbt/contracts/graph/unparsed.py | 12 +- core/dbt/parser/schema_yaml_readers.py | 31 +-- tests/functional/saved_queries/fixtures.py | 67 +++++++ .../functional/saved_queries/test_configs.py | 186 ++++++++++++++++++ 6 files changed, 283 insertions(+), 22 deletions(-) create mode 100644 .changes/unreleased/Features-20231110-154255.yaml create mode 100644 tests/functional/saved_queries/test_configs.py diff --git a/.changes/unreleased/Features-20231110-154255.yaml b/.changes/unreleased/Features-20231110-154255.yaml new file mode 100644 index 00000000000..77283846646 --- /dev/null +++ b/.changes/unreleased/Features-20231110-154255.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support setting export configs hierarchically via saved query and project configs +time: 2023-11-10T15:42:55.042317-08:00 +custom: + Author: QMalcolm + Issue: "8956" 
diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index e827cd80067..41947496f36 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -15,6 +15,7 @@ from dbt.exceptions import DbtInternalError, CompilationError from dbt import hooks from dbt.node_types import NodeType, AccessType +from dbt_semantic_interfaces.type_enums.export_destination_type import ExportDestinationType from mashumaro.jsonschema.annotations import Pattern @@ -407,6 +408,8 @@ class SavedQueryConfig(BaseConfig): default_factory=dict, metadata=MergeBehavior.Update.meta(), ) + export_as: Optional[ExportDestinationType] = None + schema: Optional[str] = None @dataclass diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index e0d91759d65..b8aee05e806 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -19,7 +19,6 @@ from dbt.exceptions import CompilationError, ParsingError, DbtInternalError from dbt.dataclass_schema import dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, ValidationError -from dbt_semantic_interfaces.type_enums.export_destination_type import ExportDestinationType from dataclasses import dataclass, field from datetime import timedelta @@ -729,21 +728,12 @@ class UnparsedQueryParams(dbtClassMixin): where: Optional[Union[str, List[str]]] = None -@dataclass -class UnparsedExportConfig(dbtClassMixin): - """Nested configuration attributes for exports.""" - - export_as: ExportDestinationType - schema: Optional[str] = None - alias: Optional[str] = None - - @dataclass class UnparsedExport(dbtClassMixin): """Configuration for writing query results to a table.""" name: str - config: UnparsedExportConfig + config: Dict[str, Any] = field(default_factory=dict) @dataclass diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 5572e10d16a..e9eb2b5c76c 100644 --- 
a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -6,7 +6,6 @@ UnparsedDimensionTypeParams, UnparsedEntity, UnparsedExport, - UnparsedExportConfig, UnparsedExposure, UnparsedGroup, UnparsedMeasure, @@ -19,6 +18,7 @@ UnparsedSavedQuery, UnparsedSemanticModel, ) +from dbt.contracts.graph.model_config import SavedQueryConfig from dbt.contracts.graph.nodes import ( Exposure, Group, @@ -57,7 +57,7 @@ MetricType, TimeGranularity, ) -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union def parse_where_filter( @@ -669,16 +669,25 @@ def _generate_saved_query_config( return config - def _get_export_config(self, unparsed: UnparsedExportConfig) -> ExportConfig: - return ExportConfig( - export_as=unparsed.export_as, - schema_name=unparsed.schema, - alias=unparsed.alias, + def _get_export_config( + self, unparsed_export_config: Dict[str, Any], saved_query_config: SavedQueryConfig + ) -> ExportConfig: + # Combine the two dictionaries using dictionary unpacking + # the second dictionary is the one whose keys take priority + combined = {**saved_query_config.__dict__, **unparsed_export_config} + # `schema` is the user facing attribute, but for DSI protocol purposes we track it as `schema_name` + if combined.get("schema") is not None and combined.get("schema_name") is None: + combined["schema_name"] = combined["schema"] + + return ExportConfig.from_dict(combined) + + def _get_export( + self, unparsed: UnparsedExport, saved_query_config: SavedQueryConfig + ) -> Export: + return Export( + name=unparsed.name, config=self._get_export_config(unparsed.config, saved_query_config) ) - def _get_export(self, unparsed: UnparsedExport) -> Export: - return Export(name=unparsed.name, config=self._get_export_config(unparsed.config)) - def _get_query_params(self, unparsed: UnparsedQueryParams) -> QueryParams: return QueryParams( group_by=unparsed.group_by, @@ -721,7 +730,7 @@ def parse_saved_query(self, unparsed: 
UnparsedSavedQuery) -> None: resource_type=NodeType.SavedQuery, unique_id=unique_id, query_params=self._get_query_params(unparsed.query_params), - exports=[self._get_export(export) for export in unparsed.exports], + exports=[self._get_export(export, config) for export in unparsed.exports], config=config, unrendered_config=unrendered_config, group=config.group, diff --git a/tests/functional/saved_queries/fixtures.py b/tests/functional/saved_queries/fixtures.py index a3be4ba8a94..68565d82e08 100644 --- a/tests/functional/saved_queries/fixtures.py +++ b/tests/functional/saved_queries/fixtures.py @@ -24,3 +24,70 @@ export_as: table schema: my_export_schema_name """ + +saved_query_with_extra_config_attributes_yml = """ +version: 2 + +saved_queries: + - name: test_saved_query + description: "{{ doc('saved_query_description') }}" + label: Test Saved Query + query_params: + metrics: + - simple_metric + group_by: + - "Dimension('user__ds')" + where: + - "{{ Dimension('user__ds', 'DAY') }} <= now()" + - "{{ Dimension('user__ds', 'DAY') }} >= '2023-01-01'" + exports: + - name: my_export + config: + my_random_config: 'I have this for some reason' + export_as: table +""" + +saved_query_with_export_configs_defined_at_saved_query_level_yml = """ +version: 2 + +saved_queries: + - name: test_saved_query + description: "{{ doc('saved_query_description') }}" + label: Test Saved Query + config: + export_as: table + schema: my_default_export_schema + query_params: + metrics: + - simple_metric + group_by: + - "Dimension('user__ds')" + where: + - "{{ Dimension('user__ds', 'DAY') }} <= now()" + - "{{ Dimension('user__ds', 'DAY') }} >= '2023-01-01'" + exports: + - name: my_export + config: + export_as: view + schema: my_custom_export_schema + - name: my_export2 +""" + +saved_query_without_export_configs_defined_yml = """ +version: 2 + +saved_queries: + - name: test_saved_query + description: "{{ doc('saved_query_description') }}" + label: Test Saved Query + query_params: + metrics: + - 
simple_metric + group_by: + - "Dimension('user__ds')" + where: + - "{{ Dimension('user__ds', 'DAY') }} <= now()" + - "{{ Dimension('user__ds', 'DAY') }} >= '2023-01-01'" + exports: + - name: my_export +""" diff --git a/tests/functional/saved_queries/test_configs.py b/tests/functional/saved_queries/test_configs.py new file mode 100644 index 00000000000..396637928ef --- /dev/null +++ b/tests/functional/saved_queries/test_configs.py @@ -0,0 +1,186 @@ +import pytest + +from dbt.contracts.graph.manifest import Manifest +from dbt.tests.util import update_config_file +from dbt_semantic_interfaces.type_enums.export_destination_type import ExportDestinationType +from tests.functional.assertions.test_runner import dbtTestRunner +from tests.functional.configs.fixtures import BaseConfigProject +from tests.functional.saved_queries.fixtures import ( + saved_queries_yml, + saved_query_description, + saved_query_with_extra_config_attributes_yml, + saved_query_with_export_configs_defined_at_saved_query_level_yml, + saved_query_without_export_configs_defined_yml, +) +from tests.functional.semantic_models.fixtures import ( + fct_revenue_sql, + metricflow_time_spine_sql, + schema_yml, +) + + +class TestSavedQueryConfigs(BaseConfigProject): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "saved-queries": { + "test": { + "test_saved_query": { + "+enabled": True, + "+export_as": ExportDestinationType.VIEW.value, + "+schema": "my_default_export_schema", + } + }, + }, + } + + @pytest.fixture(scope="class") + def models(self): + return { + "saved_queries.yml": saved_query_with_extra_config_attributes_yml, + "schema.yml": schema_yml, + "fct_revenue.sql": fct_revenue_sql, + "metricflow_time_spine.sql": metricflow_time_spine_sql, + "docs.md": saved_query_description, + } + + def test_basic_saved_query_config( + self, + project, + ): + runner = dbtTestRunner() + + # parse with default fixture project config + result = runner.invoke(["parse"]) + assert 
result.success + assert isinstance(result.result, Manifest) + assert len(result.result.saved_queries) == 1 + saved_query = result.result.saved_queries["saved_query.test.test_saved_query"] + assert saved_query.config.export_as == ExportDestinationType.VIEW + assert saved_query.config.schema == "my_default_export_schema" + + # disable the saved_query via project config and rerun + config_patch = {"saved-queries": {"test": {"test_saved_query": {"+enabled": False}}}} + update_config_file(config_patch, project.project_root, "dbt_project.yml") + result = runner.invoke(["parse"]) + assert result.success + assert len(result.result.saved_queries) == 0 + + +class TestExportConfigsWithAdditionalProperties(BaseConfigProject): + @pytest.fixture(scope="class") + def models(self): + return { + "saved_queries.yml": saved_queries_yml, + "schema.yml": schema_yml, + "fct_revenue.sql": fct_revenue_sql, + "metricflow_time_spine.sql": metricflow_time_spine_sql, + "docs.md": saved_query_description, + } + + def test_extra_config_properties_dont_break_parsing(self, project): + runner = dbtTestRunner() + + # parse with default fixture project config + result = runner.invoke(["parse"]) + assert result.success + assert isinstance(result.result, Manifest) + assert len(result.result.saved_queries) == 1 + saved_query = result.result.saved_queries["saved_query.test.test_saved_query"] + assert len(saved_query.exports) == 1 + assert saved_query.exports[0].config.__dict__.get("my_random_config") is None + + +class TestInheritingExportConfigFromSavedQueryConfig(BaseConfigProject): + @pytest.fixture(scope="class") + def models(self): + return { + "saved_queries.yml": saved_query_with_export_configs_defined_at_saved_query_level_yml, + "schema.yml": schema_yml, + "fct_revenue.sql": fct_revenue_sql, + "metricflow_time_spine.sql": metricflow_time_spine_sql, + "docs.md": saved_query_description, + } + + def test_export_config_inherits_from_saved_query(self, project): + runner = dbtTestRunner() + + # parse 
with default fixture project config + result = runner.invoke(["parse"]) + assert result.success + assert isinstance(result.result, Manifest) + assert len(result.result.saved_queries) == 1 + saved_query = result.result.saved_queries["saved_query.test.test_saved_query"] + assert len(saved_query.exports) == 2 + + # assert Export `my_export` has its configs defined from itself because they should take priority + export1 = next( + (export for export in saved_query.exports if export.name == "my_export"), None + ) + assert export1 is not None + assert export1.config.export_as == ExportDestinationType.VIEW + assert export1.config.export_as != saved_query.config.export_as + assert export1.config.schema_name == "my_custom_export_schema" + assert export1.config.schema_name != saved_query.config.schema + + # assert Export `my_export2` inherits its configs from the saved_query because it defines none of its own + export2 = next( + (export for export in saved_query.exports if export.name == "my_export2"), None + ) + assert export2 is not None + assert export2.config.export_as == ExportDestinationType.TABLE + assert export2.config.export_as == saved_query.config.export_as + assert export2.config.schema_name == "my_default_export_schema" + assert export2.config.schema_name == saved_query.config.schema + + +class TestInheritingExportConfigsFromProject(BaseConfigProject): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "saved-queries": { + "test": { + "test_saved_query": { + "+export_as": ExportDestinationType.VIEW.value, + } + }, + }, + } + + @pytest.fixture(scope="class") + def models(self): + return { + "saved_queries.yml": saved_query_without_export_configs_defined_yml, + "schema.yml": schema_yml, + "fct_revenue.sql": fct_revenue_sql, + "metricflow_time_spine.sql": metricflow_time_spine_sql, + "docs.md": saved_query_description, + } + + def test_export_config_inherits_from_project( + self, + project, + ): + runner = dbtTestRunner() + + # parse 
with default fixture project config + result = runner.invoke(["parse"]) + assert result.success + assert isinstance(result.result, Manifest) + assert len(result.result.saved_queries) == 1 + saved_query = result.result.saved_queries["saved_query.test.test_saved_query"] + assert saved_query.config.export_as == ExportDestinationType.VIEW + + # change export's `export_as` to `TABLE` via project config + config_patch = { + "saved-queries": { + "test": {"test_saved_query": {"+export_as": ExportDestinationType.TABLE.value}} + } + } + update_config_file(config_patch, project.project_root, "dbt_project.yml") + result = runner.invoke(["parse"]) + assert result.success + assert isinstance(result.result, Manifest) + assert len(result.result.saved_queries) == 1 + saved_query = result.result.saved_queries["saved_query.test.test_saved_query"] + assert saved_query.config.export_as == ExportDestinationType.TABLE