From d36f3182af37fa122d6b7952ce1ee3f9779c812d Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 29 Apr 2024 11:21:35 -0500 Subject: [PATCH 1/9] add support for explicit nulls for loaded_at_field --- .../resources/v1/source_definition.py | 1 + core/dbt/contracts/graph/nodes.py | 2 ++ core/dbt/contracts/graph/unparsed.py | 4 ++++ core/dbt/parser/schemas.py | 15 ++++++++++++++- core/dbt/parser/sources.py | 19 ++++++++++++++++++- 5 files changed, 39 insertions(+), 2 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index e5a9ab1d98e..54de7feb015 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -58,6 +58,7 @@ class ParsedSourceMandatory(GraphResource, HasRelationMetadata): class SourceDefinition(ParsedSourceMandatory): quoting: Quoting = field(default_factory=Quoting) loaded_at_field: Optional[str] = None + loaded_at_field_present: Optional[bool] = False freshness: Optional[FreshnessThreshold] = None external: Optional[ExternalTable] = None description: str = "" diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index e1f409ff1de..a159a39e993 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -1190,6 +1190,7 @@ def same_freshness(self, other: "SourceDefinition") -> bool: return ( self.freshness == other.freshness and self.loaded_at_field == other.loaded_at_field + and self.loaded_at_field_present == other.loaded_at_field_present and True ) @@ -1213,6 +1214,7 @@ def same_contents(self, old: Optional["SourceDefinition"]) -> bool: # messing around with external stuff is a change (uh, right?) # quoting changes are changes # freshness changes are changes, I guess + # TODO: what about loaded_at_field? 
# metadata/tags changes are not "changes" # patching/description changes are not "changes" return ( diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index caeaa5cee85..d1023f218eb 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -269,6 +269,7 @@ class UnparsedMacroUpdate(HasConfig, HasColumnProps, HasYamlMetadata): class UnparsedSourceTableDefinition(HasColumnTests, HasColumnAndTestProps): config: Dict[str, Any] = field(default_factory=dict) loaded_at_field: Optional[str] = None + loaded_at_field_present: Optional[bool] = False identifier: Optional[str] = None quoting: Quoting = field(default_factory=Quoting) freshness: Optional[FreshnessThreshold] = field(default_factory=FreshnessThreshold) @@ -293,6 +294,7 @@ class UnparsedSourceDefinition(dbtClassMixin): quoting: Quoting = field(default_factory=Quoting) freshness: Optional[FreshnessThreshold] = field(default_factory=FreshnessThreshold) loaded_at_field: Optional[str] = None + loaded_at_field_present: Optional[bool] = False tables: List[UnparsedSourceTableDefinition] = field(default_factory=list) tags: List[str] = field(default_factory=list) config: Dict[str, Any] = field(default_factory=dict) @@ -316,6 +318,7 @@ class SourceTablePatch(dbtClassMixin): data_type: Optional[str] = None docs: Optional[Docs] = None loaded_at_field: Optional[str] = None + loaded_at_field_present: Optional[bool] = False identifier: Optional[str] = None quoting: Quoting = field(default_factory=Quoting) freshness: Optional[FreshnessThreshold] = field(default_factory=FreshnessThreshold) @@ -358,6 +361,7 @@ class SourcePatch(dbtClassMixin): quoting: Optional[Quoting] = None freshness: Optional[Optional[FreshnessThreshold]] = field(default_factory=FreshnessThreshold) loaded_at_field: Optional[str] = None + loaded_at_field_present: Optional[bool] = False tables: Optional[List[SourceTablePatch]] = None tags: Optional[List[str]] = None diff --git 
a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 838939b83fc..05daea91822 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -115,7 +115,20 @@ def yaml_from_file(source_file: SchemaSourceFile) -> Dict[str, Any]: """If loading the yaml fails, raise an exception.""" try: # source_file.contents can sometimes be None - return load_yaml_text(source_file.contents or "", source_file.path) + contents = load_yaml_text(source_file.contents or "", source_file.path) + + # When loaded_loaded_at_field is defined as None or null, it shows up in + # the dict but when it is not defined, it does not show up in the dict + # We need to capture this to be able to override source level settings later. + # It defaults to False so only need to set to true here. + for source in contents.get("sources", []): + if "loaded_at_field" in source: + source["loaded_at_field_present"] = True + for table in source.get("tables", []): + if "loaded_at_field" in table: + table["loaded_at_field_present"] = True + + return contents except DbtValidationError as e: raise YamlLoadError( project_name=source_file.project_name, path=source_file.path.relative_path, exc=e diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 1f57efe79ce..9e73b923a8f 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -133,7 +133,11 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: unique_id = target.unique_id description = table.description or "" source_description = source.description or "" - loaded_at_field = table.loaded_at_field or source.loaded_at_field + + loaded_at_field = determine_loaded_at( + source.loaded_at_field, table.loaded_at_field, table.loaded_at_field_present + ) + loaded_at_field_present = table.loaded_at_field_present freshness = merge_freshness(source.freshness, table.freshness) quoting = source.quoting.merged(table.quoting) @@ -182,6 +186,7 @@ def parse_source(self, target: 
UnpatchedSourceDefinition) -> SourceDefinition: meta=meta, loader=source.loader, loaded_at_field=loaded_at_field, + loaded_at_field_present=loaded_at_field_present, freshness=freshness, quoting=quoting, resource_type=NodeType.Source, @@ -377,3 +382,15 @@ def merge_freshness( return update else: return None + + +def determine_loaded_at( + source: Optional[str], table: Optional[str], table_loaded_at_field_present: Optional[bool] +) -> Optional[str]: + # We need to be able to tell teh difference between explicitly setting the loaded_at_field to None/null + # and when it's simply not set. This allows a user to override teh source level loaded_at_field so that + # specific table can default to metadata-based freshness. + if table_loaded_at_field_present or table is not None: + return table + + return source # may be None, that's okay From a2c73f99c5746996bf7ad6150eeef009e67ce5e0 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 29 Apr 2024 11:45:04 -0500 Subject: [PATCH 2/9] add test --- .../sources/test_source_loaded_at_field.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tests/functional/sources/test_source_loaded_at_field.py diff --git a/tests/functional/sources/test_source_loaded_at_field.py b/tests/functional/sources/test_source_loaded_at_field.py new file mode 100644 index 00000000000..77701a6f37c --- /dev/null +++ b/tests/functional/sources/test_source_loaded_at_field.py @@ -0,0 +1,54 @@ +import pytest +from dbt.tests.util import run_dbt, get_manifest + + +loaded_at_field_source_schema_yml = """ +sources: + - name: test_source + freshness: + warn_after: + count: 1 + period: day + error_after: + count: 4 + period: day + loaded_at_field: updated_at + tables: + - name: table_null + identifier: example + loaded_at_field: null + - name: table_none + identifier: example + - name: table_override + identifier: example + loaded_at_field: updated_at_another_place +""" + + +class TestLoadedAtSourceLevel: + 
@pytest.fixture(scope="class") + def models(self): + return {"schema.yml": loaded_at_field_source_schema_yml} + + def test_source_level(self, project): + run_dbt(["parse"]) + manifest = get_manifest(project.project_root) + + # test setting loaded_at_field at source level, should trickle to table + assert "source.test.test_source.table_null" in manifest.sources + assert manifest.sources.get("source.test.test_source.table_null").loaded_at_field is None + + # test setting loaded_at_field at source level, and explicitly set to + # null at table level, end up with source level being None + assert "source.test.test_source.table_none" in manifest.sources + assert ( + manifest.sources.get("source.test.test_source.table_none").loaded_at_field + == "updated_at" + ) + + # test setting loaded_at_field at table level overrides source level + assert "source.test.test_source.table_override" in manifest.sources + assert ( + manifest.sources.get("source.test.test_source.table_override").loaded_at_field + == "updated_at_another_place" + ) From 6c516c8c0f0c504c90d3151e27552faf876aa397 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 29 Apr 2024 11:46:20 -0500 Subject: [PATCH 3/9] changelog --- .changes/unreleased/Fixes-20240429-114610.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20240429-114610.yaml diff --git a/.changes/unreleased/Fixes-20240429-114610.yaml b/.changes/unreleased/Fixes-20240429-114610.yaml new file mode 100644 index 00000000000..97e377a0216 --- /dev/null +++ b/.changes/unreleased/Fixes-20240429-114610.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Support overriding source level loaded_at_field with a null table level definition +time: 2024-04-29T11:46:10.100373-05:00 +custom: + Author: emmyoop + Issue: "9320" From 2c6535bbc1152af3279fbccec96b49d77b55abad Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 29 Apr 2024 12:01:37 -0500 Subject: [PATCH 4/9] add parsing for tests --- 
.../resources/v1/source_definition.py | 2 +- core/dbt/contracts/graph/nodes.py | 1 - core/dbt/contracts/graph/unparsed.py | 8 +-- .../sources/test_source_loaded_at_field.py | 63 +++++++++++++++---- 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index 54de7feb015..9ddac819564 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -58,7 +58,7 @@ class ParsedSourceMandatory(GraphResource, HasRelationMetadata): class SourceDefinition(ParsedSourceMandatory): quoting: Quoting = field(default_factory=Quoting) loaded_at_field: Optional[str] = None - loaded_at_field_present: Optional[bool] = False + loaded_at_field_present: Optional[bool] = None freshness: Optional[FreshnessThreshold] = None external: Optional[ExternalTable] = None description: str = "" diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index a159a39e993..4fa38a96a6a 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -1214,7 +1214,6 @@ def same_contents(self, old: Optional["SourceDefinition"]) -> bool: # messing around with external stuff is a change (uh, right?) # quoting changes are changes # freshness changes are changes, I guess - # TODO: what about loaded_at_field? 
# metadata/tags changes are not "changes" # patching/description changes are not "changes" return ( diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index d1023f218eb..dd84551ae6e 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -269,7 +269,7 @@ class UnparsedMacroUpdate(HasConfig, HasColumnProps, HasYamlMetadata): class UnparsedSourceTableDefinition(HasColumnTests, HasColumnAndTestProps): config: Dict[str, Any] = field(default_factory=dict) loaded_at_field: Optional[str] = None - loaded_at_field_present: Optional[bool] = False + loaded_at_field_present: Optional[bool] = None identifier: Optional[str] = None quoting: Quoting = field(default_factory=Quoting) freshness: Optional[FreshnessThreshold] = field(default_factory=FreshnessThreshold) @@ -294,7 +294,7 @@ class UnparsedSourceDefinition(dbtClassMixin): quoting: Quoting = field(default_factory=Quoting) freshness: Optional[FreshnessThreshold] = field(default_factory=FreshnessThreshold) loaded_at_field: Optional[str] = None - loaded_at_field_present: Optional[bool] = False + loaded_at_field_present: Optional[bool] = None tables: List[UnparsedSourceTableDefinition] = field(default_factory=list) tags: List[str] = field(default_factory=list) config: Dict[str, Any] = field(default_factory=dict) @@ -318,7 +318,7 @@ class SourceTablePatch(dbtClassMixin): data_type: Optional[str] = None docs: Optional[Docs] = None loaded_at_field: Optional[str] = None - loaded_at_field_present: Optional[bool] = False + loaded_at_field_present: Optional[bool] = None identifier: Optional[str] = None quoting: Quoting = field(default_factory=Quoting) freshness: Optional[FreshnessThreshold] = field(default_factory=FreshnessThreshold) @@ -361,7 +361,7 @@ class SourcePatch(dbtClassMixin): quoting: Optional[Quoting] = None freshness: Optional[Optional[FreshnessThreshold]] = field(default_factory=FreshnessThreshold) loaded_at_field: Optional[str] = None - 
loaded_at_field_present: Optional[bool] = False + loaded_at_field_present: Optional[bool] = None tables: Optional[List[SourceTablePatch]] = None tags: Optional[List[str]] = None diff --git a/tests/functional/sources/test_source_loaded_at_field.py b/tests/functional/sources/test_source_loaded_at_field.py index 77701a6f37c..8298f933fd3 100644 --- a/tests/functional/sources/test_source_loaded_at_field.py +++ b/tests/functional/sources/test_source_loaded_at_field.py @@ -1,8 +1,8 @@ import pytest -from dbt.tests.util import run_dbt, get_manifest +from dbt.tests.util import run_dbt, get_manifest, write_file -loaded_at_field_source_schema_yml = """ +loaded_at_field_null_schema_yml = """ sources: - name: test_source freshness: @@ -14,12 +14,40 @@ period: day loaded_at_field: updated_at tables: - - name: table_null + - name: table1 identifier: example loaded_at_field: null - - name: table_none +""" + +loaded_at_field_missing_schema_yml = """ +sources: + - name: test_source + freshness: + warn_after: + count: 1 + period: day + error_after: + count: 4 + period: day + loaded_at_field: updated_at + tables: + - name: table1 identifier: example - - name: table_override +""" + +loaded_at_field_defined_schema_yml = """ +sources: + - name: test_source + freshness: + warn_after: + count: 1 + period: day + error_after: + count: 4 + period: day + loaded_at_field: updated_at + tables: + - name: table1 identifier: example loaded_at_field: updated_at_another_place """ @@ -28,27 +56,36 @@ class TestLoadedAtSourceLevel: @pytest.fixture(scope="class") def models(self): - return {"schema.yml": loaded_at_field_source_schema_yml} + return {"schema.yml": loaded_at_field_null_schema_yml} def test_source_level(self, project): run_dbt(["parse"]) manifest = get_manifest(project.project_root) # test setting loaded_at_field at source level, should trickle to table - assert "source.test.test_source.table_null" in manifest.sources - assert 
manifest.sources.get("source.test.test_source.table_null").loaded_at_field is None + assert "source.test.test_source.table1" in manifest.sources + assert manifest.sources.get("source.test.test_source.table1").loaded_at_field is None # test setting loaded_at_field at source level, and explicitly set to # null at table level, end up with source level being None - assert "source.test.test_source.table_none" in manifest.sources + write_file( + loaded_at_field_missing_schema_yml, project.project_root, "models", "schema.yml" + ) + run_dbt(["parse"]) + manifest = get_manifest(project.project_root) + assert "source.test.test_source.table1" in manifest.sources assert ( - manifest.sources.get("source.test.test_source.table_none").loaded_at_field - == "updated_at" + manifest.sources.get("source.test.test_source.table1").loaded_at_field == "updated_at" ) # test setting loaded_at_field at table level overrides source level - assert "source.test.test_source.table_override" in manifest.sources + write_file( + loaded_at_field_defined_schema_yml, project.project_root, "models", "schema.yml" + ) + run_dbt(["parse"]) + manifest = get_manifest(project.project_root) + assert "source.test.test_source.table1" in manifest.sources assert ( - manifest.sources.get("source.test.test_source.table_override").loaded_at_field + manifest.sources.get("source.test.test_source.table1").loaded_at_field == "updated_at_another_place" ) From 2d29fb1b0051dbaa4de1f676b8c12f0e0d4ca5de Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Apr 2024 07:43:35 -0500 Subject: [PATCH 5/9] centralize logic a bit --- core/dbt/parser/sources.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 9e73b923a8f..78d5a6d03f4 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -134,9 +134,14 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: description = 
table.description or "" source_description = source.description or "" - loaded_at_field = determine_loaded_at( - source.loaded_at_field, table.loaded_at_field, table.loaded_at_field_present - ) + # We need to be able to tell the difference between explicitly setting the loaded_at_field to None/null + # and when it's simply not set. This allows a user to override the source level loaded_at_field so that + # specific table can default to metadata-based freshness. + if table.loaded_at_field_present or table is not None: + loaded_at_field = table.loaded_at_field + else: + loaded_at_field = source.loaded_at_field # may be None, that's okay + loaded_at_field_present = table.loaded_at_field_present freshness = merge_freshness(source.freshness, table.freshness) @@ -382,15 +387,3 @@ def merge_freshness( return update else: return None - - -def determine_loaded_at( - source: Optional[str], table: Optional[str], table_loaded_at_field_present: Optional[bool] -) -> Optional[str]: - # We need to be able to tell teh difference between explicitly setting the loaded_at_field to None/null - # and when it's simply not set. This allows a user to override teh source level loaded_at_field so that - # specific table can default to metadata-based freshness. 
- if table_loaded_at_field_present or table is not None: - return table - - return source # may be None, that's okay From e67517bd0cc9fc43516387ba24184b6df1ea8aae Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Apr 2024 14:52:15 -0500 Subject: [PATCH 6/9] account for sources being None --- core/dbt/parser/schemas.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 05daea91822..fa1cf1587a6 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -111,12 +111,15 @@ # =============================================================================== -def yaml_from_file(source_file: SchemaSourceFile) -> Dict[str, Any]: +def yaml_from_file(source_file: SchemaSourceFile) -> Optional[Dict[str, Any]]: """If loading the yaml fails, raise an exception.""" try: # source_file.contents can sometimes be None contents = load_yaml_text(source_file.contents or "", source_file.path) + if contents is None: + return contents + # When loaded_loaded_at_field is defined as None or null, it shows up in # the dict but when it is not defined, it does not show up in the dict # We need to capture this to be able to override source level settings later. From 277d590007ea615c690b7e30d751a0b75d382f55 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Wed, 1 May 2024 07:44:59 -0500 Subject: [PATCH 7/9] fix bug --- core/dbt/parser/sources.py | 2 +- tests/functional/sources/test_source_loaded_at_field.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 78d5a6d03f4..27b6d94c5fc 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -137,7 +137,7 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: # We need to be able to tell the difference between explicitly setting the loaded_at_field to None/null # and when it's simply not set. 
This allows a user to override the source level loaded_at_field so that # specific table can default to metadata-based freshness. - if table.loaded_at_field_present or table is not None: + if table.loaded_at_field_present or table.loaded_at_field is not None: loaded_at_field = table.loaded_at_field else: loaded_at_field = source.loaded_at_field # may be None, that's okay diff --git a/tests/functional/sources/test_source_loaded_at_field.py b/tests/functional/sources/test_source_loaded_at_field.py index 8298f933fd3..9df7c802b0e 100644 --- a/tests/functional/sources/test_source_loaded_at_field.py +++ b/tests/functional/sources/test_source_loaded_at_field.py @@ -53,12 +53,13 @@ """ -class TestLoadedAtSourceLevel: +class TestLoadedAtField: @pytest.fixture(scope="class") def models(self): return {"schema.yml": loaded_at_field_null_schema_yml} - def test_source_level(self, project): + def test_loaded_at_field(self, project): + # falls back to meteadata-based freshness but postgres doesn't support it run_dbt(["parse"]) manifest = get_manifest(project.project_root) From 0c2e94d39cb5e60d73fbd86e0030e41d1a02f2a8 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Thu, 2 May 2024 08:13:51 -0500 Subject: [PATCH 8/9] remove new field from SourceDefinition --- .../resources/v1/source_definition.py | 1 - core/dbt/contracts/graph/nodes.py | 1 - core/dbt/parser/schemas.py | 3 --- core/dbt/parser/sources.py | 3 --- .../sources/test_source_loaded_at_field.py | 23 +++++++++++-------- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index 9ddac819564..e5a9ab1d98e 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -58,7 +58,6 @@ class ParsedSourceMandatory(GraphResource, HasRelationMetadata): class SourceDefinition(ParsedSourceMandatory): quoting: Quoting = field(default_factory=Quoting) 
loaded_at_field: Optional[str] = None - loaded_at_field_present: Optional[bool] = None freshness: Optional[FreshnessThreshold] = None external: Optional[ExternalTable] = None description: str = "" diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 4fa38a96a6a..e1f409ff1de 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -1190,7 +1190,6 @@ def same_freshness(self, other: "SourceDefinition") -> bool: return ( self.freshness == other.freshness and self.loaded_at_field == other.loaded_at_field - and self.loaded_at_field_present == other.loaded_at_field_present and True ) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index fa1cf1587a6..68fafba85f5 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -123,10 +123,7 @@ def yaml_from_file(source_file: SchemaSourceFile) -> Optional[Dict[str, Any]]: # When loaded_loaded_at_field is defined as None or null, it shows up in # the dict but when it is not defined, it does not show up in the dict # We need to capture this to be able to override source level settings later. - # It defaults to False so only need to set to true here. 
for source in contents.get("sources", []): - if "loaded_at_field" in source: - source["loaded_at_field_present"] = True for table in source.get("tables", []): if "loaded_at_field" in table: table["loaded_at_field_present"] = True diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 27b6d94c5fc..f94262aac03 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -142,8 +142,6 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: else: loaded_at_field = source.loaded_at_field # may be None, that's okay - loaded_at_field_present = table.loaded_at_field_present - freshness = merge_freshness(source.freshness, table.freshness) quoting = source.quoting.merged(table.quoting) # path = block.path.original_file_path @@ -191,7 +189,6 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: meta=meta, loader=source.loader, loaded_at_field=loaded_at_field, - loaded_at_field_present=loaded_at_field_present, freshness=freshness, quoting=quoting, resource_type=NodeType.Source, diff --git a/tests/functional/sources/test_source_loaded_at_field.py b/tests/functional/sources/test_source_loaded_at_field.py index 9df7c802b0e..8636566879e 100644 --- a/tests/functional/sources/test_source_loaded_at_field.py +++ b/tests/functional/sources/test_source_loaded_at_field.py @@ -15,7 +15,6 @@ loaded_at_field: updated_at tables: - name: table1 - identifier: example loaded_at_field: null """ @@ -32,7 +31,6 @@ loaded_at_field: updated_at tables: - name: table1 - identifier: example """ loaded_at_field_defined_schema_yml = """ @@ -48,27 +46,25 @@ loaded_at_field: updated_at tables: - name: table1 - identifier: example loaded_at_field: updated_at_another_place """ -class TestLoadedAtField: +class TestParsingLoadedAtField: @pytest.fixture(scope="class") def models(self): return {"schema.yml": loaded_at_field_null_schema_yml} def test_loaded_at_field(self, project): - # falls back to meteadata-based freshness 
but postgres doesn't support it + # test setting loaded_at_field to null explicitly at table level run_dbt(["parse"]) manifest = get_manifest(project.project_root) - # test setting loaded_at_field at source level, should trickle to table assert "source.test.test_source.table1" in manifest.sources assert manifest.sources.get("source.test.test_source.table1").loaded_at_field is None - # test setting loaded_at_field at source level, and explicitly set to - # null at table level, end up with source level being None + # test setting loaded_at_field at source level, do not set at table level + # end up with source level loaded_at_field write_file( loaded_at_field_missing_schema_yml, project.project_root, "models", "schema.yml" ) @@ -79,7 +75,16 @@ def test_loaded_at_field(self, project): manifest.sources.get("source.test.test_source.table1").loaded_at_field == "updated_at" ) - # test setting loaded_at_field at table level overrides source level + # test setting loaded_at_field to null explicitly again to make sure the change is picked up + # by parser + write_file(loaded_at_field_null_schema_yml, project.project_root, "models", "schema.yml") + run_dbt(["parse"]) + manifest = get_manifest(project.project_root) + + assert "source.test.test_source.table1" in manifest.sources + assert manifest.sources.get("source.test.test_source.table1").loaded_at_field is None + + # test setting loaded_at_field at table level to a value - it should override source level write_file( loaded_at_field_defined_schema_yml, project.project_root, "models", "schema.yml" ) From 797bee6b02ba4b7477920742e7381fb02f2c16c6 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Thu, 2 May 2024 08:49:11 -0500 Subject: [PATCH 9/9] add validation for empty string, more tests --- core/dbt/contracts/graph/unparsed.py | 11 +++++ .../sources/test_source_loaded_at_field.py | 45 +++++++++++++++++-- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/core/dbt/contracts/graph/unparsed.py 
b/core/dbt/contracts/graph/unparsed.py index dd84551ae6e..ef2012221b5 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -299,6 +299,17 @@ class UnparsedSourceDefinition(dbtClassMixin): tags: List[str] = field(default_factory=list) config: Dict[str, Any] = field(default_factory=dict) + @classmethod + def validate(cls, data): + super(UnparsedSourceDefinition, cls).validate(data) + + if data.get("loaded_at_field", None) == "": + raise ValidationError("loaded_at_field cannot be an empty string.") + if "tables" in data: + for table in data["tables"]: + if table.get("loaded_at_field", None) == "": + raise ValidationError("loaded_at_field cannot be an empty string.") + @property def yaml_key(self) -> "str": return "sources" diff --git a/tests/functional/sources/test_source_loaded_at_field.py b/tests/functional/sources/test_source_loaded_at_field.py index 8636566879e..bc5e5fc05bc 100644 --- a/tests/functional/sources/test_source_loaded_at_field.py +++ b/tests/functional/sources/test_source_loaded_at_field.py @@ -1,5 +1,6 @@ import pytest from dbt.tests.util import run_dbt, get_manifest, write_file +from dbt.exceptions import YamlParseDictError loaded_at_field_null_schema_yml = """ @@ -18,6 +19,22 @@ loaded_at_field: null """ +loaded_at_field_blank_schema_yml = """ +sources: + - name: test_source + freshness: + warn_after: + count: 1 + period: day + error_after: + count: 4 + period: day + loaded_at_field: updated_at + tables: + - name: table1 + loaded_at_field: null +""" + loaded_at_field_missing_schema_yml = """ sources: - name: test_source @@ -49,6 +66,22 @@ loaded_at_field: updated_at_another_place """ +loaded_at_field_empty_string_schema_yml = """ +sources: + - name: test_source + freshness: + warn_after: + count: 1 + period: day + error_after: + count: 4 + period: day + loaded_at_field: updated_at + tables: + - name: table1 + loaded_at_field: "" +""" + class TestParsingLoadedAtField: @pytest.fixture(scope="class") @@ -75,9 
+108,8 @@ def test_loaded_at_field(self, project): manifest.sources.get("source.test.test_source.table1").loaded_at_field == "updated_at" ) - # test setting loaded_at_field to null explicitly again to make sure the change is picked up - # by parser - write_file(loaded_at_field_null_schema_yml, project.project_root, "models", "schema.yml") + # test setting loaded_at_field to nothing, should override Source value for None + write_file(loaded_at_field_blank_schema_yml, project.project_root, "models", "schema.yml") run_dbt(["parse"]) manifest = get_manifest(project.project_root) @@ -95,3 +127,10 @@ def test_loaded_at_field(self, project): manifest.sources.get("source.test.test_source.table1").loaded_at_field == "updated_at_another_place" ) + + # test setting loaded_at_field at table level to an empty string - should error + write_file( + loaded_at_field_empty_string_schema_yml, project.project_root, "models", "schema.yml" + ) + with pytest.raises(YamlParseDictError): + run_dbt(["parse"])