Skip to content

Commit

Permalink
[Backport 1.7.latest] Implementation of metadata-based freshness (#721)
Browse files Browse the repository at this point in the history
* cherry pick pr#649

* turn off catalog by relations that was inadvertently turned on during the cherry pick
  • Loading branch information
mikealfare authored Feb 27, 2024
1 parent 7e3891f commit bf80a11
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231219-120533.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add support for checking table-last-modified by metadata
time: 2023-12-19T12:05:33.784649-05:00
custom:
Author: mikealfare
Issue: "615"
7 changes: 7 additions & 0 deletions dbt/adapters/redshift/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from dbt.adapters.base import PythonJobHelper
from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport
from dbt.adapters.base.meta import available
from dbt.adapters.capability import Capability, CapabilityDict, CapabilitySupport, Support
from dbt.adapters.sql import SQLAdapter
from dbt.contracts.connection import AdapterResponse
from dbt.contracts.graph.nodes import ConstraintType
Expand Down Expand Up @@ -46,6 +47,12 @@ class RedshiftAdapter(SQLAdapter):
ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED,
}

_capabilities = CapabilityDict(
{
Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Full),
}
)

@classmethod
def date_function(cls):
return "getdate()"
Expand Down
29 changes: 29 additions & 0 deletions dbt/include/redshift/macros/metadata/relation_last_modified.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{% macro redshift__get_relation_last_modified(information_schema, relations) -%}

{%- call statement('last_modified', fetch_result=True) -%}
select
ns.nspname as "schema",
c.relname as identifier,
max(qd.start_time) as last_modified,
{{ current_timestamp() }} as snapshotted_at
from pg_class c
join pg_namespace ns
on ns.oid = c.relnamespace
join sys_query_detail qd
on qd.table_id = c.oid
where qd.step_name = 'insert'
and (
{%- for relation in relations -%}
(
upper(ns.nspname) = upper('{{ relation.schema }}')
and upper(c.relname) = upper('{{ relation.identifier }}')
)
{%- if not loop.last %} or {% endif -%}
{%- endfor -%}
)
group by 1, 2, 4
{%- endcall -%}

{{ return(load_result('last_modified')) }}

{% endmacro %}
38 changes: 38 additions & 0 deletions tests/functional/adapter/sources_freshness_tests/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
SCHEMA_YML = """version: 2
sources:
- name: test_source
freshness:
warn_after: {count: 10, period: hour}
error_after: {count: 1, period: day}
schema: "{{ env_var('DBT_GET_LAST_RELATION_TEST_SCHEMA') }}"
tables:
- name: test_source_no_last_modified
- name: test_source_last_modified
loaded_at_field: last_modified
"""

SEED_TEST_SOURCE_NO_LAST_MODIFIED_CSV = """
id,name
1,Martin
2,Jeter
3,Ruth
4,Gehrig
5,DiMaggio
6,Torre
7,Mantle
8,Berra
9,Maris
""".strip()

SEED_TEST_SOURCE_LAST_MODIFIED_CSV = """
id,name,last_modified
1,Martin,2023-01-01 00:00:00
2,Jeter,2023-02-01 00:00:00
3,Ruth,2023-03-01 00:00:00
4,Gehrig,2023-04-01 00:00:00
5,DiMaggio,2023-05-01 00:00:00
6,Torre,2023-06-01 00:00:00
7,Mantle,2023-07-01 00:00:00
8,Berra,2023-08-01 00:00:00
9,Maris,2023-09-01 00:00:00
""".strip()
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os

from dbt.tests.util import run_dbt
import pytest

from tests.functional.adapter.sources_freshness_tests import files


class TestGetLastRelationModified:
@pytest.fixture(scope="class")
def seeds(self):
return {
"test_source_no_last_modified.csv": files.SEED_TEST_SOURCE_NO_LAST_MODIFIED_CSV,
"test_source_last_modified.csv": files.SEED_TEST_SOURCE_LAST_MODIFIED_CSV,
}

@pytest.fixture(scope="class")
def models(self):
return {"schema.yml": files.SCHEMA_YML}

@pytest.fixture(scope="class", autouse=True)
def setup(self, project):
# we need the schema name for the sources section
os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] = project.test_schema
run_dbt(["seed"])
yield
del os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"]

@pytest.mark.parametrize(
"source,status,expect_pass",
[
("test_source.test_source_no_last_modified", "pass", True),
("test_source.test_source_last_modified", "error", False), # stale
],
)
def test_get_last_relation_modified(self, project, source, status, expect_pass):
results = run_dbt(
["source", "freshness", "--select", f"source:{source}"], expect_pass=expect_pass
)
assert len(results) == 1
result = results[0]
assert result.status == status

0 comments on commit bf80a11

Please sign in to comment.