Skip to content

Commit

Permalink
Freshness: support variables in thresholds (#2146)
Browse files Browse the repository at this point in the history
  • Loading branch information
m1n0 authored Aug 14, 2024
1 parent 0563049 commit ad9db79
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 28 deletions.
40 changes: 12 additions & 28 deletions soda/core/soda/sodacl/sodacl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,34 +499,6 @@ def parse_user_defined_failed_rows_check_cfg(self, check_configurations, check_s
else:
self.logs.error(f'Check "{check_str}" expects a nested object/dict, but was {check_configurations}')

def __parse_failed_rows_table_expression_check(
    self,
    header_str: str,
    check_str: str,
    check_configurations: dict | None,
) -> CheckCfg:
    """
    Build the check configuration for a failed-rows check that is defined by an expression.

    :param header_str: Stored on the resulting check config as ``source_header``.
    :param check_str: The check line; used as the path element, in the error message and
        as ``source_line``.
    :param check_configurations: The nested configuration of the check. Must be a dict that
        contains the required ``failed rows expression`` string; a name is optional.
    :return: A ``UserDefinedFailedRowsExpressionCheckCfg``, or ``None`` (after logging an error)
        when ``check_configurations`` is not a dict.
    """
    # Guard clause: anything other than a dict is reported and produces no check config.
    if not isinstance(check_configurations, dict):
        self.logs.error(f'Check "{check_str}" expects a nested object/dict, but was {check_configurations}')
        return None

    # Imported at call time, only on the path that actually builds the check config.
    from soda.sodacl.user_defined_failed_rows_expression_check_cfg import (
        UserDefinedFailedRowsExpressionCheckCfg,
    )

    self._push_path_element(check_str, check_configurations)
    try:
        fail_condition = self._get_required("failed rows expression", str)
        check_name = self._get_optional(NAME, str)
        return UserDefinedFailedRowsExpressionCheckCfg(
            source_header=header_str,
            source_line=check_str,
            source_configurations=check_configurations,
            location=self.location,
            name=check_name,
            fail_condition_sql_expr=fail_condition,
        )
    finally:
        # Always undo the push above, also when reading the configuration raised.
        self._pop_path_element()

def parse_failed_rows_data_source_query_check(
self,
header_str: str,
Expand Down Expand Up @@ -1582,8 +1554,20 @@ def __antlr_threshold_value(self, antlr_threshold_value: SodaCLAntlrParser.Thres
freshness_threshold = antlr_threshold_value.freshness_threshold_value().getText()
return self.parse_freshness_threshold(freshness_threshold)
if antlr_threshold_value.IDENTIFIER_UNQUOTED():
resolved_value = self._resolve_jinja(
antlr_threshold_value.IDENTIFIER_UNQUOTED().getText(), self.sodacl_cfg.scan._variables
)
if self.__str_looks_like_freshness_threshold(resolved_value):
return self.parse_freshness_threshold(resolved_value)
return antlr_threshold_value.IDENTIFIER_UNQUOTED().getText()

def __str_looks_like_freshness_threshold(self, str_value: str) -> bool:
    """
    Tell whether ``str_value`` has the shape of a freshness threshold value.

    This method replicates the antlr parsing logic for freshness threshold values. It is needed to
    validate input after parsing so that variables can be used in thresholds.

    A value qualifies when the whole string consists of one or more ``<digits><unit>`` groups,
    where the unit is one of the letters ``d``, ``h`` or ``m`` (for example ``1h`` or ``1d6h30m``).

    :param str_value: The (already resolved) text to inspect.
    :return: True if the entire string is a sequence of such groups, False otherwise.
    """
    # fullmatch rather than match with "^...$": "$" also matches right before a trailing newline,
    # which would let a value such as "1h\n" through.
    return re.fullmatch(r"(\d+[dhm])+", str_value) is not None

def __antlr_parse_signed_number(self, antlr_signed_number):
signed_number_str = antlr_signed_number.getText()
if signed_number_str.startswith("+"):
Expand Down
40 changes: 40 additions & 0 deletions soda/core/tests/data_source/test_freshness.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from textwrap import dedent

import pytest
from helpers.common_test_tables import customers_test_table
from helpers.data_source_fixture import DataSourceFixture
Expand Down Expand Up @@ -289,3 +291,41 @@ def test_freshness_mixed_threshold_hm(data_source_fixture: DataSourceFixture):
scan.execute()

scan.assert_all_checks_pass()


@pytest.mark.parametrize(
    "sodacl",
    [
        # NOTE: "${{threshold}}" is written with doubled braces because the template goes through
        # str.format() below; after formatting the YAML contains "${threshold}".
        pytest.param(
            dedent(
                """
                checks for {table_name}:
                  - freshness(ts) < ${{threshold}}
                """
            ),
            id="simple threshold",
        ),
        pytest.param(
            # The "fail" key has to be nested under the check line to configure that check.
            dedent(
                """
                checks for {table_name}:
                  - freshness(ts):
                      fail: when > ${{threshold}}
                """
            ),
            id="fail threshold",
        ),
    ],
)
def test_freshness_variable_in_threshold(data_source_fixture: DataSourceFixture, sodacl: str):
    """
    Freshness thresholds may be supplied through a scan variable.

    Each parametrized SodaCL template references the variable ``threshold``, which is set to
    ``1h`` on the scan. In both forms (inline threshold and explicit ``fail`` threshold) every
    check is expected to fail.
    """
    table_name = data_source_fixture.ensure_test_table(customers_test_table)
    scan = data_source_fixture.create_test_scan()
    scan.add_variables({"threshold": "1h"})
    # Only {table_name} is substituted here; the variable reference is left for the scan to resolve.
    scan.add_sodacl_yaml_str(sodacl.format(table_name=table_name))
    scan.execute()

    scan.assert_all_checks_fail()

0 comments on commit ad9db79

Please sign in to comment.