Skip to content

Commit

Permalink
Add variable resolution to threshold values and queries/expressions (#…
Browse files: browse the repository at this point in the history
  • Loading branch information
vijaykiran authored Sep 21, 2022
1 parent ee5bb81 commit 16c075f
Show file tree
Hide file tree
Showing 25 changed files with 596 additions and 553 deletions.
2 changes: 1 addition & 1 deletion scripts/generate_antlr_parser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# set -x
set -e

ANTLR_FILE="antlr-4.9.3-complete.jar"
ANTLR_FILE="antlr-4.11.1-complete.jar"
ANTLR_DOWNLOAD_URL="https://www.antlr.org/download/$ANTLR_FILE"
SCRIPTS_DIR=$(cd "$(dirname "$0")"; pwd -P)
ANTLR_LOCAL_JAR_PATH="$SCRIPTS_DIR/$ANTLR_FILE"
Expand Down
2 changes: 1 addition & 1 deletion soda/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"click~=8.0",
"ruamel.yaml~=0.17.21",
"requests~=2.27.1",
"antlr4-python3-runtime~=4.9.3",
"antlr4-python3-runtime~=4.11.1",
"opentelemetry-api~=1.11.0",
"opentelemetry-exporter-otlp-proto-http~=1.11.0",
"protobuf~=3.19.0",
Expand Down
1 change: 1 addition & 0 deletions soda/core/soda/execution/check/metric_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def get_metric(self):
def set_outcome_based_on_check_value(self):
metric_check_cfg: MetricCheckCfg = self.check_cfg
if self.check_value is not None and metric_check_cfg.has_threshold():
metric_check_cfg.resolve_thresholds(self.data_source_scan.scan.jinja_resolve)
if isinstance(self.check_value, Number):
if metric_check_cfg.fail_threshold_cfg and metric_check_cfg.fail_threshold_cfg.is_bad(self.check_value):
self.outcome = CheckOutcome.FAIL
Expand Down
2 changes: 1 addition & 1 deletion soda/core/soda/execution/query/aggregation_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def execute(self):
if partition_filter:
resolved_filter = scan.jinja_resolve(definition=partition_filter)
self.sql += f"\nWHERE {resolved_filter}"

self.sql = self.data_source_scan.scan.jinja_resolve(self.sql)
self.fetchone()
if self.row:
for i in range(0, len(self.row)):
Expand Down
2 changes: 1 addition & 1 deletion soda/core/soda/execution/query/duplicates_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, partition: "Partition", metric: "Metric"):

column_names = ", ".join(self.metric.metric_args)

self.sql = (
self.sql = self.data_source_scan.scan.jinja_resolve(
f"WITH frequencies AS (\n"
f" SELECT {column_names}, COUNT(*) AS frequency \n"
f" FROM {self.partition.table.qualified_table_name} \n"
Expand Down
2 changes: 1 addition & 1 deletion soda/core/soda/execution/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
# The SQL query that is used by _fetchone, _fetchall or _store
# This field can also be initialized in the execute method before any of _fetchone,
# _fetchall or _store are called
self.sql: str = sql
self.sql: str = data_source_scan.scan.jinja_resolve(sql)

# Following fields are initialized in execute method
self.description: tuple | None = None
Expand Down
3 changes: 2 additions & 1 deletion soda/core/soda/execution/query/reference_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ def __init__(
]
)

self.sql = (
self.sql = self.data_source_scan.scan.jinja_resolve(
f"SELECT {source_diagnostic_column_fields} \n"
f"FROM {source_table_name} as SOURCE \n"
f" LEFT JOIN {target_table_name} as TARGET on {join_condition} \n"
f"WHERE {where_condition}"
)

self.metric = metric

def execute(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ class UserDefinedFailedRowsExpressionQuery(Query):
def __init__(self, data_source_scan: DataSourceScan, check_name: str, sql: str, samples_limit: int | None = 100):
super().__init__(
data_source_scan=data_source_scan,
sql=sql,
unqualified_query_name=f"user_defined_failed_rows_expression_query[{check_name}]",
)
self.sql: str = data_source_scan.scan.jinja_resolve(sql)
self.samples_limit = samples_limit

def execute(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def __init__(
unqualified_query_name=f"failed_rows[{metric.name}]",
location=location,
samples_limit=samples_limit,
sql=metric.query,
)
self.sql = data_source_scan.scan.jinja_resolve(metric.query)
self.metric = metric

def execute(self):
Expand Down
4 changes: 1 addition & 3 deletions soda/core/soda/execution/query/user_defined_numeric_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ def __init__(
metric: Metric,
):
super().__init__(
data_source_scan=data_source_scan,
unqualified_query_name=f"user_defined_query[{check_name}]",
data_source_scan=data_source_scan, unqualified_query_name=f"user_defined_query[{check_name}]", sql=sql
)
self.sql: str = sql
self.metric = metric

def execute(self):
Expand Down
6 changes: 4 additions & 2 deletions soda/core/soda/sodacl/antlr/SodaCLAntlr.g4
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@ comparator
threshold_value
: signed_number (S? PERCENT)?
| freshness_threshold_value
| IDENTIFIER_UNQUOTED
;


freshness_threshold_value
: (integer ('d'|'h'|'m'))+ integer?
;
Expand Down Expand Up @@ -220,8 +222,8 @@ GT: '>';

IDENTIFIER_DOUBLE_QUOTE: '"' ( ~'"' | '\\"')+ '"';
IDENTIFIER_BACKTICK: '`' ( ~'`' | '\\`')+ '`';
IDENTIFIER_UNQUOTED: [a-zA-Z_] ~(' ' | '<' | '=' | '>' | '(' | ')' | '[' | ']' | ',')*;

IDENTIFIER_UNQUOTED: [a-zA-Z_$] ~(' ' | '<' | '=' | '>' | '(' | ')' | '[' | ']' | ',')*;
STRING: [a-z]+;
DIGITS: [0-9]+;

S: ' ';
4 changes: 3 additions & 1 deletion soda/core/soda/sodacl/antlr/SodaCLAntlr.interp

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions soda/core/soda/sodacl/antlr/SodaCLAntlr.tokens
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ GT=50
IDENTIFIER_DOUBLE_QUOTE=51
IDENTIFIER_BACKTICK=52
IDENTIFIER_UNQUOTED=53
DIGITS=54
S=55
STRING=54
DIGITS=55
S=56
'freshness using'=1
'with'=2
'row_count same as'=3
Expand Down Expand Up @@ -103,4 +104,4 @@ S=55
'='=48
'<'=49
'>'=50
' '=55
' '=56
5 changes: 4 additions & 1 deletion soda/core/soda/sodacl/antlr/SodaCLAntlrLexer.interp

Large diffs are not rendered by default.

Loading

0 comments on commit 16c075f

Please sign in to comment.