Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Order-insensitive unit test equality assertion for expected/actual with multiple nulls #10202

Merged
merged 5 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240522-182855.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: 'Fix: Order-insensitive unit test equality assertion for expected/actual with
multiple nulls'
time: 2024-05-22T18:28:55.91733-04:00
custom:
Author: michelleark
Issue: "10167"
36 changes: 26 additions & 10 deletions core/dbt/task/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,16 +338,17 @@
def _get_daff_diff(
self, expected: "agate.Table", actual: "agate.Table", ordered: bool = False
) -> daff.TableDiff:

expected_daff_table = daff.PythonTableView(list_rows_from_table(expected))
actual_daff_table = daff.PythonTableView(list_rows_from_table(actual))

alignment = daff.Coopy.compareTables(expected_daff_table, actual_daff_table).align()
result = daff.PythonTableView([])
# Sort expected and actual inputs prior to creating daff diff to ensure order insensitivity
# https://github.com/paulfitz/daff/issues/200
expected_daff_table = daff.PythonTableView(list_rows_from_table(expected, sort=True))
actual_daff_table = daff.PythonTableView(list_rows_from_table(actual, sort=True))

Check warning on line 344 in core/dbt/task/test.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/test.py#L343-L344

Added lines #L343 - L344 were not covered by tests

flags = daff.CompareFlags()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved flags initialization higher up as they should also be used to create alignment. This doesn't fix the issue, but it makes sense to use the same flags for alignment and diff, assuming flags.order worked perfectly :)

flags.ordered = ordered

alignment = daff.Coopy.compareTables(expected_daff_table, actual_daff_table, flags).align()
result = daff.PythonTableView([])

Check warning on line 350 in core/dbt/task/test.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/test.py#L349-L350

Added lines #L349 - L350 were not covered by tests

diff = daff.TableDiff(alignment, flags)
diff.hilite(result)
return diff
Expand Down Expand Up @@ -408,10 +409,25 @@


# This was originally in agate_helper, but that was moved out into dbt_common
def list_rows_from_table(table: "agate.Table") -> List[Any]:
"Convert a table to a list of lists, where the first element represents the header"
rows = [[col.name for col in table.columns]]
def list_rows_from_table(table: "agate.Table", sort: bool = False) -> List[Any]:
"""
Convert given table to a list of lists, where the first element represents the header

By default, sort is False and no sort order is applied to the non-header rows of the given table.

If sort is True, sort the non-header rows hierarchically, treating None values as lower in order.
Examples:
* [['a','b','c'],[4,5,6],[1,2,3]] -> [['a','b','c'],[1,2,3],[4,5,6]]
* [['a','b','c'],[4,5,6],[1,null,3]] -> [['a','b','c'],[1,null,3],[4,5,6]]
* [['a','b','c'],[4,5,6],[null,2,3]] -> [['a','b','c'],[4,5,6],[null,2,3]]
"""
header = [col.name for col in table.columns]

rows = []
for row in table.rows:
rows.append(list(row.values()))

return rows
if sort:
rows = sorted(rows, key=lambda x: [(elem is None, elem) for elem in x])

return [header] + rows
113 changes: 113 additions & 0 deletions tests/functional/unit_testing/test_ut_diffing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import pytest

from dbt.tests.util import run_dbt

my_input_model = """
SELECT 1 as id, 'some string' as status
"""

my_model = """
SELECT * FROM {{ ref("my_input_model") }}
"""

test_my_model_order_insensitive = """
unit_tests:
- name: unordered_no_nulls
model: my_model
given:
- input: ref("my_input_model")
rows:
- {"id": 1, "status": 'B'}
- {"id": 2, "status": 'B'}
- {"id": 3, "status": 'A'}
expect:
rows:
- {"id": 3, "status": 'A'}
- {"id": 2, "status": 'B'}
- {"id": 1, "status": 'B'}

- name: unordered_with_nulls
model: my_model
given:
- input: ref("my_input_model")
rows:
- {"id": , "status": 'B'}
- {"id": , "status": 'B'}
- {"id": 3, "status": 'A'}
expect:
rows:
- {"id": 3, "status": 'A'}
- {"id": , "status": 'B'}
- {"id": , "status": 'B'}

- name: unordered_with_nulls_2
model: my_model
given:
- input: ref("my_input_model")
rows:
- {"id": 3, "status": 'A'}
- {"id": , "status": 'B'}
- {"id": , "status": 'B'}
expect:
rows:
- {"id": , "status": 'B'}
- {"id": , "status": 'B'}
- {"id": 3, "status": 'A'}

- name: unordered_with_nulls_mixed_columns
model: my_model
given:
- input: ref("my_input_model")
rows:
- {"id": 3, "status": 'A'}
- {"id": , "status": 'B'}
- {"id": 1, "status": }
expect:
rows:
- {"id": 1, "status": }
- {"id": , "status": 'B'}
- {"id": 3, "status": 'A'}

- name: unordered_with_null
model: my_model
given:
- input: ref("my_input_model")
rows:
- {"id": 3, "status": 'A'}
- {"id": , "status": 'B'}
expect:
rows:
- {"id": , "status": 'B'}
- {"id": 3, "status": 'A'}

- name: ordered_with_nulls
model: my_model
given:
- input: ref("my_input_model")
rows:
- {"id": 3, "status": 'A'}
- {"id": , "status": 'B'}
- {"id": , "status": 'B'}
expect:
rows:
- {"id": 3, "status": 'A'}
- {"id": , "status": 'B'}
- {"id": , "status": 'B'}
"""


class TestUnitTestingDiffIsOrderAgnostic:
@pytest.fixture(scope="class")
def models(self):
return {
"my_input_model.sql": my_input_model,
"my_model.sql": my_model,
"test_my_model.yml": test_my_model_order_insensitive,
}

def test_unit_testing_diff_is_order_insensitive(self, project):
run_dbt(["run"])

# Select by model name
results = run_dbt(["test", "--select", "my_model"], expect_pass=True)
assert len(results) == 6
71 changes: 71 additions & 0 deletions tests/unit/task/test_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import agate
import pytest

from dbt.task.test import list_rows_from_table


class TestListRowsFromTable:
@pytest.mark.parametrize(
"agate_table_cols,agate_table_rows,expected_list_rows",
[
(["a", "b", "c"], [], [["a", "b", "c"]]), # no rows
(["a", "b", "c"], [[1, 2, 3]], [["a", "b", "c"], [1, 2, 3]]), # single row, no nulls
(
["a", "b", "c"],
[[1, 2, 3], [2, 3, 4]],
[["a", "b", "c"], [1, 2, 3], [2, 3, 4]],
), # multiple rows
(
["a", "b", "c"],
[[None, 2, 3], [2, None, 4]],
[["a", "b", "c"], [None, 2, 3], [2, None, 4]],
), # multiple rows, with nulls
],
)
def test_list_rows_from_table_no_sort(
self, agate_table_cols, agate_table_rows, expected_list_rows
):
table = agate.Table(rows=agate_table_rows, column_names=agate_table_cols)

list_rows = list_rows_from_table(table)
assert list_rows == expected_list_rows

@pytest.mark.parametrize(
"agate_table_cols,agate_table_rows,expected_list_rows",
[
(["a", "b", "c"], [], [["a", "b", "c"]]), # no rows
(["a", "b", "c"], [[1, 2, 3]], [["a", "b", "c"], [1, 2, 3]]), # single row, no nulls
(
["a", "b", "c"],
[[1, 2, 3], [2, 3, 4]],
[["a", "b", "c"], [1, 2, 3], [2, 3, 4]],
), # multiple rows, in order
(
["a", "b", "c"],
[[2, 3, 4], [1, 2, 3]],
[["a", "b", "c"], [1, 2, 3], [2, 3, 4]],
), # multiple rows, out of order
(
["a", "b", "c"],
[[None, 2, 3], [2, 3, 4]],
[["a", "b", "c"], [2, 3, 4], [None, 2, 3]],
), # multiple rows, out of order with nulls in first position
(
["a", "b", "c"],
[[4, 5, 6], [1, None, 3]],
[["a", "b", "c"], [1, None, 3], [4, 5, 6]],
), # multiple rows, out of order with null in non-first position
(
["a", "b", "c"],
[[None, 5, 6], [1, None, 3]],
[["a", "b", "c"], [1, None, 3], [None, 5, 6]],
), # multiple rows, out of order with nulls in many positions
],
)
def test_list_rows_from_table_with_sort(
self, agate_table_cols, agate_table_rows, expected_list_rows
):
table = agate.Table(rows=agate_table_rows, column_names=agate_table_cols)

list_rows = list_rows_from_table(table, sort=True)
assert list_rows == expected_list_rows
Loading