From 84456f50f687b352072f64867247d21bef2647ff Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 24 May 2024 10:12:26 -0400 Subject: [PATCH] Fix: Order-insensitive unit test equality assertion for expected/actual with multiple nulls (#10202) --- .../unreleased/Fixes-20240522-182855.yaml | 7 ++ core/dbt/task/test.py | 36 ++++-- .../unit_testing/test_ut_diffing.py | 113 ++++++++++++++++++ tests/unit/task/test_test.py | 71 +++++++++++ 4 files changed, 217 insertions(+), 10 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240522-182855.yaml create mode 100644 tests/functional/unit_testing/test_ut_diffing.py create mode 100644 tests/unit/task/test_test.py diff --git a/.changes/unreleased/Fixes-20240522-182855.yaml b/.changes/unreleased/Fixes-20240522-182855.yaml new file mode 100644 index 00000000000..b0963b4a6cc --- /dev/null +++ b/.changes/unreleased/Fixes-20240522-182855.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: 'Fix: Order-insensitive unit test equality assertion for expected/actual with + multiple nulls' +time: 2024-05-22T18:28:55.91733-04:00 +custom: + Author: michelleark + Issue: "10167" diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index 3be5f5c5504..2ae65dc3ebe 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -338,16 +338,17 @@ def _get_unit_test_agate_table(self, result_table, actual_or_expected: str): def _get_daff_diff( self, expected: "agate.Table", actual: "agate.Table", ordered: bool = False ) -> daff.TableDiff: - - expected_daff_table = daff.PythonTableView(list_rows_from_table(expected)) - actual_daff_table = daff.PythonTableView(list_rows_from_table(actual)) - - alignment = daff.Coopy.compareTables(expected_daff_table, actual_daff_table).align() - result = daff.PythonTableView([]) + # Sort expected and actual inputs prior to creating daff diff to ensure order insensitivity + # https://github.com/paulfitz/daff/issues/200 + expected_daff_table = daff.PythonTableView(list_rows_from_table(expected, sort=True)) + actual_daff_table = daff.PythonTableView(list_rows_from_table(actual, sort=True)) flags = daff.CompareFlags() flags.ordered = ordered + alignment = daff.Coopy.compareTables(expected_daff_table, actual_daff_table, flags).align() + result = daff.PythonTableView([]) + diff = daff.TableDiff(alignment, flags) diff.hilite(result) return diff @@ -408,10 +409,25 @@ def json_rows_from_table(table: "agate.Table") -> List[Dict[str, Any]]: # This was originally in agate_helper, but that was moved out into dbt_common -def list_rows_from_table(table: "agate.Table") -> List[Any]: - "Convert a table to a list of lists, where the first element represents the header" - rows = [[col.name for col in table.columns]] +def list_rows_from_table(table: "agate.Table", sort: bool = False) -> List[Any]: + """ + Convert given table to a list of lists, where the first element represents the header + + By default, sort is False and no sort order is applied to the non-header rows of the given table. + + If sort is True, sort the non-header rows hierarchically, treating None values as lower in order. + Examples: + * [['a','b','c'],[4,5,6],[1,2,3]] -> [['a','b','c'],[1,2,3],[4,5,6]] + * [['a','b','c'],[4,5,6],[1,null,3]] -> [['a','b','c'],[1,null,3],[4,5,6]] + * [['a','b','c'],[4,5,6],[null,2,3]] -> [['a','b','c'],[4,5,6],[null,2,3]] + """ + header = [col.name for col in table.columns] + + rows = [] for row in table.rows: rows.append(list(row.values())) - return rows + if sort: + rows = sorted(rows, key=lambda x: [(elem is None, elem) for elem in x]) + + return [header] + rows diff --git a/tests/functional/unit_testing/test_ut_diffing.py b/tests/functional/unit_testing/test_ut_diffing.py new file mode 100644 index 00000000000..dd23ea25fdb --- /dev/null +++ b/tests/functional/unit_testing/test_ut_diffing.py @@ -0,0 +1,113 @@ +import pytest + +from dbt.tests.util import run_dbt + +my_input_model = """ +SELECT 1 as id, 'some string' as status +""" + +my_model = """ +SELECT * FROM {{ ref("my_input_model") }} +""" + +test_my_model_order_insensitive = """ +unit_tests: + - name: unordered_no_nulls + model: my_model + given: + - input: ref("my_input_model") + rows: + - {"id": 1, "status": 'B'} + - {"id": 2, "status": 'B'} + - {"id": 3, "status": 'A'} + expect: + rows: + - {"id": 3, "status": 'A'} + - {"id": 2, "status": 'B'} + - {"id": 1, "status": 'B'} + + - name: unordered_with_nulls + model: my_model + given: + - input: ref("my_input_model") + rows: + - {"id": , "status": 'B'} + - {"id": , "status": 'B'} + - {"id": 3, "status": 'A'} + expect: + rows: + - {"id": 3, "status": 'A'} + - {"id": , "status": 'B'} + - {"id": , "status": 'B'} + + - name: unordered_with_nulls_2 + model: my_model + given: + - input: ref("my_input_model") + rows: + - {"id": 3, "status": 'A'} + - {"id": , "status": 'B'} + - {"id": , "status": 'B'} + expect: + rows: + - {"id": , "status": 'B'} + - {"id": , "status": 'B'} + - {"id": 3, "status": 'A'} + + - name: unordered_with_nulls_mixed_columns + model: my_model + given: + - input: ref("my_input_model") + rows: + - {"id": 3, "status": 'A'} + - {"id": , "status": 'B'} + - {"id": 1, "status": } + expect: + rows: + - {"id": 1, "status": } + - {"id": , "status": 'B'} + - {"id": 3, "status": 'A'} + + - name: unordered_with_null + model: my_model + given: + - input: ref("my_input_model") + rows: + - {"id": 3, "status": 'A'} + - {"id": , "status": 'B'} + expect: + rows: + - {"id": , "status": 'B'} + - {"id": 3, "status": 'A'} + + - name: ordered_with_nulls + model: my_model + given: + - input: ref("my_input_model") + rows: + - {"id": 3, "status": 'A'} + - {"id": , "status": 'B'} + - {"id": , "status": 'B'} + expect: + rows: + - {"id": 3, "status": 'A'} + - {"id": , "status": 'B'} + - {"id": , "status": 'B'} +""" + + +class TestUnitTestingDiffIsOrderAgnostic: + @pytest.fixture(scope="class") + def models(self): + return { + "my_input_model.sql": my_input_model, + "my_model.sql": my_model, + "test_my_model.yml": test_my_model_order_insensitive, + } + + def test_unit_testing_diff_is_order_insensitive(self, project): + run_dbt(["run"]) + + # Select by model name + results = run_dbt(["test", "--select", "my_model"], expect_pass=True) + assert len(results) == 6 diff --git a/tests/unit/task/test_test.py b/tests/unit/task/test_test.py new file mode 100644 index 00000000000..350f80950eb --- /dev/null +++ b/tests/unit/task/test_test.py @@ -0,0 +1,71 @@ +import agate +import pytest + +from dbt.task.test import list_rows_from_table + + +class TestListRowsFromTable: + @pytest.mark.parametrize( + "agate_table_cols,agate_table_rows,expected_list_rows", + [ + (["a", "b", "c"], [], [["a", "b", "c"]]), # no rows + (["a", "b", "c"], [[1, 2, 3]], [["a", "b", "c"], [1, 2, 3]]), # single row, no nulls + ( + ["a", "b", "c"], + [[1, 2, 3], [2, 3, 4]], + [["a", "b", "c"], [1, 2, 3], [2, 3, 4]], + ), # multiple rows + ( + ["a", "b", "c"], + [[None, 2, 3], [2, None, 4]], + [["a", "b", "c"], [None, 2, 3], [2, None, 4]], + ), # multiple rows, with nulls + ], + ) + def test_list_rows_from_table_no_sort( + self, agate_table_cols, agate_table_rows, expected_list_rows + ): + table = agate.Table(rows=agate_table_rows, column_names=agate_table_cols) + + list_rows = list_rows_from_table(table) + assert list_rows == expected_list_rows + + @pytest.mark.parametrize( + "agate_table_cols,agate_table_rows,expected_list_rows", + [ + (["a", "b", "c"], [], [["a", "b", "c"]]), # no rows + (["a", "b", "c"], [[1, 2, 3]], [["a", "b", "c"], [1, 2, 3]]), # single row, no nulls + ( + ["a", "b", "c"], + [[1, 2, 3], [2, 3, 4]], + [["a", "b", "c"], [1, 2, 3], [2, 3, 4]], + ), # multiple rows, in order + ( + ["a", "b", "c"], + [[2, 3, 4], [1, 2, 3]], + [["a", "b", "c"], [1, 2, 3], [2, 3, 4]], + ), # multiple rows, out of order + ( + ["a", "b", "c"], + [[None, 2, 3], [2, 3, 4]], + [["a", "b", "c"], [2, 3, 4], [None, 2, 3]], + ), # multiple rows, out of order with nulls in first position + ( + ["a", "b", "c"], + [[4, 5, 6], [1, None, 3]], + [["a", "b", "c"], [1, None, 3], [4, 5, 6]], + ), # multiple rows, out of order with null in non-first position + ( + ["a", "b", "c"], + [[None, 5, 6], [1, None, 3]], + [["a", "b", "c"], [1, None, 3], [None, 5, 6]], + ), # multiple rows, out of order with nulls in many positions + ], + ) + def test_list_rows_from_table_with_sort( + self, agate_table_cols, agate_table_rows, expected_list_rows + ): + table = agate.Table(rows=agate_table_rows, column_names=agate_table_cols) + + list_rows = list_rows_from_table(table, sort=True) + assert list_rows == expected_list_rows