Skip to content

Commit

Permalink
Support for redshift 821 (#10366)
Browse files Browse the repository at this point in the history
* Add changelog.

* Avoid sorting for the string case.

* add good unit tests for coverage of sort method.

* add sql format coverage.

---------

Co-authored-by: Mila Page <[email protected]>
  • Loading branch information
VersusFacit and VersusFacit authored Jul 12, 2024
1 parent 1aeff2c commit 6e4564a
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 1 deletion.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240625-171737.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Attempt to provide test fixture tables with all values to set types correctly
for comparison with source tables
time: 2024-06-25T17:17:37.514619-07:00
custom:
Author: versusfacit
Issue: "10365"
36 changes: 36 additions & 0 deletions core/dbt/parser/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,42 @@ def _validate_and_normalize_rows(self, ut_fixture, unit_test_definition, fixture
ut_fixture.fixture, self.project.project_name, unit_test_definition.unique_id
)

# sanitize order of input
if ut_fixture.rows and (
ut_fixture.format == UnitTestFormat.Dict or ut_fixture.format == UnitTestFormat.CSV
):
self._promote_first_non_none_row(ut_fixture)

def _promote_first_non_none_row(self, ut_fixture):
"""
Promote the first row with no None values to the top of the ut_fixture.rows list.
This function modifies the ut_fixture object in place.
Needed for databases like Redshift which uses the first value in a column to determine
the column type. If the first value is None, the type is assumed to be VARCHAR(1).
This leads to obscure type mismatch errors centered on a unit test fixture's `expect`.
See https://github.com/dbt-labs/dbt-redshift/issues/821 for more info.
"""
non_none_row_index = None

# Iterate through each row and its index
for index, row in enumerate(ut_fixture.rows):
# Check if all values in the row are not None
if all(value is not None for value in row.values()):
non_none_row_index = index
break

if non_none_row_index is None:
raise ParsingError(
"Unit Test fixtures require at least one row free of Nones to ensure consistent column types."
)
else:
ut_fixture.rows[0], ut_fixture.rows[non_none_row_index] = (
ut_fixture.rows[non_none_row_index],
ut_fixture.rows[0],
)

def get_fixture_file_rows(self, fixture_name, project_name, utdef_unique_id):
# find fixture file object and store unit_test_definition unique_id
fixture = self._get_fixture(fixture_name, project_name)
Expand Down
117 changes: 116 additions & 1 deletion tests/unit/parser/test_unit_tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from unittest import mock

from dbt.artifacts.resources import DependsOn, UnitTestConfig
from dbt.artifacts.resources import DependsOn, UnitTestConfig, UnitTestFormat
from dbt.contracts.graph.nodes import NodeType, UnitTestDefinition
from dbt.contracts.graph.unparsed import UnitTestOutputFixture
from dbt.exceptions import ParsingError
from dbt.parser import SchemaParser
from dbt.parser.unit_tests import UnitTestParser
from tests.unit.parser.test_parser import SchemaParserTest, assertEqualNodes
Expand Down Expand Up @@ -79,6 +80,59 @@
- {a: 1}
"""

# Dict-format `expect` fixture: the first two rows leave `id` empty and only the
# third row has a value for every column.
UNIT_TEST_NONE_ROWS_SORT = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "unit test description"
given: []
expect:
rows:
- {"id": , "col1": "d"}
- {"id": , "col1": "e"}
- {"id": 6, "col1": "f"}
"""

# Same data as UNIT_TEST_NONE_ROWS_SORT, expressed as an inline CSV fixture
# (`format: csv`): two rows with an empty `id`, then one complete row.
UNIT_TEST_NONE_ROWS_SORT_CSV = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "unit test description"
given: []
expect:
format: csv
rows: |
id,col1
,d
,e
6,f
"""

# SQL-format `expect` fixture (`format: sql`): here `rows` is a block of SQL text
# rather than a list of row mappings.
UNIT_TEST_NONE_ROWS_SORT_SQL = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "unit test description"
given: []
expect:
format: sql
rows: |
select null
select 1
"""

# Dict-format `expect` fixture in which every row leaves `id` empty, i.e. there is
# no row with a value for every column.
UNIT_TEST_NONE_ROWS_SORT_FAILS = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "this unit test needs one non-None value row"
given: []
expect:
rows:
- {"id": , "col1": "d"}
- {"id": , "col1": "e"}
"""


class UnitTestParserTest(SchemaParserTest):
def setUp(self):
Expand Down Expand Up @@ -173,3 +227,64 @@ def test_multiple_unit_tests(self):
for unit_test in self.parser.manifest.unit_tests.values():
self.assertEqual(len(unit_test.depends_on.nodes), 1)
self.assertEqual(unit_test.depends_on.nodes[0], "model.snowplow.my_model")

def _parametrize_test_promote_non_none_row(
    self, unit_test_fixture_yml, fixture_expected_field_format, expected_rows
):
    # Shared driver: parse the given unit test YAML, then compare the single parsed
    # unit test against a hand-built definition whose `expect` fixture carries the
    # given format and rows.
    yaml_block = self.yaml_block_for(unit_test_fixture_yml, "test_my_model.yml")
    unit_test_parser = UnitTestParser(self.parser, yaml_block)
    unit_test_parser.parse()

    self.assert_has_manifest_lengths(self.parser.manifest, nodes=1, unit_tests=1)
    parsed_unit_tests = list(self.parser.manifest.unit_tests.values())
    actual = parsed_unit_tests[0]

    source_path = yaml_block.path
    expected_fixture = UnitTestOutputFixture(
        format=fixture_expected_field_format, rows=expected_rows
    )
    expected = UnitTestDefinition(
        name="test_my_model_null_handling",
        model="my_model",
        resource_type=NodeType.Unit,
        package_name="snowplow",
        path=source_path.relative_path,
        original_file_path=source_path.original_file_path,
        unique_id="unit_test.snowplow.my_model.test_my_model_null_handling",
        given=[],
        expect=expected_fixture,
        description="unit test description",
        overrides=None,
        depends_on=DependsOn(nodes=["model.snowplow.my_model"]),
        fqn=["snowplow", "my_model", "test_my_model_null_handling"],
        config=UnitTestConfig(),
        schema="test_schema",
    )
    # The checksum is built on the expected node before the two nodes are compared.
    expected.build_unit_test_checksum()
    assertEqualNodes(actual, expected)

def test_expected_promote_non_none_row_dct(self):
    # Dict fixture: the complete row (id=6) is expected at the top, having traded
    # places with the original first row.
    self._parametrize_test_promote_non_none_row(
        UNIT_TEST_NONE_ROWS_SORT,
        UnitTestFormat.Dict,
        [
            {"id": 6, "col1": "f"},
            {"id": None, "col1": "e"},
            {"id": None, "col1": "d"},
        ],
    )

def test_expected_promote_non_none_row_csv(self):
    # CSV fixture: same expected ordering as the dict case, except that the
    # populated id is expected as the string "6".
    self._parametrize_test_promote_non_none_row(
        UNIT_TEST_NONE_ROWS_SORT_CSV,
        UnitTestFormat.CSV,
        [
            {"id": "6", "col1": "f"},
            {"id": None, "col1": "e"},
            {"id": None, "col1": "d"},
        ],
    )

def test_expected_promote_non_none_row_sql(self):
    # SQL fixture: the rows text is expected unchanged, with the
    # "select null" line still first.
    untouched_sql = "select null\nselect 1"
    self._parametrize_test_promote_non_none_row(
        UNIT_TEST_NONE_ROWS_SORT_SQL, UnitTestFormat.SQL, untouched_sql
    )

def test_no_full_row_throws_error(self):
    # A fixture in which every row contains a None must be rejected with a
    # ParsingError when the unit test is parsed.
    #
    # The YAML block is built outside the assertRaises context so that only the
    # parse step is under test; an error raised while preparing the block then
    # fails the test instead of satisfying the assertion.
    block = self.yaml_block_for(UNIT_TEST_NONE_ROWS_SORT_FAILS, "test_my_model.yml")

    with self.assertRaises(ParsingError):
        UnitTestParser(self.parser, block).parse()

0 comments on commit 6e4564a

Please sign in to comment.