From e62b73d126db886be38596cc4467d54a37183fb6 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Wed, 7 Feb 2024 11:01:29 -0500 Subject: [PATCH 1/5] setup CI --- dev-requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 28a626fc3..6c4d19e81 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,8 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-adapters.git@unit-testing-case-insensitive-comparisons +git+https://github.com/dbt-labs/dbt-adapters.git@unit-testing-case-insensitive-comparisons#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor From 688baa6f25cb92791c4327f4eea1499a8d14f31e Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Wed, 7 Feb 2024 11:40:06 -0500 Subject: [PATCH 2/5] TestSparkUnitTestCaseInsensitivity, TestSparkUnitTestInvalidInput --- dbt/include/spark/macros/adapters.sql | 1 + .../adapter/unit_testing/test_unit_testing.py | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/functional/adapter/unit_testing/test_unit_testing.py diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index bf9f63cf9..a6404a2de 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -387,6 +387,7 @@ "identifier": tmp_identifier }) -%} + {%- set tmp_relation = tmp_relation.include(database=false, schema=false) -%} {% do return(tmp_relation) %} {% endmacro %} diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py new file mode 100644 index 000000000..998f34b31 --- /dev/null +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -0,0 +1,10 @@ +from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity +from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput + + +class TestSparkUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): + pass + + +class TestSparkUnitTestInvalidInput(BaseUnitTestInvalidInput): + pass From 0b378366564395dd24a7f82e0560463cdea99e39 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 9 Feb 2024 18:42:21 -0500 Subject: [PATCH 3/5] TestSparkUnitTestingTypes - primative types --- dev-requirements.txt | 4 ++-- .../adapter/unit_testing/test_unit_testing.py | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 6c4d19e81..ec7c69ff8 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,8 +1,8 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-adapters.git@unit-testing-case-insensitive-comparisons -git+https://github.com/dbt-labs/dbt-adapters.git@unit-testing-case-insensitive-comparisons#subdirectory=dbt-tests-adapter +git+https://github.com/dbt-labs/dbt-adapters.git +git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py index 998f34b31..45dbc3356 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -1,7 +1,28 @@ +import pytest + +from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput +class TestSparkUnitTestingTypes(BaseUnitTestingTypes): + @pytest.fixture + def data_types(self): + # sql_value, yaml_value + return [ + ["1", "1"], + ["2.0", "2.0"], + ["'12345'", "12345"], + ["'string'", "string"], + ["true", "true"], + ["date '2011-11-11'", "2011-11-11"], + ["timestamp '2013-11-03 00:00:00-0'", "2013-11-03 00:00:00-0"], + # ["map(struct('Hello', 'World'), 'Greeting')", '''"map(struct('Hello', 'World'), 'Greeting')"'''], + # ['named_struct("a", 1, "b", 2, "c", 3)', """'named_struct("a", 1, "b", 2, "c", 3)'"""], + # ["array(1, 2, 3)", "'array(1, 2, 3)'"], + ] + + class TestSparkUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): pass From fa083cf1553e65ef4a4f0ed766003a13c14870ae Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 20 Feb 2024 17:27:05 -0500 Subject: [PATCH 4/5] implement safe_cast + add tests for array, map, named_struct for unit testing --- dbt/include/spark/macros/utils/safe_cast.sql | 11 +++++++++++ .../adapter/unit_testing/test_unit_testing.py | 9 ++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 dbt/include/spark/macros/utils/safe_cast.sql diff --git a/dbt/include/spark/macros/utils/safe_cast.sql b/dbt/include/spark/macros/utils/safe_cast.sql new file mode 100644 index 000000000..294ca8d1c --- /dev/null +++ b/dbt/include/spark/macros/utils/safe_cast.sql @@ -0,0 +1,11 @@ +{% macro spark__safe_cast(field, type) %} +{%- if cast_from_string_unsupported_for(type) and field is string -%} + cast({{field.strip('"').strip("'")}} as {{type}}) +{%- else -%} + cast({{field}} as {{type}}) +{%- endif -%} +{% endmacro %} + +{% macro cast_from_string_unsupported_for(type) %} + {{ return(type.lower().startswith('struct') or type.lower().startswith('array') or type.lower().startswith('map')) }} +{% endmacro %} diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py index 45dbc3356..b70c581d1 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -17,9 +17,12 @@ def data_types(self): ["true", "true"], ["date '2011-11-11'", "2011-11-11"], ["timestamp '2013-11-03 00:00:00-0'", "2013-11-03 00:00:00-0"], - # ["map(struct('Hello', 'World'), 'Greeting')", '''"map(struct('Hello', 'World'), 'Greeting')"'''], - # ['named_struct("a", 1, "b", 2, "c", 3)', """'named_struct("a", 1, "b", 2, "c", 3)'"""], - # ["array(1, 2, 3)", "'array(1, 2, 3)'"], + ["array(1, 2, 3)", "'array(1, 2, 3)'"], + [ + "map('10', 't', '15', 'f', '20', NULL)", + """'map("10", "t", "15", "f", "20", NULL)'""", + ], + ['named_struct("a", 1, "b", 2, "c", 3)', """'named_struct("a", 1, "b", 2, "c", 3)'"""], ] From f45280d6c7c65dcf3c2a9ed82c6c88826a2e5d3e Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 20 Feb 2024 19:59:53 -0500 Subject: [PATCH 5/5] refactor safe_cast for readability, changelog entry --- .changes/unreleased/Features-20240220-195925.yaml | 6 ++++++ dbt/include/spark/macros/utils/safe_cast.sql | 7 ++----- 2 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 .changes/unreleased/Features-20240220-195925.yaml diff --git a/.changes/unreleased/Features-20240220-195925.yaml b/.changes/unreleased/Features-20240220-195925.yaml new file mode 100644 index 000000000..c5d86ab7c --- /dev/null +++ b/.changes/unreleased/Features-20240220-195925.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Implement spark__safe_cast and add functional tests for unit testing +time: 2024-02-20T19:59:25.907821-05:00 +custom: + Author: michelleark + Issue: "987" diff --git a/dbt/include/spark/macros/utils/safe_cast.sql b/dbt/include/spark/macros/utils/safe_cast.sql index 294ca8d1c..3ce5820a8 100644 --- a/dbt/include/spark/macros/utils/safe_cast.sql +++ b/dbt/include/spark/macros/utils/safe_cast.sql @@ -1,9 +1,6 @@ {% macro spark__safe_cast(field, type) %} -{%- if cast_from_string_unsupported_for(type) and field is string -%} - cast({{field.strip('"').strip("'")}} as {{type}}) -{%- else -%} - cast({{field}} as {{type}}) -{%- endif -%} +{%- set field_clean = field.strip('"').strip("'") if (cast_from_string_unsupported_for(type) and field is string) else field -%} +cast({{field_clean}} as {{type}}) {% endmacro %} {% macro cast_from_string_unsupported_for(type) %}