diff --git a/.changes/unreleased/Dependency-20220913-225328.yaml b/.changes/unreleased/Dependency-20220913-225328.yaml
new file mode 100644
index 000000000..b934c08c7
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220913-225328.yaml
@@ -0,0 +1,7 @@
+kind: Dependency
+body: "Bump pyodbc from 4.0.32 to 4.0.34"
+time: 2022-09-13T22:53:28.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 459
diff --git a/.changes/unreleased/Dependency-20220914-191910.yaml b/.changes/unreleased/Dependency-20220914-191910.yaml
new file mode 100644
index 000000000..ad2534c16
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-191910.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump black from 22.3.0 to 22.8.0"
+time: 2022-09-14T19:19:10.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 458
diff --git a/.changes/unreleased/Dependency-20220914-192027.yaml b/.changes/unreleased/Dependency-20220914-192027.yaml
new file mode 100644
index 000000000..1863b52fc
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192027.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Update click requirement from ~=8.0.4 to ~=8.1.3"
+time: 2022-09-14T19:20:27.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 457
diff --git a/.changes/unreleased/Dependency-20220914-192102.yaml b/.changes/unreleased/Dependency-20220914-192102.yaml
new file mode 100644
index 000000000..f13fd6c45
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192102.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump mypy from 0.950 to 0.971"
+time: 2022-09-14T19:21:02.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 456
diff --git a/.changes/unreleased/Dependency-20220914-192125.yaml b/.changes/unreleased/Dependency-20220914-192125.yaml
new file mode 100644
index 000000000..78234be80
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192125.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump thrift-sasl from 0.4.1 to 0.4.3"
+time: 2022-09-14T19:21:25.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 455
diff --git a/.changes/unreleased/Features-20220920-000814.yaml b/.changes/unreleased/Features-20220920-000814.yaml
new file mode 100644
index 000000000..96ba63648
--- /dev/null
+++ b/.changes/unreleased/Features-20220920-000814.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: implement testing for type_boolean in spark
+time: 2022-09-20T00:08:14.15447+01:00
+custom:
+  Author: jpmmcneill
+  Issue: "470"
+  PR: "471"
diff --git a/.changes/unreleased/Under the Hood-20220916-125706.yaml b/.changes/unreleased/Under the Hood-20220916-125706.yaml
new file mode 100644
index 000000000..54b82eb55
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220916-125706.yaml
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Enable Pandas and Pandas-on-Spark DataFrames for dbt python models
+time: 2022-09-16T12:57:06.846297-06:00
+custom:
+  Author: chamini2 dbeatty10
+  Issue: "468"
+  PR: "469"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index d39ba0b44..25d70c722 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -41,7 +41,40 @@
 # --- Autogenerated dbt materialization code. --- #
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
-df.write.mode("overwrite").format("delta").saveAsTable("{{ target_relation }}")
+
+import importlib.util
+
+pandas_available = False
+pyspark_available = False
+
+# make sure pandas exists before using it
+if importlib.util.find_spec("pandas"):
+    import pandas
+    pandas_available = True
+
+# make sure pyspark.pandas exists before using it
+if importlib.util.find_spec("pyspark.pandas"):
+    import pyspark.pandas
+    pyspark_available = True
+
+# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first
+# since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
+# and converting from pandas-on-Spark to Spark DataFrame has no overhead
+if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+    df = pyspark.pandas.frame.DataFrame(df)
+
+# convert to pyspark.sql.dataframe.DataFrame
+if isinstance(df, pyspark.sql.dataframe.DataFrame):
+    pass # since it is already a Spark DataFrame
+elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
+    df = df.to_spark()
+elif pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+    df = spark.createDataFrame(df)
+else:
+    msg = f"{type(df)} is not a supported type for dbt Python materialization"
+    raise Exception(msg)
+
+df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
 {%- endmacro -%}
 
 {%macro py_script_comment()%}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 5b29e5e9d..e93c1b41a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,15 +5,15 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
-black==22.3.0
+black==22.8.0
 bumpversion
-click~=8.0.4
+click~=8.1.3
 flake8
 flaky
 freezegun==0.3.9
 ipdb
 mock>=1.3.0
-mypy==0.950
+mypy==0.971
 pre-commit
 pytest-csv
 pytest-dotenv
@@ -24,4 +24,4 @@ tox>=3.2.0
 
 # Test requirements
 sasl>=0.2.1
-thrift_sasl==0.4.1
+thrift_sasl==0.4.3
diff --git a/requirements.txt b/requirements.txt
index 5d774e4f7..14b36b723 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 PyHive[hive]>=0.6.0,<0.7.0
 requests[python]>=2.28.1
 
-pyodbc==4.0.32
+pyodbc==4.0.34
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
index 65a24a3a9..ce6085803 100644
--- a/tests/functional/adapter/utils/test_data_types.py
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -9,6 +9,7 @@
 from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
 from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
 from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp
+from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean
 
 
 class TestTypeBigInt(BaseTypeBigInt):
@@ -65,3 +66,7 @@ class TestTypeString(BaseTypeString):
 
 class TestTypeTimestamp(BaseTypeTimestamp):
     pass
+
+
+class TestTypeBoolean(BaseTypeBoolean):
+    pass
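
Note (not part of the patch above): the Python block added to py_write_table implements a small conversion cascade so that a dbt Python model may return a plain pandas DataFrame, a pandas-on-Spark DataFrame, or a native Spark DataFrame and still be written out as a Delta table. Below is a minimal standalone sketch of that cascade, assuming pyspark is importable in the execution environment; the helper name to_spark_dataframe is hypothetical and for illustration only.

# Illustrative sketch only -- not part of the diff above. Assumes pyspark is
# installed; the function name to_spark_dataframe is hypothetical.
import importlib.util

from pyspark.sql import DataFrame as SparkDataFrame


def to_spark_dataframe(df, spark):
    """Convert a model's return value into a Spark DataFrame, if possible."""
    pandas_available = importlib.util.find_spec("pandas") is not None
    pandas_on_spark_available = importlib.util.find_spec("pyspark.pandas") is not None

    if pandas_available:
        import pandas
    if pandas_on_spark_available:
        import pyspark.pandas

    # Prefer pandas -> pandas-on-Spark: its constructor accepts pandas objects
    # directly, and pandas-on-Spark -> Spark is cheap via .to_spark().
    if pandas_on_spark_available and pandas_available and isinstance(df, pandas.DataFrame):
        df = pyspark.pandas.DataFrame(df)

    if isinstance(df, SparkDataFrame):
        return df  # already a Spark DataFrame
    if pandas_on_spark_available and isinstance(df, pyspark.pandas.DataFrame):
        return df.to_spark()
    if pandas_available and isinstance(df, pandas.DataFrame):
        return spark.createDataFrame(df)
    raise Exception(f"{type(df)} is not a supported type for dbt Python materialization")

The pandas-to-pandas-on-Spark hop comes first because, per the comments in the hunk, the pandas-on-Spark constructor handles pandas objects better than spark.createDataFrame(df), and going from pandas-on-Spark to a Spark DataFrame via .to_spark() has no overhead. The generated materialization code then writes the result with df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable(...), as shown in the hunk above.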