From 5210d0a9746443c8a2cf217c16c919a73f3ba543 Mon Sep 17 00:00:00 2001 From: Ben Schreiber <74134279+ben-schreiber@users.noreply.github.com> Date: Thu, 21 Dec 2023 19:45:14 +0200 Subject: [PATCH 01/44] Fix hardcoded file format in python materializations (#955) * Fix hardcoded file format in python materializations * Add changelog --- .changes/unreleased/Fixes-20231221-081949.yaml | 6 ++++++ dbt/include/spark/macros/materializations/table.sql | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Fixes-20231221-081949.yaml diff --git a/.changes/unreleased/Fixes-20231221-081949.yaml b/.changes/unreleased/Fixes-20231221-081949.yaml new file mode 100644 index 000000000..b10c8141e --- /dev/null +++ b/.changes/unreleased/Fixes-20231221-081949.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix hardcoded file format for python models +time: 2023-12-21T08:19:49.630806+02:00 +custom: + Author: ben-schreiber + Issue: "803" diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index 39a9caba9..b7c56c424 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -98,7 +98,7 @@ else: msg = f"{type(df)} is not a supported type for dbt Python materialization" raise Exception(msg) -df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}") +df.write.mode("overwrite").format("{{ config.get('file_format', 'delta') }}").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}") {%- endmacro -%} {%macro py_script_comment()%} From f9f75e92a3177979cd745440297d33536c93a348 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:36:17 -0800 Subject: [PATCH 02/44] Migrate Off Circle CI / To Github Actions + dagger.io (#923) * Add Github action for integration test * Update tox * Fetch spark from https link * Use Spark version 3.1.2 * Seperate running Spark session and thrift * Use Spark 3.1.2 and Hadoop 3.2 * Reset tox.ini * Remove base pythons in tox.ini * Fix reference to Docker compose file * Remove timeout * Remove artifact steps * Bump Spark and Hadoop versions * Reset Spark and Hadoop version * Update comment * Add changie * add databricks and PR execution protections * use single quotes * remove `_target` suffix * add comment to test * specify container user as root * formatting * remove python setup for pre-existing container * download simba * fix curl call * fix curl call * fix curl call * fix curl call * fix curl call * fix curl call * fix db test naming * confirm ODBC driver installed * add odbc driver env var * add odbc driver env var * specify platform * check odbc driver integrity * add dbt user env var * add dbt user env var * fix host_name env var * try removing architecture arg * swap back to pull_request_target * try running on host instead of container * Update .github/workflows/integration.yml Co-authored-by: Emily Rockman * try running odbcinst -j * remove bash * add sudo * add sudo * update odbc.ini * install libsasl2-modules-gssapi-mit * install libsasl2-modules-gssapi-mit * set -e on odbc install * set -e on odbc install * set -e on odbc install * sudo echo odbc.inst * remove postgres components * remove release related items * remove irrelevant output * move long bash script into its own file * update integration.yml to align with other adapters * revert name change * revert name change * 
combine databricks and spark tests * combine databricks and spark tests * Add dagger * remove platform * add dagger setup * add dagger setup * set env vars * install requirements * install requirements * add DEFAULT_ENV_VARS and test_path arg * remove circle ci * formatting * update changie * Update .changes/unreleased/Under the Hood-20230929-161218.yaml Co-authored-by: Emily Rockman * formatting fixes and simplify env_var handling * remove tox, update CONTRIBUTING.md and cleanup GHA workflows * remove tox, update CONTRIBUTING.md and cleanup GHA workflows * install test reqs in main.yml * install test reqs in main.yml * formatting * remove tox from dev-requirements.txt and Makefile * clarify spark crt instantiation * add comments on python-version --------- Co-authored-by: Cor Zuurmond Co-authored-by: Florian Eiden Co-authored-by: Emily Rockman Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Co-authored-by: Mike Alfare --- .../Under the Hood-20230929-161218.yaml | 6 + .circleci/config.yml | 136 ------------------ .github/scripts/update_dbt_core_branch.sh | 17 +++ .github/workflows/integration.yml | 112 +++++++++++++++ .github/workflows/main.yml | 15 +- .gitignore | 2 + CONTRIBUTING.md | 24 +++- Makefile | 7 +- README.md | 3 - dagger/requirements.txt | 2 + dagger/run_dbt_spark_tests.py | 130 +++++++++++++++++ dagger/scripts/configure_odbc.sh | 11 ++ dagger/scripts/install_os_reqs.sh | 10 ++ dagger/spark-container/entrypoint.sh | 15 ++ dagger/spark-container/hive-site.xml | 46 ++++++ dagger/spark-container/install_spark.sh | 15 ++ dagger/spark-container/spark-defaults.conf | 9 ++ dev-requirements.txt | 5 +- tests/conftest.py | 2 +- tox.ini | 83 ----------- 20 files changed, 408 insertions(+), 242 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20230929-161218.yaml delete mode 100644 .circleci/config.yml create mode 100755 .github/scripts/update_dbt_core_branch.sh create mode 100644 .github/workflows/integration.yml create mode 100644 dagger/requirements.txt create mode 100644 dagger/run_dbt_spark_tests.py create mode 100755 dagger/scripts/configure_odbc.sh create mode 100755 dagger/scripts/install_os_reqs.sh create mode 100644 dagger/spark-container/entrypoint.sh create mode 100644 dagger/spark-container/hive-site.xml create mode 100755 dagger/spark-container/install_spark.sh create mode 100644 dagger/spark-container/spark-defaults.conf delete mode 100644 tox.ini diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/unreleased/Under the Hood-20230929-161218.yaml new file mode 100644 index 000000000..9b5c6818b --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230929-161218.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. +time: 2023-09-29T16:12:18.968755+02:00 +custom: + Author: JCZuurmond, colin-rogers-dbt + Issue: "719" diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index f2a3b6357..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,136 +0,0 @@ -version: 2.1 - -jobs: - unit: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: fishtownanalytics/test-container:10 - steps: - - checkout - - run: tox -e flake8,unit - -# Turning off for now due to flaky runs of tests will turn back on at later date. 
- integration-spark-session: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: godatadriven/pyspark:3.1 - steps: - - checkout - - run: apt-get update - - run: conda install python=3.10 - - run: python3 -m pip install --upgrade pip - - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev libxml2-dev libxslt-dev - - run: python3 -m pip install tox - - run: - name: Run integration tests - command: tox -e integration-spark-session - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-thrift: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: fishtownanalytics/test-container:10 - - image: godatadriven/spark:3.1.1 - environment: - WAIT_FOR: localhost:5432 - command: > - --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 - --name Thrift JDBC/ODBC Server - - image: postgres:9.6.17-alpine - environment: - POSTGRES_USER: dbt - POSTGRES_PASSWORD: dbt - POSTGRES_DB: metastore - - steps: - - checkout - - - run: - name: Wait for Spark-Thrift - command: dockerize -wait tcp://localhost:10000 -timeout 15m -wait-retry-interval 5s - - - run: - name: Run integration tests - command: tox -e integration-spark-thrift - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-databricks-http: - environment: - DBT_INVOCATION_ENV: circle - DBT_DATABRICKS_RETRY_ALL: True - DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" - DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" - DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" - docker: - - image: fishtownanalytics/test-container:10 - steps: - - checkout - - run: - name: Run integration tests - command: tox -e integration-spark-databricks-http - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-databricks-odbc-cluster: &databricks-odbc - environment: - DBT_INVOCATION_ENV: circle - ODBC_DRIVER: Simba # TODO: move env var to Docker image - DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" - DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" - DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" - docker: - # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed - - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest - aws_auth: - aws_access_key_id: $AWS_ACCESS_KEY_ID_STAGING - aws_secret_access_key: $AWS_SECRET_ACCESS_KEY_STAGING - steps: - - checkout - - run: - name: Run integration tests - command: tox -e integration-spark-databricks-odbc-cluster - no_output_timeout: 1h - - store_artifacts: - path: ./logs - - integration-spark-databricks-odbc-endpoint: - <<: *databricks-odbc - steps: - - checkout - - run: - name: Run integration tests - command: tox -e integration-spark-databricks-odbc-sql-endpoint - no_output_timeout: 1h - - store_artifacts: - path: ./logs - -workflows: - version: 2 - test-everything: - jobs: - - unit - - integration-spark-session: - requires: - - unit - - integration-spark-thrift: - requires: - - unit - - integration-spark-databricks-http: - requires: - - integration-spark-thrift - - integration-spark-databricks-odbc-cluster: - context: aws-credentials - requires: - - integration-spark-thrift - - integration-spark-databricks-odbc-endpoint: - context: aws-credentials - requires: - - integration-spark-thrift diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh new file mode 100755 index 000000000..1a5a5c2d7 --- /dev/null +++ b/.github/scripts/update_dbt_core_branch.sh @@ -0,0 
+1,17 @@ +#!/bin/bash -e +set -e + +git_branch=$1 +target_req_file="dev-requirements.txt" +core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g" +tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g" +if [[ "$OSTYPE" == darwin* ]]; then + # mac ships with a different version of sed that requires a delimiter arg + sed -i "" "$core_req_sed_pattern" $target_req_file + sed -i "" "$tests_req_sed_pattern" $target_req_file +else + sed -i "$core_req_sed_pattern" $target_req_file + sed -i "$tests_req_sed_pattern" $target_req_file +fi +core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2) +bumpversion --allow-dirty --new-version "$core_version" major diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 000000000..94dece350 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,112 @@ +# **what?** +# Runs integration tests. + +# **why?** +# Ensure code runs as expected. + +# **when?** +# This will run for all PRs, when code is pushed to a release +# branch, and when manually triggered. + +name: Adapter Integration Tests + +on: + push: + branches: + - "main" + - "*.latest" + + pull_request_target: + paths-ignore: + - ".changes/**" + - ".flake8" + - ".gitignore" + - "**.md" + + workflow_dispatch: + inputs: + dbt-core-branch: + description: "branch of dbt-core to use in dev-requirements.txt" + required: false + type: string + +# explicitly turn off permissions for `GITHUB_TOKEN` +permissions: read-all + +# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request_target') && github.event.pull_request.head.ref || github.sha }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +jobs: + + test: + name: ${{ matrix.test }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + test: + - "apache_spark" + - "spark_session" + - "databricks_sql_endpoint" + - "databricks_cluster" + - "databricks_http_cluster" + + env: + DBT_INVOCATION_ENV: github-actions + DD_CIVISIBILITY_AGENTLESS_ENABLED: true + DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} + DD_SITE: datadoghq.com + DD_ENV: ci + DD_SERVICE: ${{ github.event.repository.name }} + DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }} + DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }} + DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} + DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} + DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }} + DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" + DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" + DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" + + steps: + - name: Check out the repository + if: github.event_name != 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + + # explicitly checkout the branch for the PR, + # this is necessary for the `pull_request` event + - name: Check out the repository (PR) + if: github.event_name == 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + ref: ${{ github.event.pull_request.head.sha }} + + # the python version used here is not what is used in the tests themselves + - name: Set up Python for 
dagger + uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: Install python dependencies + run: | + python -m pip install --user --upgrade pip + python -m pip --version + python -m pip install -r dagger/requirements.txt + + - name: Update dev_requirements.txt + if: inputs.dbt-core-branch != '' + run: | + pip install bumpversion + ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} + + - name: Run tests for ${{ matrix.test }} + run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 30126325e..20f3f88f4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,7 +19,6 @@ on: branches: - "main" - "*.latest" - - "releases/*" pull_request: workflow_dispatch: @@ -81,10 +80,6 @@ jobs: matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] - env: - TOXENV: "unit" - PYTEST_ADDOPTS: "-v --color=yes --csv unit_results.csv" - steps: - name: Check out the repository uses: actions/checkout@v3 @@ -100,10 +95,12 @@ jobs: sudo apt-get install libsasl2-dev python -m pip install --user --upgrade pip python -m pip --version - python -m pip install tox - tox --version - - name: Run tox - run: tox + python -m pip install -r requirements.txt + python -m pip install -r dev-requirements.txt + python -m pip install -e . + + - name: Run unit tests + run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit - name: Get current date if: always() diff --git a/.gitignore b/.gitignore index 33a83848c..1e8ff7411 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,5 @@ test.env .hive-metastore/ .spark-warehouse/ dbt-integration-tests +/.tool-versions +/.hypothesis/* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a61306ea5..6fcaacea8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -65,11 +65,27 @@ $EDITOR test.env ### Test commands There are a few methods for running tests locally. -#### `tox` -`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`. +#### dagger +To run functional tests we rely on [dagger](https://dagger.io/). This launches a virtual container or containers to test against. -#### `pytest` -Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like: +```sh +pip install -r dagger/requirements.txt +python dagger/run_dbt_spark_tests.py --profile databricks_sql_endpoint --test-path tests/functional/adapter/test_basic.py::TestSimpleMaterializationsSpark::test_base +``` + +`--profile`: required, this is the kind of spark connection to test against + +_options_: + - "apache_spark" + - "spark_session" + - "databricks_sql_endpoint" + - "databricks_cluster" + - "databricks_http_cluster" + +`--test-path`: optional, this is the path to the test file you want to run. If not specified, all tests will be run. + +#### pytest +Finally, you can also run a specific test or group of tests using `pytest` directly (if you have all the dependencies set up on your machine). 
With a Python virtualenv active and dev dependencies installed you can do things like: ```sh # run all functional tests diff --git a/Makefile b/Makefile index cc1d9f75d..2bd1055fa 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ .PHONY: dev dev: ## Installs adapter in develop mode along with development dependencies @\ - pip install -e . -r requirements.txt -r dev-requirements.txt && pre-commit install + pip install -e . -r requirements.txt -r dev-requirements.txt -r dagger/requirements.txt && pre-commit install .PHONY: dev-uninstall dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment @@ -40,12 +40,13 @@ linecheck: ## Checks for all Python lines 100 characters or more .PHONY: unit unit: ## Runs unit tests with py38. @\ - tox -e py38 + python -m pytest tests/unit .PHONY: test test: ## Runs unit tests with py38 and code checks against staged changes. @\ - tox -p -e py38; \ + python -m pytest tests/unit; \ + python dagger/run_dbt_spark_tests.py --profile spark_session \ pre-commit run black-check --hook-stage manual | grep -v "INFO"; \ pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \ pre-commit run mypy-check --hook-stage manual | grep -v "INFO" diff --git a/README.md b/README.md index 2d2586795..7e95b1fc3 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,6 @@ Unit Tests Badge - - Integration Tests Badge -

**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications. diff --git a/dagger/requirements.txt b/dagger/requirements.txt new file mode 100644 index 000000000..df36543c2 --- /dev/null +++ b/dagger/requirements.txt @@ -0,0 +1,2 @@ +dagger-io~=0.8.0 +python-dotenv diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py new file mode 100644 index 000000000..718519909 --- /dev/null +++ b/dagger/run_dbt_spark_tests.py @@ -0,0 +1,130 @@ +import os + +import argparse +import sys + +import anyio as anyio +import dagger as dagger +from dotenv import find_dotenv, load_dotenv + +PG_PORT = 5432 +load_dotenv(find_dotenv("test.env")) +# if env vars aren't specified in test.env (i.e. in github actions worker), use the ones from the host +TESTING_ENV_VARS = { + env_name: os.environ[env_name] + for env_name in os.environ + if env_name.startswith(("DD_", "DBT_")) +} + +TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"}) + + +def env_variables(envs: dict[str, str]): + def env_variables_inner(ctr: dagger.Container): + for key, value in envs.items(): + ctr = ctr.with_env_variable(key, value) + return ctr + + return env_variables_inner + + +async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str): + ctr = await ( + client.container() + .from_("postgres:13") + .with_env_variable("POSTGRES_PASSWORD", "postgres") + .with_exposed_port(PG_PORT) + ) + + return ctr, "postgres_db" + + +async def get_spark_container(client: dagger.Client) -> (dagger.Container, str): + spark_dir = client.host().directory("./dagger/spark-container") + spark_ctr_base = ( + client.container() + .from_("eclipse-temurin:8-jre") + .with_directory("/spark_setup", spark_dir) + .with_env_variable("SPARK_HOME", "/usr/spark") + .with_env_variable("PATH", "/usr/spark/bin:/usr/spark/sbin:$PATH", expand=True) + .with_file( + "/scripts/entrypoint.sh", + client.host().file("./dagger/spark-container/entrypoint.sh"), + permissions=755, + ) + .with_file( + "/scripts/install_spark.sh", + client.host().file("./dagger/spark-container/install_spark.sh"), + permissions=755, + ) + .with_exec(["./spark_setup/install_spark.sh"]) + .with_file("/usr/spark/conf/hive-site.xml", spark_dir.file("/hive-site.xml")) + .with_file("/usr/spark/conf/spark-defaults.conf", spark_dir.file("spark-defaults.conf")) + ) + + # postgres is the metastore here + pg_ctr, pg_host = await get_postgres_container(client) + + spark_ctr = ( + spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr) + .with_exec( + [ + "/scripts/entrypoint.sh", + "--class", + "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2", + "--name", + "Thrift JDBC/ODBC Server", + ] + ) + .with_exposed_port(10000) + ) + + return spark_ctr, "spark_db" + + +async def test_spark(test_args): + async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client: + test_profile = test_args.profile + req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"]) + dbt_spark_dir = client.host().directory("./dbt") + test_dir = client.host().directory("./tests") + scripts = client.host().directory("./dagger/scripts") + platform = dagger.Platform("linux/amd64") + tst_container = ( + client.container(platform=platform) + .from_("python:3.8-slim") + .with_directory("/.", req_files) + .with_directory("/dbt", dbt_spark_dir) + .with_directory("/tests", test_dir) + .with_directory("/scripts", scripts) + .with_exec("./scripts/install_os_reqs.sh") 
+ .with_exec(["pip", "install", "-r", "requirements.txt"]) + .with_exec(["pip", "install", "-r", "dev-requirements.txt"]) + ) + + if test_profile == "apache_spark": + spark_ctr, spark_host = await get_spark_container(client) + tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr) + + elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]: + tst_container = tst_container.with_exec("./scripts/configure_odbc.sh") + + elif test_profile == "spark_session": + tst_container = tst_container.with_exec(["pip", "install", "pyspark"]) + tst_container = tst_container.with_exec(["apt-get", "install", "openjdk-17-jre", "-y"]) + + tst_container = tst_container.with_(env_variables(TESTING_ENV_VARS)) + test_path = test_args.test_path if test_args.test_path else "tests/functional/adapter" + result = await tst_container.with_exec( + ["pytest", "-v", "--profile", test_profile, "-n", "auto", test_path] + ).stdout() + + return result + + +parser = argparse.ArgumentParser() +parser.add_argument("--profile", required=True, type=str) +parser.add_argument("--test-path", required=False, type=str) +args = parser.parse_args() + +anyio.run(test_spark, args) diff --git a/dagger/scripts/configure_odbc.sh b/dagger/scripts/configure_odbc.sh new file mode 100755 index 000000000..ddf020ad2 --- /dev/null +++ b/dagger/scripts/configure_odbc.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -eo +rm -rf /tmp && mkdir /tmp + +curl -OL "https://databricks.com/wp-content/uploads/drivers-2020/SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip" +unzip SimbaSparkODBC-2.6.16.1019-Debian-64bit.zip -d /tmp/ +dpkg -i /tmp/SimbaSparkODBC-2.6.16.1019-Debian-64bit/simbaspark_2.6.16.1019-2_amd64.deb +echo "--------------------------------------------" +echo "[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so" >> /etc/odbcinst.ini +dpkg -l | grep Simba # confirm that the driver is installed +rm -rf /tmp diff --git a/dagger/scripts/install_os_reqs.sh b/dagger/scripts/install_os_reqs.sh new file mode 100755 index 000000000..b50027f52 --- /dev/null +++ b/dagger/scripts/install_os_reqs.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -eo +apt-get update && apt-get install -y --no-install-recommends \ + g++ \ + git \ + curl \ + unixodbc \ + unixodbc-dev \ + libsasl2-modules-gssapi-mit \ + unzip diff --git a/dagger/spark-container/entrypoint.sh b/dagger/spark-container/entrypoint.sh new file mode 100644 index 000000000..4b15cab61 --- /dev/null +++ b/dagger/spark-container/entrypoint.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ -n "$WAIT_FOR" ]; then + IFS=';' read -a HOSTPORT_ARRAY <<< "$WAIT_FOR" + for HOSTPORT in "${HOSTPORT_ARRAY[@]}" + do + WAIT_FOR_HOST=${HOSTPORT%:*} + WAIT_FOR_PORT=${HOSTPORT#*:} + + echo Waiting for $WAIT_FOR_HOST to listen on $WAIT_FOR_PORT... + while ! 
nc -z $WAIT_FOR_HOST $WAIT_FOR_PORT; do echo sleeping; sleep 2; done + done +fi +echo "$PATH" +exec spark-submit "$@" diff --git a/dagger/spark-container/hive-site.xml b/dagger/spark-container/hive-site.xml new file mode 100644 index 000000000..93e966fb7 --- /dev/null +++ b/dagger/spark-container/hive-site.xml @@ -0,0 +1,46 @@ + + + + + + + + javax.jdo.option.ConnectionURL + jdbc:postgresql://postgres_db/postgres + + + + javax.jdo.option.ConnectionDriverName + org.postgresql.Driver + + + + javax.jdo.option.ConnectionUserName + postgres + + + + javax.jdo.option.ConnectionPassword + postgres + + + + hive.metastore.schema.verification + false + + diff --git a/dagger/spark-container/install_spark.sh b/dagger/spark-container/install_spark.sh new file mode 100755 index 000000000..476f362a9 --- /dev/null +++ b/dagger/spark-container/install_spark.sh @@ -0,0 +1,15 @@ +set -e + +SPARK_VERSION=3.1.3 +HADOOP_VERSION=3.2 + +apt-get update && \ +apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ +wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ +tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ +rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ +mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \ +ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \ +apt-get remove -y wget && \ +apt-get autoremove -y && \ +apt-get clean diff --git a/dagger/spark-container/spark-defaults.conf b/dagger/spark-container/spark-defaults.conf new file mode 100644 index 000000000..30ec59591 --- /dev/null +++ b/dagger/spark-container/spark-defaults.conf @@ -0,0 +1,9 @@ +spark.driver.memory 2g +spark.executor.memory 2g +spark.hadoop.datanucleus.autoCreateTables true +spark.hadoop.datanucleus.schema.autoCreateTables true +spark.hadoop.datanucleus.fixedDatastore false +spark.serializer org.apache.spark.serializer.KryoSerializer +spark.jars.packages org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0 +spark.sql.extensions org.apache.spark.sql.hudi.HoodieSparkSessionExtension +spark.driver.userClassPathFirst true diff --git a/dev-requirements.txt b/dev-requirements.txt index fe960ed03..765482e25 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? 
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor @@ -22,7 +22,6 @@ pytest-dotenv~=0.5.2 pytest-logbook~=1.2 pytest-xdist~=3.5 pytz~=2023.3 -tox~=4.11 types-pytz~=2023.3 types-requests~=2.31 twine~=4.0 diff --git a/tests/conftest.py b/tests/conftest.py index 94969e406..700ade4d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,7 +38,7 @@ def dbt_profile_target(request): def apache_spark_target(): return { "type": "spark", - "host": "localhost", + "host": "spark_db", "user": "dbt", "method": "thrift", "port": 10000, diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 97017a926..000000000 --- a/tox.ini +++ /dev/null @@ -1,83 +0,0 @@ -[tox] -skipsdist = True -envlist = unit, flake8, integration-spark-thrift - -[testenv:{unit,py38,py39,py310,py}] -allowlist_externals = - /bin/bash -commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit' -passenv = - DBT_* - PYTEST_ADDOPTS -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev-requirements.txt - -[testenv:integration-spark-databricks-http] -allowlist_externals = - /bin/bash -basepython = python3.8 -commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_cluster {posargs} -n4 tests/functional/adapter/*' -passenv = - DBT_* - PYTEST_ADDOPTS -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev-requirements.txt - -e. - -[testenv:integration-spark-databricks-odbc-cluster] -allowlist_externals = - /bin/bash -basepython = python3.8 -commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster {posargs} -n4 tests/functional/adapter/*' -passenv = - DBT_* - PYTEST_ADDOPTS - ODBC_DRIVER -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev-requirements.txt - -e. - -[testenv:integration-spark-databricks-odbc-sql-endpoint] -allowlist_externals = - /bin/bash -basepython = python3.8 -commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint {posargs} -n4 tests/functional/adapter/*' -passenv = - DBT_* - PYTEST_ADDOPTS - ODBC_DRIVER -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev-requirements.txt - -e. - - -[testenv:integration-spark-thrift] -allowlist_externals = - /bin/bash -basepython = python3.8 -commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posargs} -n4 tests/functional/adapter/*' -passenv = - DBT_* - PYTEST_ADDOPTS -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev-requirements.txt - -e. 
- -[testenv:integration-spark-session] -allowlist_externals = - /bin/bash -basepython = python3.10 -commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*' -passenv = - DBT_* - PYTEST_* - PIP_CACHE_DIR -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev-requirements.txt - -e.[session] From e97918b826d5dfc7853d1c559201171026f1d125 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Thu, 11 Jan 2024 10:51:46 -0800 Subject: [PATCH 03/44] fix ODBC_DRIVER env var (#971) --- dagger/run_dbt_spark_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index 718519909..91b9988c9 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -16,7 +16,7 @@ if env_name.startswith(("DD_", "DBT_")) } -TESTING_ENV_VARS.update({"ODBC_DRIVER": "Simba"}) +TESTING_ENV_VARS.update({"ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so"}) def env_variables(envs: dict[str, str]): From 5d90ff9ab4c9c0eb5137f606b843aec3bf35c6d2 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Wed, 24 Jan 2024 18:48:08 -0800 Subject: [PATCH 04/44] feature/decouple adapters from core (#972) * Add Github action for integration test * Update tox * Fetch spark from https link * Use Spark version 3.1.2 * Seperate running Spark session and thrift * Use Spark 3.1.2 and Hadoop 3.2 * Reset tox.ini * Remove base pythons in tox.ini * Fix reference to Docker compose file * Remove timeout * Remove artifact steps * Bump Spark and Hadoop versions * Reset Spark and Hadoop version * Update comment * Add changie * add databricks and PR execution protections * use single quotes * remove `_target` suffix * add comment to test * specify container user as root * formatting * remove python setup for pre-existing container * download simba * fix curl call * fix curl call * fix curl call * fix curl call * fix curl call * fix curl call * fix db test naming * confirm ODBC driver installed * add odbc driver env var * add odbc driver env var * specify platform * check odbc driver integrity * add dbt user env var * add dbt user env var * fix host_name env var * try removing architecture arg * swap back to pull_request_target * try running on host instead of container * Update .github/workflows/integration.yml Co-authored-by: Emily Rockman * try running odbcinst -j * remove bash * add sudo * add sudo * update odbc.ini * install libsasl2-modules-gssapi-mit * install libsasl2-modules-gssapi-mit * set -e on odbc install * set -e on odbc install * set -e on odbc install * sudo echo odbc.inst * remove postgres components * remove release related items * remove irrelevant output * move long bash script into its own file * update integration.yml to align with other adapters * revert name change * revert name change * combine databricks and spark tests * combine databricks and spark tests * Add dagger * remove platform * add dagger setup * add dagger setup * set env vars * install requirements * install requirements * add DEFAULT_ENV_VARS and test_path arg * remove circle ci * formatting * update changie * Update .changes/unreleased/Under the Hood-20230929-161218.yaml Co-authored-by: Emily Rockman * formatting fixes and simplify env_var handling * remove tox, update CONTRIBUTING.md and cleanup GHA workflows * remove tox, update CONTRIBUTING.md and cleanup GHA workflows * install 
test reqs in main.yml * install test reqs in main.yml * formatting * remove tox from dev-requirements.txt and Makefile * clarify spark crt instantiation * add comments on python-version * initial migration changes * unpin * implement core / adapters decoupling * fix list_relations * fix typing and exception imports * fix typing and exception imports * add changie * replace dbt.common with dbt_common * update setup.py * add dbt-adapters * update setup.py * fix credentials import * fix dev-requirements.txt * dagger improvements to caching and installing package under test * update requirements * add cluster start fixture * update conftest.py * re-order dagger setup to reduce cache invalidation * renove dbt-core version dependency version check --------- Co-authored-by: Cor Zuurmond Co-authored-by: Florian Eiden Co-authored-by: Emily Rockman Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Co-authored-by: Mike Alfare --- .../Under the Hood-20240111-114806.yaml | 6 ++ dagger/requirements.txt | 2 +- dagger/run_dbt_spark_tests.py | 47 ++++++++++---- dbt/adapters/spark/column.py | 2 +- dbt/adapters/spark/connections.py | 57 ++++++++--------- dbt/adapters/spark/impl.py | 63 ++++++++++++------- dbt/adapters/spark/python_submissions.py | 37 ++++------- dbt/adapters/spark/relation.py | 4 +- dbt/adapters/spark/session.py | 6 +- dbt/include/spark/macros/adapters.sql | 4 +- dev-requirements.txt | 3 +- setup.py | 13 +--- tests/conftest.py | 16 +++-- tests/functional/conftest.py | 19 ++++++ tests/unit/test_adapter.py | 39 ++++++++---- tests/unit/utils.py | 2 +- 16 files changed, 188 insertions(+), 132 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20240111-114806.yaml create mode 100644 tests/functional/conftest.py diff --git a/.changes/unreleased/Under the Hood-20240111-114806.yaml b/.changes/unreleased/Under the Hood-20240111-114806.yaml new file mode 100644 index 000000000..31705f468 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240111-114806.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Update import paths and list_relations to support decoupling adapters/core +time: 2024-01-11T11:48:06.120111-08:00 +custom: + Author: colin-rogers-dbt + Issue: "972" diff --git a/dagger/requirements.txt b/dagger/requirements.txt index df36543c2..b50c448d3 100644 --- a/dagger/requirements.txt +++ b/dagger/requirements.txt @@ -1,2 +1,2 @@ -dagger-io~=0.8.0 +dagger-io~=0.9.7 python-dotenv diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index 91b9988c9..436cb1e92 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -2,6 +2,7 @@ import argparse import sys +from typing import Dict import anyio as anyio import dagger as dagger @@ -19,7 +20,7 @@ TESTING_ENV_VARS.update({"ODBC_DRIVER": "/opt/simba/spark/lib/64/libsparkodbc_sb64.so"}) -def env_variables(envs: dict[str, str]): +def env_variables(envs: Dict[str, str]): def env_variables_inner(ctr: dagger.Container): for key, value in envs.items(): ctr = ctr.with_env_variable(key, value) @@ -28,18 +29,19 @@ def env_variables_inner(ctr: dagger.Container): return env_variables_inner -async def get_postgres_container(client: dagger.Client) -> (dagger.Container, str): - ctr = await ( +def get_postgres_container(client: dagger.Client) -> (dagger.Container, str): + ctr = ( client.container() .from_("postgres:13") .with_env_variable("POSTGRES_PASSWORD", "postgres") .with_exposed_port(PG_PORT) + .as_service() ) return ctr, "postgres_db" -async def get_spark_container(client: 
dagger.Client) -> (dagger.Container, str): +def get_spark_container(client: dagger.Client) -> (dagger.Service, str): spark_dir = client.host().directory("./dagger/spark-container") spark_ctr_base = ( client.container() @@ -63,7 +65,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str): ) # postgres is the metastore here - pg_ctr, pg_host = await get_postgres_container(client) + pg_ctr, pg_host = get_postgres_container(client) spark_ctr = ( spark_ctr_base.with_service_binding(alias=pg_host, service=pg_ctr) @@ -77,6 +79,7 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str): ] ) .with_exposed_port(10000) + .as_service() ) return spark_ctr, "spark_db" @@ -85,29 +88,49 @@ async def get_spark_container(client: dagger.Client) -> (dagger.Container, str): async def test_spark(test_args): async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as client: test_profile = test_args.profile - req_files = client.host().directory("./", include=["*.txt", "*.env", "*.ini"]) + + # create cache volumes, these are persisted between runs saving time when developing locally + os_reqs_cache = client.cache_volume("os_reqs") + pip_cache = client.cache_volume("pip") + + # setup directories as we don't want to copy the whole repo into the container + req_files = client.host().directory( + "./", include=["*.txt", "*.env", "*.ini", "*.md", "setup.py"] + ) dbt_spark_dir = client.host().directory("./dbt") test_dir = client.host().directory("./tests") scripts = client.host().directory("./dagger/scripts") + platform = dagger.Platform("linux/amd64") tst_container = ( client.container(platform=platform) .from_("python:3.8-slim") - .with_directory("/.", req_files) - .with_directory("/dbt", dbt_spark_dir) - .with_directory("/tests", test_dir) + .with_mounted_cache("/var/cache/apt/archives", os_reqs_cache) + .with_mounted_cache("/root/.cache/pip", pip_cache) + # install OS deps first so any local changes don't invalidate the cache .with_directory("/scripts", scripts) - .with_exec("./scripts/install_os_reqs.sh") + .with_exec(["./scripts/install_os_reqs.sh"]) + # install dbt-spark + python deps + .with_directory("/src", req_files) + .with_directory("src/dbt", dbt_spark_dir) + .with_directory("src/tests", test_dir) + .with_workdir("/src") + .with_exec(["pip", "install", "-U", "pip"]) .with_exec(["pip", "install", "-r", "requirements.txt"]) .with_exec(["pip", "install", "-r", "dev-requirements.txt"]) + .with_exec(["pip", "install", "-e", "."]) ) if test_profile == "apache_spark": - spark_ctr, spark_host = await get_spark_container(client) + spark_ctr, spark_host = get_spark_container(client) tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr) elif test_profile in ["databricks_cluster", "databricks_sql_endpoint"]: - tst_container = tst_container.with_exec("./scripts/configure_odbc.sh") + tst_container = ( + tst_container.with_workdir("/") + .with_exec(["./scripts/configure_odbc.sh"]) + .with_workdir("/src") + ) elif test_profile == "spark_session": tst_container = tst_container.with_exec(["pip", "install", "pyspark"]) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index a57fa0565..39f6f529e 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional, TypeVar, Union from dbt.adapters.base.column import Column -from dbt.dataclass_schema import dbtClassMixin +from dbt_common.dataclass_schema import dbtClassMixin Self = 
TypeVar("Self", bound="SparkColumn") diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 966f5584e..83048f921 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -1,11 +1,17 @@ from contextlib import contextmanager -import dbt.exceptions -from dbt.adapters.base import Credentials +from dbt.adapters.contracts.connection import ( + AdapterResponse, + ConnectionState, + Connection, + Credentials, +) +from dbt.adapters.events.logging import AdapterLogger +from dbt.adapters.exceptions import FailedToConnectError from dbt.adapters.sql import SQLConnectionManager -from dbt.contracts.connection import ConnectionState, AdapterResponse -from dbt.events import AdapterLogger -from dbt.utils import DECIMALS +from dbt_common.exceptions import DbtConfigError, DbtRuntimeError, DbtDatabaseError + +from dbt_common.utils.encoding import DECIMALS from dbt.adapters.spark import __version__ try: @@ -22,8 +28,7 @@ pyodbc = None from datetime import datetime import sqlparams -from dbt.contracts.connection import Connection -from dbt.dataclass_schema import StrEnum +from dbt_common.dataclass_schema import StrEnum from dataclasses import dataclass, field from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence @@ -92,15 +97,15 @@ def cluster_id(self) -> Optional[str]: def __post_init__(self) -> None: if self.method is None: - raise dbt.exceptions.DbtRuntimeError("Must specify `method` in profile") + raise DbtRuntimeError("Must specify `method` in profile") if self.host is None: - raise dbt.exceptions.DbtRuntimeError("Must specify `host` in profile") + raise DbtRuntimeError("Must specify `host` in profile") if self.schema is None: - raise dbt.exceptions.DbtRuntimeError("Must specify `schema` in profile") + raise DbtRuntimeError("Must specify `schema` in profile") # spark classifies database and schema as the same thing if self.database is not None and self.database != self.schema: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f" schema: {self.schema} \n" f" database: {self.database} \n" f"On Spark, database must be omitted or have the same value as" @@ -112,7 +117,7 @@ def __post_init__(self) -> None: try: import pyodbc # noqa: F401 except ImportError as e: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f"{self.method} connection method requires " "additional dependencies. \n" "Install the additional required dependencies with " @@ -121,7 +126,7 @@ def __post_init__(self) -> None: ) from e if self.method == SparkConnectionMethod.ODBC and self.cluster and self.endpoint: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( "`cluster` and `endpoint` cannot both be set when" f" using {self.method} method to connect to Spark" ) @@ -130,7 +135,7 @@ def __post_init__(self) -> None: self.method == SparkConnectionMethod.HTTP or self.method == SparkConnectionMethod.THRIFT ) and not (ThriftState and THttpClient and hive): - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f"{self.method} connection method requires " "additional dependencies. \n" "Install the additional required dependencies with " @@ -141,7 +146,7 @@ def __post_init__(self) -> None: try: import pyspark # noqa: F401 except ImportError as e: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f"{self.method} connection method requires " "additional dependencies. 
\n" "Install the additional required dependencies with " @@ -291,13 +296,11 @@ def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None: if poll_state.errorMessage: logger.debug("Poll response: {}".format(poll_state)) logger.debug("Poll status: {}".format(state)) - raise dbt.exceptions.DbtDatabaseError(poll_state.errorMessage) + raise DbtDatabaseError(poll_state.errorMessage) elif state not in STATE_SUCCESS: status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state)) - raise dbt.exceptions.DbtDatabaseError( - "Query failed with status: {}".format(status_type) - ) + raise DbtDatabaseError("Query failed with status: {}".format(status_type)) logger.debug("Poll status: {}, query complete".format(state)) @@ -358,9 +361,9 @@ def exception_handler(self, sql: str) -> Generator[None, None, None]: thrift_resp = exc.args[0] if hasattr(thrift_resp, "status"): msg = thrift_resp.status.errorMessage - raise dbt.exceptions.DbtRuntimeError(msg) + raise DbtRuntimeError(msg) else: - raise dbt.exceptions.DbtRuntimeError(str(exc)) + raise DbtRuntimeError(str(exc)) def cancel(self, connection: Connection) -> None: connection.handle.cancel() @@ -390,7 +393,7 @@ def validate_creds(cls, creds: Any, required: Iterable[str]) -> None: for key in required: if not hasattr(creds, key): - raise dbt.exceptions.DbtProfileError( + raise DbtConfigError( "The config '{}' is required when using the {} method" " to connect to Spark".format(key, method) ) @@ -481,7 +484,7 @@ def open(cls, connection: Connection) -> Connection: endpoint=creds.endpoint ) else: - raise dbt.exceptions.DbtProfileError( + raise DbtConfigError( "Either `cluster` or `endpoint` must set when" " using the odbc method to connect to Spark" ) @@ -525,9 +528,7 @@ def open(cls, connection: Connection) -> Connection: Connection(server_side_parameters=creds.server_side_parameters) ) else: - raise dbt.exceptions.DbtProfileError( - f"invalid credential method: {creds.method}" - ) + raise DbtConfigError(f"invalid credential method: {creds.method}") break except Exception as e: exc = e @@ -537,7 +538,7 @@ def open(cls, connection: Connection) -> Connection: msg = "Failed to connect" if creds.token is not None: msg += ", is your token valid?" 
- raise dbt.exceptions.FailedToConnectError(msg) from e + raise FailedToConnectError(msg) from e retryable_message = _is_retryable_error(e) if retryable_message and creds.connect_retries > 0: msg = ( @@ -558,7 +559,7 @@ def open(cls, connection: Connection) -> Connection: logger.warning(msg) time.sleep(creds.connect_timeout) else: - raise dbt.exceptions.FailedToConnectError("failed to connect") from e + raise FailedToConnectError("failed to connect") from e else: raise exc # type: ignore diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 16c3a3cb7..9a1a7ec06 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,18 +1,31 @@ +import os import re from concurrent.futures import Future from dataclasses import dataclass -from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable, Set +from typing import ( + Any, + Dict, + Iterable, + List, + Optional, + Union, + Type, + Tuple, + Callable, + Set, + FrozenSet, +) from dbt.adapters.base.relation import InformationSchema -from dbt.contracts.graph.manifest import Manifest +from dbt.adapters.contracts.connection import AdapterResponse +from dbt.adapters.events.logging import AdapterLogger +from dbt_common.exceptions import DbtRuntimeError, CompilationError +from dbt_common.utils import AttrDict, executor from typing_extensions import TypeAlias import agate -import dbt -import dbt.exceptions - from dbt.adapters.base import AdapterConfig, PythonJobHelper from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport from dbt.adapters.sql import SQLAdapter @@ -24,14 +37,16 @@ AllPurposeClusterPythonJobHelper, ) from dbt.adapters.base import BaseRelation -from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER -from dbt.contracts.connection import AdapterResponse -from dbt.contracts.graph.nodes import ConstraintType -from dbt.contracts.relation import RelationType -from dbt.events import AdapterLogger -from dbt.utils import executor, AttrDict +from dbt.adapters.contracts.relation import RelationType, RelationConfig +from dbt_common.clients.agate_helper import DEFAULT_TYPE_TESTER +from dbt_common.contracts.constraints import ConstraintType logger = AdapterLogger("Spark") +packages = ["pyhive.hive", "thrift.transport", "thrift.protocol"] +log_level = os.getenv("DBT_SPARK_LOG_LEVEL", "ERROR") +for package in packages: + logger.debug(f"Setting {package} logging to {log_level}") + logger.set_adapter_dependency_log_level(package, log_level) GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw" LIST_SCHEMAS_MACRO_NAME = "list_schemas" @@ -144,7 +159,7 @@ def _get_relation_information(self, row: agate.Row) -> RelationInfo: try: _schema, name, _, information = row except ValueError: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f'Invalid value from "show tables extended ...", got {len(row)} values, expected 4' ) @@ -155,7 +170,7 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn try: _schema, name, _ = row except ValueError: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f'Invalid value from "show tables ...", got {len(row)} values, expected 3' ) @@ -164,7 +179,7 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn table_results = self.execute_macro( DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs={"table_name": table_name} ) - except dbt.exceptions.DbtRuntimeError as e: + except DbtRuntimeError as e: logger.debug(f"Error while retrieving information about 
{table_name}: {e.msg}") table_results = AttrDict() @@ -219,7 +234,7 @@ def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[ row_list=show_table_extended_rows, relation_info_func=self._get_relation_information, ) - except dbt.exceptions.DbtRuntimeError as e: + except DbtRuntimeError as e: errmsg = getattr(e, "msg", "") if f"Database '{schema_relation}' not found" in errmsg: return [] @@ -236,7 +251,7 @@ def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[ row_list=show_table_rows, relation_info_func=self._get_relation_information_using_describe, ) - except dbt.exceptions.DbtRuntimeError as e: + except DbtRuntimeError as e: description = "Error while retrieving information about" logger.debug(f"{description} {schema_relation}: {e.msg}") return [] @@ -298,7 +313,7 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[SparkColumn]: GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation} ) columns = self.parse_describe_extended(relation, rows) - except dbt.exceptions.DbtRuntimeError as e: + except DbtRuntimeError as e: # spark would throw error when table doesn't exist, where other # CDW would just return and empty list, normalizing the behavior here errmsg = getattr(e, "msg", "") @@ -352,11 +367,13 @@ def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str, yield as_dict def get_catalog( - self, manifest: Manifest, selected_nodes: Optional[Set] = None + self, + relation_configs: Iterable[RelationConfig], + used_schemas: FrozenSet[Tuple[str, str]], ) -> Tuple[agate.Table, List[Exception]]: - schema_map = self._get_catalog_schemas(manifest) + schema_map = self._get_catalog_schemas(relation_configs) if len(schema_map) > 1: - raise dbt.exceptions.CompilationError( + raise CompilationError( f"Expected only one database in get_catalog, found " f"{list(schema_map)}" ) @@ -371,7 +388,7 @@ def get_catalog( self._get_one_catalog, info, [schema], - manifest, + relation_configs, ) ) catalogs, exceptions = catch_as_completed(futures) @@ -381,10 +398,10 @@ def _get_one_catalog( self, information_schema: InformationSchema, schemas: Set[str], - manifest: Manifest, + used_schemas: FrozenSet[Tuple[str, str]], ) -> agate.Table: if len(schemas) != 1: - raise dbt.exceptions.CompilationError( + raise CompilationError( f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}" ) diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py index 89831ca7f..e3e7cb370 100644 --- a/dbt/adapters/spark/python_submissions.py +++ b/dbt/adapters/spark/python_submissions.py @@ -4,8 +4,9 @@ from typing import Any, Dict, Callable, Iterable import uuid -import dbt.exceptions from dbt.adapters.base import PythonJobHelper +from dbt_common.exceptions import DbtRuntimeError + from dbt.adapters.spark import SparkCredentials from dbt.adapters.spark import __version__ @@ -53,7 +54,7 @@ def _create_work_dir(self, path: str) -> None: }, ) if response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f"Error creating work_dir for python notebooks\n {response.content!r}" ) @@ -71,9 +72,7 @@ def _upload_notebook(self, path: str, compiled_code: str) -> None: }, ) if response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( - f"Error creating python notebook.\n {response.content!r}" - ) + raise DbtRuntimeError(f"Error creating python notebook.\n {response.content!r}") def _submit_job(self, path: str, cluster_spec: dict) -> str: job_spec = 
{ @@ -99,9 +98,7 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str: json=job_spec, ) if submit_response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( - f"Error creating python run.\n {submit_response.content!r}" - ) + raise DbtRuntimeError(f"Error creating python run.\n {submit_response.content!r}") return submit_response.json()["run_id"] def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None: @@ -135,7 +132,7 @@ def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> No json_run_output = run_output.json() result_state = json_run_output["metadata"]["state"]["result_state"] if result_state != "SUCCESS": - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( "Python model failed with traceback as:\n" "(Note that the line number here does not " "match the line number in your code due to dbt templating)\n" @@ -169,9 +166,9 @@ def polling( response = status_func(**status_func_kwargs) state = get_state_func(response) if exceeded_timeout: - raise dbt.exceptions.DbtRuntimeError("python model run timed out") + raise DbtRuntimeError("python model run timed out") if state != expected_end_state: - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( "python model run ended in state" f"{state} with state_message\n{get_state_msg_func(response)}" ) @@ -205,9 +202,7 @@ def create(self) -> str: }, ) if response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( - f"Error creating an execution context.\n {response.content!r}" - ) + raise DbtRuntimeError(f"Error creating an execution context.\n {response.content!r}") return response.json()["id"] def destroy(self, context_id: str) -> str: @@ -221,9 +216,7 @@ def destroy(self, context_id: str) -> str: }, ) if response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( - f"Error deleting an execution context.\n {response.content!r}" - ) + raise DbtRuntimeError(f"Error deleting an execution context.\n {response.content!r}") return response.json()["id"] @@ -246,9 +239,7 @@ def execute(self, context_id: str, command: str) -> str: }, ) if response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( - f"Error creating a command.\n {response.content!r}" - ) + raise DbtRuntimeError(f"Error creating a command.\n {response.content!r}") return response.json()["id"] def status(self, context_id: str, command_id: str) -> Dict[str, Any]: @@ -263,9 +254,7 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]: }, ) if response.status_code != 200: - raise dbt.exceptions.DbtRuntimeError( - f"Error getting status of command.\n {response.content!r}" - ) + raise DbtRuntimeError(f"Error getting status of command.\n {response.content!r}") return response.json() @@ -298,7 +287,7 @@ def submit(self, compiled_code: str) -> None: get_state_msg_func=lambda response: response.json()["results"]["data"], ) if response["results"]["resultType"] == "error": - raise dbt.exceptions.DbtRuntimeError( + raise DbtRuntimeError( f"Python model failed with traceback as:\n" f"{response['results']['cause']}" ) diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index e80f2623f..860935cbd 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -2,9 +2,9 @@ from dataclasses import dataclass, field from dbt.adapters.base.relation import BaseRelation, Policy +from dbt.adapters.events.logging import AdapterLogger -from dbt.exceptions import DbtRuntimeError -from dbt.events import AdapterLogger +from 
dbt_common.exceptions import DbtRuntimeError logger = AdapterLogger("Spark") diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py index b5b2bebdb..7a6982e50 100644 --- a/dbt/adapters/spark/session.py +++ b/dbt/adapters/spark/session.py @@ -7,9 +7,9 @@ from typing import Any, Dict, List, Optional, Tuple, Union, Sequence from dbt.adapters.spark.connections import SparkConnectionWrapper -from dbt.events import AdapterLogger -from dbt.utils import DECIMALS -from dbt.exceptions import DbtRuntimeError +from dbt.adapters.events.logging import AdapterLogger +from dbt_common.utils.encoding import DECIMALS +from dbt_common.exceptions import DbtRuntimeError from pyspark.sql import DataFrame, Row, SparkSession from pyspark.sql.utils import AnalysisException diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index bfc1f198d..bf9f63cf9 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -294,7 +294,7 @@ {% macro spark__list_relations_without_caching(relation) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} - show table extended in {{ relation }} like '*' + show table extended in {{ relation.schema }} like '*' {% endcall %} {% do return(load_result('list_relations_without_caching').table) %} @@ -305,7 +305,7 @@ {#-- V2 iceberg tables #} {#-- https://issues.apache.org/jira/browse/SPARK-33393 #} {% call statement('list_relations_without_caching_show_tables', fetch_result=True) -%} - show tables in {{ schema_relation }} like '*' + show tables in {{ schema_relation.schema }} like '*' {% endcall %} {% do return(load_result('list_relations_without_caching_show_tables').table) %} diff --git a/dev-requirements.txt b/dev-requirements.txt index 765482e25..28a626fc3 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,6 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? 
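Across these hunks the adapter drops the old monolithic dbt-core import paths in favor of the decoupled dbt-adapters / dbt-common packages. A minimal sketch of the new-style imports, limited to paths that appear in the hunks above (assuming compatible versions of dbt-adapters and dbt-common are installed; this is an illustration, not part of the patch):

    from dbt.adapters.events.logging import AdapterLogger   # was: from dbt.events import AdapterLogger
    from dbt_common.exceptions import DbtRuntimeError       # was: from dbt.exceptions import DbtRuntimeError
    from dbt_common.utils.encoding import DECIMALS          # was: from dbt.utils import DECIMALS

    logger = AdapterLogger("Spark")
    try:
        raise DbtRuntimeError("python model run timed out")
    except DbtRuntimeError as e:
        logger.debug(getattr(e, "msg", ""))
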
-git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git@c2bc2f009bbeeb46b3c69d082ab4d485597898af#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor diff --git a/setup.py b/setup.py index 301b4a41f..2d6e00e53 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,6 @@ print('Please upgrade setuptools with "pip install --upgrade setuptools" ' "and try again") sys.exit(1) - # pull long description from README this_directory = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(this_directory, "README.md"), "r", encoding="utf8") as f: @@ -40,17 +39,8 @@ def _get_plugin_version_dict(): return match.groupdict() -# require a compatible minor version (~=), prerelease if this is a prerelease -def _get_dbt_core_version(): - parts = _get_plugin_version_dict() - minor = "{major}.{minor}.0".format(**parts) - pre = parts["prekind"] + "1" if parts["prekind"] else "" - return f"{minor}{pre}" - - package_name = "dbt-spark" package_version = "1.8.0a1" -dbt_core_version = _get_dbt_core_version() description = """The Apache Spark adapter plugin for dbt""" odbc_extras = ["pyodbc~=4.0.39"] @@ -73,8 +63,9 @@ def _get_dbt_core_version(): packages=find_namespace_packages(include=["dbt", "dbt.*"]), include_package_data=True, install_requires=[ - "dbt-core~={}".format(dbt_core_version), "sqlparams>=3.0.0", + "dbt-common<1.0", + "dbt-adapters~=0.1.0a1", ], extras_require={ "ODBC": odbc_extras, diff --git a/tests/conftest.py b/tests/conftest.py index 700ade4d3..efba41a5f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,9 +42,9 @@ def apache_spark_target(): "user": "dbt", "method": "thrift", "port": 10000, - "connect_retries": 3, - "connect_timeout": 5, - "retry_all": True, + "connect_retries": 2, + "connect_timeout": 3, + "retry_all": False, } @@ -59,7 +59,7 @@ def databricks_cluster_target(): "port": 443, "connect_retries": 3, "connect_timeout": 5, - "retry_all": True, + "retry_all": False, "user": os.getenv("DBT_DATABRICKS_USER"), } @@ -87,11 +87,9 @@ def databricks_http_cluster_target(): "token": os.getenv("DBT_DATABRICKS_TOKEN"), "method": "http", "port": 443, - # more retries + longer timout to handle unavailability while cluster is restarting - # return failures quickly in dev, retry all failures in CI (up to 5 min) - "connect_retries": 5, - "connect_timeout": 60, - "retry_all": bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False)), + "connect_retries": 3, + "connect_timeout": 5, + "retry_all": False, "user": os.getenv("DBT_DATABRICKS_USER"), } diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py new file mode 100644 index 000000000..c1a0397bd --- /dev/null +++ b/tests/functional/conftest.py @@ -0,0 +1,19 @@ +from multiprocessing import Lock + +import pytest + +_db_start_lock = Lock() +_DB_CLUSTER_STARTED = False + + +# Running this should prevent tests from needing to be retried because the Databricks cluster isn't available +@pytest.fixture(scope="class", autouse=True) +def start_databricks_cluster(project, request): + global _DB_CLUSTER_STARTED + profile_type = request.config.getoption("--profile") + with _db_start_lock: + if "databricks" in profile_type and not _DB_CLUSTER_STARTED: + print("Starting Databricks cluster") + project.run_sql("SELECT 1") + + _DB_CLUSTER_STARTED = True diff --git 
a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index a7da63301..54e9f0158 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -1,4 +1,5 @@ import unittest +from multiprocessing import get_context from unittest import mock import dbt.flags as flags @@ -146,7 +147,7 @@ def _get_target_odbc_sql_endpoint(self, project): def test_http_connection(self): config = self._get_target_http(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) def hive_http_connect(thrift_transport, configuration): self.assertEqual(thrift_transport.scheme, "https") @@ -171,7 +172,7 @@ def hive_http_connect(thrift_transport, configuration): def test_thrift_connection(self): config = self._get_target_thrift(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) def hive_thrift_connect( host, port, username, auth, kerberos_service_name, password, configuration @@ -195,7 +196,7 @@ def hive_thrift_connect( def test_thrift_ssl_connection(self): config = self._get_target_use_ssl_thrift(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) def hive_thrift_connect(thrift_transport, configuration): self.assertIsNotNone(thrift_transport) @@ -215,7 +216,7 @@ def hive_thrift_connect(thrift_transport, configuration): def test_thrift_connection_kerberos(self): config = self._get_target_thrift_kerberos(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) def hive_thrift_connect( host, port, username, auth, kerberos_service_name, password, configuration @@ -239,7 +240,7 @@ def hive_thrift_connect( def test_odbc_cluster_connection(self): config = self._get_target_odbc_cluster(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) def pyodbc_connect(connection_str, autocommit): self.assertTrue(autocommit) @@ -266,7 +267,7 @@ def pyodbc_connect(connection_str, autocommit): def test_odbc_endpoint_connection(self): config = self._get_target_odbc_sql_endpoint(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) def pyodbc_connect(connection_str, autocommit): self.assertTrue(autocommit) @@ -329,7 +330,9 @@ def test_parse_relation(self): input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows] config = self._get_target_http(self.project_cfg) - rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) + rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended( + relation, input_cols + ) self.assertEqual(len(rows), 4) self.assertEqual( rows[0].to_column_dict(omit_none=False), @@ -418,7 +421,9 @@ def test_parse_relation_with_integer_owner(self): input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows] config = self._get_target_http(self.project_cfg) - rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) + rows = SparkAdapter(config, get_context("spawn")).parse_describe_extended( + relation, input_cols + ) self.assertEqual(rows[0].to_column_dict().get("table_owner"), "1234") @@ -454,7 +459,9 @@ def test_parse_relation_with_statistics(self): input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows] config = self._get_target_http(self.project_cfg) - rows = SparkAdapter(config).parse_describe_extended(relation, input_cols) + rows = SparkAdapter(config, 
get_context("spawn")).parse_describe_extended( + relation, input_cols + ) self.assertEqual(len(rows), 1) self.assertEqual( rows[0].to_column_dict(omit_none=False), @@ -483,7 +490,7 @@ def test_parse_relation_with_statistics(self): def test_relation_with_database(self): config = self._get_target_http(self.project_cfg) - adapter = SparkAdapter(config) + adapter = SparkAdapter(config, get_context("spawn")) # fine adapter.Relation.create(schema="different", identifier="table") with self.assertRaises(DbtRuntimeError): @@ -564,7 +571,9 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self) ) config = self._get_target_http(self.project_cfg) - columns = SparkAdapter(config).parse_columns_from_information(relation) + columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information( + relation + ) self.assertEqual(len(columns), 4) self.assertEqual( columns[0].to_column_dict(omit_none=False), @@ -649,7 +658,9 @@ def test_parse_columns_from_information_with_view_type(self): ) config = self._get_target_http(self.project_cfg) - columns = SparkAdapter(config).parse_columns_from_information(relation) + columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information( + relation + ) self.assertEqual(len(columns), 4) self.assertEqual( columns[1].to_column_dict(omit_none=False), @@ -715,7 +726,9 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel ) config = self._get_target_http(self.project_cfg) - columns = SparkAdapter(config).parse_columns_from_information(relation) + columns = SparkAdapter(config, get_context("spawn")).parse_columns_from_information( + relation + ) self.assertEqual(len(columns), 4) self.assertEqual( diff --git a/tests/unit/utils.py b/tests/unit/utils.py index ac8c62244..17cd3ee78 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -9,7 +9,7 @@ import agate import pytest -from dbt.dataclass_schema import ValidationError +from dbt_common.dataclass_schema import ValidationError from dbt.config.project import PartialProject From 613fa58ff9d1f06877ad8790a145d91f5913f862 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Tue, 20 Feb 2024 07:53:05 -0800 Subject: [PATCH 05/44] fix spark cluster start mechanism and add extra dev requirements (#986) --- dagger/run_dbt_spark_tests.py | 18 +++++++++++++++--- dev-requirements.txt | 3 +++ tests/functional/conftest.py | 28 ++++++++++++++++++---------- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py index 436cb1e92..15f9cf2c2 100644 --- a/dagger/run_dbt_spark_tests.py +++ b/dagger/run_dbt_spark_tests.py @@ -112,15 +112,27 @@ async def test_spark(test_args): .with_exec(["./scripts/install_os_reqs.sh"]) # install dbt-spark + python deps .with_directory("/src", req_files) - .with_directory("src/dbt", dbt_spark_dir) - .with_directory("src/tests", test_dir) - .with_workdir("/src") .with_exec(["pip", "install", "-U", "pip"]) + .with_workdir("/src") .with_exec(["pip", "install", "-r", "requirements.txt"]) .with_exec(["pip", "install", "-r", "dev-requirements.txt"]) + ) + + # install local dbt-spark changes + tst_container = ( + tst_container.with_workdir("/") + .with_directory("src/dbt", dbt_spark_dir) + .with_workdir("/src") .with_exec(["pip", "install", "-e", "."]) ) + # install local test changes + tst_container = ( + tst_container.with_workdir("/") + .with_directory("src/tests", test_dir) + .with_workdir("/src") + ) 
+ if test_profile == "apache_spark": spark_ctr, spark_host = get_spark_container(client) tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr) diff --git a/dev-requirements.txt b/dev-requirements.txt index 28a626fc3..8f674d84b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,8 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-common.git +git+https://github.com/dbt-labs/dbt-adapters.git git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py index c1a0397bd..476ffb474 100644 --- a/tests/functional/conftest.py +++ b/tests/functional/conftest.py @@ -1,19 +1,27 @@ -from multiprocessing import Lock - +import time import pytest -_db_start_lock = Lock() -_DB_CLUSTER_STARTED = False + +def _wait_for_databricks_cluster(project): + """ + It takes roughly 3min for the cluster to start, to be safe we'll wait for 5min + """ + for _ in range(60): + try: + project.run_sql("SELECT 1", fetch=True) + return + except Exception: + time.sleep(10) + + raise Exception("Databricks cluster did not start in time") # Running this should prevent tests from needing to be retried because the Databricks cluster isn't available @pytest.fixture(scope="class", autouse=True) def start_databricks_cluster(project, request): - global _DB_CLUSTER_STARTED profile_type = request.config.getoption("--profile") - with _db_start_lock: - if "databricks" in profile_type and not _DB_CLUSTER_STARTED: - print("Starting Databricks cluster") - project.run_sql("SELECT 1") - _DB_CLUSTER_STARTED = True + if "databricks" in profile_type: + _wait_for_databricks_cluster(project) + + yield 1 From ef91425004d58948532af5176be9d18af41d0b87 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Wed, 21 Feb 2024 11:56:09 -0500 Subject: [PATCH 06/44] Add functional tests for unit testing (#976) --- .../unreleased/Features-20240220-195925.yaml | 6 ++++ dbt/include/spark/macros/adapters.sql | 1 + dbt/include/spark/macros/utils/safe_cast.sql | 8 +++++ .../adapter/unit_testing/test_unit_testing.py | 34 +++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 .changes/unreleased/Features-20240220-195925.yaml create mode 100644 dbt/include/spark/macros/utils/safe_cast.sql create mode 100644 tests/functional/adapter/unit_testing/test_unit_testing.py diff --git a/.changes/unreleased/Features-20240220-195925.yaml b/.changes/unreleased/Features-20240220-195925.yaml new file mode 100644 index 000000000..c5d86ab7c --- /dev/null +++ b/.changes/unreleased/Features-20240220-195925.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Implement spark__safe_cast and add functional tests for unit testing +time: 2024-02-20T19:59:25.907821-05:00 +custom: + Author: michelleark + Issue: "987" diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index bf9f63cf9..a6404a2de 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -387,6 +387,7 @@ "identifier": tmp_identifier }) -%} + {%- set tmp_relation = tmp_relation.include(database=false, schema=false) -%} {% do return(tmp_relation) %} {% endmacro %} diff --git a/dbt/include/spark/macros/utils/safe_cast.sql b/dbt/include/spark/macros/utils/safe_cast.sql new file mode 100644 
index 000000000..3ce5820a8 --- /dev/null +++ b/dbt/include/spark/macros/utils/safe_cast.sql @@ -0,0 +1,8 @@ +{% macro spark__safe_cast(field, type) %} +{%- set field_clean = field.strip('"').strip("'") if (cast_from_string_unsupported_for(type) and field is string) else field -%} +cast({{field_clean}} as {{type}}) +{% endmacro %} + +{% macro cast_from_string_unsupported_for(type) %} + {{ return(type.lower().startswith('struct') or type.lower().startswith('array') or type.lower().startswith('map')) }} +{% endmacro %} diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py new file mode 100644 index 000000000..b70c581d1 --- /dev/null +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -0,0 +1,34 @@ +import pytest + +from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes +from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity +from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput + + +class TestSparkUnitTestingTypes(BaseUnitTestingTypes): + @pytest.fixture + def data_types(self): + # sql_value, yaml_value + return [ + ["1", "1"], + ["2.0", "2.0"], + ["'12345'", "12345"], + ["'string'", "string"], + ["true", "true"], + ["date '2011-11-11'", "2011-11-11"], + ["timestamp '2013-11-03 00:00:00-0'", "2013-11-03 00:00:00-0"], + ["array(1, 2, 3)", "'array(1, 2, 3)'"], + [ + "map('10', 't', '15', 'f', '20', NULL)", + """'map("10", "t", "15", "f", "20", NULL)'""", + ], + ['named_struct("a", 1, "b", 2, "c", 3)', """'named_struct("a", 1, "b", 2, "c", 3)'"""], + ] + + +class TestSparkUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): + pass + + +class TestSparkUnitTestInvalidInput(BaseUnitTestInvalidInput): + pass From 5d600086746d75781838ed71aa266f18c1bb37f1 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:27:34 -0500 Subject: [PATCH 07/44] remove actions that sync github and jira (#977) The failing tests are from the retired CircleCI checks that were implemented in GHA. --- .github/workflows/jira-creation.yml | 28 -------------------------- .github/workflows/jira-label.yml | 28 -------------------------- .github/workflows/jira-transition.yml | 29 --------------------------- 3 files changed, 85 deletions(-) delete mode 100644 .github/workflows/jira-creation.yml delete mode 100644 .github/workflows/jira-label.yml delete mode 100644 .github/workflows/jira-transition.yml diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml deleted file mode 100644 index 2611a8bdd..000000000 --- a/.github/workflows/jira-creation.yml +++ /dev/null @@ -1,28 +0,0 @@ -# **what?** -# Mirrors issues into Jira. 
Includes the information: title, -# GitHub Issue ID and URL - -# **why?** -# Jira is our tool for tracking and we need to see these issues in there - -# **when?** -# On issue creation or when an issue is labeled `Jira` - -name: Jira Issue Creation - -on: - issues: - types: [opened, labeled] - -permissions: - issues: write - -jobs: - call-label-action: - uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main - with: - project_key: ADAP - secrets: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml deleted file mode 100644 index 1637cbe38..000000000 --- a/.github/workflows/jira-label.yml +++ /dev/null @@ -1,28 +0,0 @@ -# **what?** -# Calls mirroring Jira label Action. Includes adding a new label -# to an existing issue or removing a label as well - -# **why?** -# Jira is our tool for tracking and we need to see these labels in there - -# **when?** -# On labels being added or removed from issues - -name: Jira Label Mirroring - -on: - issues: - types: [labeled, unlabeled] - -permissions: - issues: read - -jobs: - call-label-action: - uses: dbt-labs/actions/.github/workflows/jira-label.yml@main - with: - project_key: ADAP - secrets: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml deleted file mode 100644 index 99158a15f..000000000 --- a/.github/workflows/jira-transition.yml +++ /dev/null @@ -1,29 +0,0 @@ -# **what?** -# Transition a Jira issue to a new state -# Only supports these GitHub Issue transitions: -# closed, deleted, reopened - -# **why?** -# Jira needs to be kept up-to-date - -# **when?** -# On issue closing, deletion, reopened - -name: Jira Issue Transition - -on: - issues: - types: [closed, deleted, reopened] - -# no special access is needed -permissions: read-all - -jobs: - call-label-action: - uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main - with: - project_key: ADAP - secrets: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} From 5246f8207799ccd8d35cf0693fc0eb931b2aa3f9 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Tue, 27 Feb 2024 18:41:13 -0800 Subject: [PATCH 08/44] update dbt-common dependency to <2.0 (#992) * update dbt-common dependency to <2.0 * update dbt-adapters dependency to <2.0 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2d6e00e53..f87cc3213 100644 --- a/setup.py +++ b/setup.py @@ -64,8 +64,8 @@ def _get_plugin_version_dict(): include_package_data=True, install_requires=[ "sqlparams>=3.0.0", - "dbt-common<1.0", - "dbt-adapters~=0.1.0a1", + "dbt-common<2.0", + "dbt-adapters<2.0", ], extras_require={ "ODBC": odbc_extras, From c56b9ce02000bd4e7a88f9fd0ac913a3f47ff2f3 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:24:24 -0500 Subject: [PATCH 09/44] include a pre-release in the dbt-adapters pin to allow pre-releases to be installed on main (#993) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f87cc3213..7342c8660 100644 --- a/setup.py +++ b/setup.py 
@@ -65,7 +65,7 @@ def _get_plugin_version_dict(): install_requires=[ "sqlparams>=3.0.0", "dbt-common<2.0", - "dbt-adapters<2.0", + "dbt-adapters>=0.1.0a1,<2.0", ], extras_require={ "ODBC": odbc_extras, From 0aee01e7d36754736840008a8fb29c1450752c83 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Fri, 1 Mar 2024 15:27:35 -0800 Subject: [PATCH 10/44] update install_requires to allow for pre-release common/adapters (#995) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7342c8660..5f6290d1b 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ def _get_plugin_version_dict(): include_package_data=True, install_requires=[ "sqlparams>=3.0.0", - "dbt-common<2.0", + "dbt-common>=0.1.0a1,<2.0", "dbt-adapters>=0.1.0a1,<2.0", ], extras_require={ From 480355936d44353859f5bdbf80863803495c047f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 16:03:21 -0800 Subject: [PATCH 11/44] [create-pull-request] automated change (#994) Co-authored-by: Github Build Bot Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> --- .bumpversion.cfg | 2 +- .changes/1.8.0-b1.md | 39 ++++++++++++++++++ .../Dependencies-20231009-220732.yaml | 0 .../Dependencies-20231010-222853.yaml | 0 .../Dependencies-20231010-222910.yaml | 0 .../Dependencies-20231013-223750.yaml | 0 .../Dependencies-20231027-230251.yaml | 0 .../Dependencies-20231027-230254.yaml | 0 .../Dependencies-20231027-230301.yaml | 0 .../Dependencies-20231108-222326.yaml | 0 .../Dependencies-20231110-224056.yaml | 0 .../Dependencies-20231113-224111.yaml | 0 .../Dependencies-20231127-220733.yaml | 0 .../Dependencies-20231127-220737.yaml | 0 .../Dependencies-20231127-220741.yaml | 0 .../Dependencies-20231204-224210.yaml | 0 .../Dependencies-20231212-223929.yaml | 0 .../Features-20240220-195925.yaml | 0 .../Fixes-20231107-134141.yaml | 0 .../Fixes-20231221-081949.yaml | 0 .../Under the Hood-20230929-161218.yaml | 0 .../Under the Hood-20231119-132050.yaml | 0 .../Under the Hood-20231214-134728.yaml | 0 .../Under the Hood-20240111-114806.yaml | 0 CHANGELOG.md | 41 +++++++++++++++++++ dbt/adapters/spark/__version__.py | 2 +- setup.py | 2 +- 27 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 .changes/1.8.0-b1.md rename .changes/{unreleased => 1.8.0}/Dependencies-20231009-220732.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231010-222853.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231010-222910.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231013-223750.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231027-230251.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231027-230254.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231027-230301.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231108-222326.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231110-224056.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231113-224111.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231127-220733.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231127-220737.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231127-220741.yaml (100%) rename .changes/{unreleased => 1.8.0}/Dependencies-20231204-224210.yaml (100%) rename .changes/{unreleased => 
1.8.0}/Dependencies-20231212-223929.yaml (100%) rename .changes/{unreleased => 1.8.0}/Features-20240220-195925.yaml (100%) rename .changes/{unreleased => 1.8.0}/Fixes-20231107-134141.yaml (100%) rename .changes/{unreleased => 1.8.0}/Fixes-20231221-081949.yaml (100%) rename .changes/{unreleased => 1.8.0}/Under the Hood-20230929-161218.yaml (100%) rename .changes/{unreleased => 1.8.0}/Under the Hood-20231119-132050.yaml (100%) rename .changes/{unreleased => 1.8.0}/Under the Hood-20231214-134728.yaml (100%) rename .changes/{unreleased => 1.8.0}/Under the Hood-20240111-114806.yaml (100%) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 486768676..595914b21 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.8.0a1 +current_version = 1.8.0b1 parse = (?P[\d]+) # major version number \.(?P[\d]+) # minor version number \.(?P[\d]+) # patch version number diff --git a/.changes/1.8.0-b1.md b/.changes/1.8.0-b1.md new file mode 100644 index 000000000..4f4091a91 --- /dev/null +++ b/.changes/1.8.0-b1.md @@ -0,0 +1,39 @@ +## dbt-spark 1.8.0-b1 - March 01, 2024 + +### Features + +- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987)) + +### Fixes + +- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935)) +- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) + +### Under the Hood + +- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) +- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949)) +- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962)) +- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972)) + +### Dependencies + +- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903)) +- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904)) +- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905)) +- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914)) +- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925)) +- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926)) +- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927)) +- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942)) +- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946)) +- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947)) +- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951)) +- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952)) +- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953)) +- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956)) +- Update black requirement from 
~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959)) + +### Contributors +- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) +- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) diff --git a/.changes/unreleased/Dependencies-20231009-220732.yaml b/.changes/1.8.0/Dependencies-20231009-220732.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231009-220732.yaml rename to .changes/1.8.0/Dependencies-20231009-220732.yaml diff --git a/.changes/unreleased/Dependencies-20231010-222853.yaml b/.changes/1.8.0/Dependencies-20231010-222853.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231010-222853.yaml rename to .changes/1.8.0/Dependencies-20231010-222853.yaml diff --git a/.changes/unreleased/Dependencies-20231010-222910.yaml b/.changes/1.8.0/Dependencies-20231010-222910.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231010-222910.yaml rename to .changes/1.8.0/Dependencies-20231010-222910.yaml diff --git a/.changes/unreleased/Dependencies-20231013-223750.yaml b/.changes/1.8.0/Dependencies-20231013-223750.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231013-223750.yaml rename to .changes/1.8.0/Dependencies-20231013-223750.yaml diff --git a/.changes/unreleased/Dependencies-20231027-230251.yaml b/.changes/1.8.0/Dependencies-20231027-230251.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231027-230251.yaml rename to .changes/1.8.0/Dependencies-20231027-230251.yaml diff --git a/.changes/unreleased/Dependencies-20231027-230254.yaml b/.changes/1.8.0/Dependencies-20231027-230254.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231027-230254.yaml rename to .changes/1.8.0/Dependencies-20231027-230254.yaml diff --git a/.changes/unreleased/Dependencies-20231027-230301.yaml b/.changes/1.8.0/Dependencies-20231027-230301.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231027-230301.yaml rename to .changes/1.8.0/Dependencies-20231027-230301.yaml diff --git a/.changes/unreleased/Dependencies-20231108-222326.yaml b/.changes/1.8.0/Dependencies-20231108-222326.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231108-222326.yaml rename to .changes/1.8.0/Dependencies-20231108-222326.yaml diff --git a/.changes/unreleased/Dependencies-20231110-224056.yaml b/.changes/1.8.0/Dependencies-20231110-224056.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231110-224056.yaml rename to .changes/1.8.0/Dependencies-20231110-224056.yaml diff --git a/.changes/unreleased/Dependencies-20231113-224111.yaml b/.changes/1.8.0/Dependencies-20231113-224111.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231113-224111.yaml rename to .changes/1.8.0/Dependencies-20231113-224111.yaml diff --git a/.changes/unreleased/Dependencies-20231127-220733.yaml b/.changes/1.8.0/Dependencies-20231127-220733.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231127-220733.yaml rename to .changes/1.8.0/Dependencies-20231127-220733.yaml diff --git a/.changes/unreleased/Dependencies-20231127-220737.yaml b/.changes/1.8.0/Dependencies-20231127-220737.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231127-220737.yaml rename to .changes/1.8.0/Dependencies-20231127-220737.yaml diff --git 
a/.changes/unreleased/Dependencies-20231127-220741.yaml b/.changes/1.8.0/Dependencies-20231127-220741.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231127-220741.yaml rename to .changes/1.8.0/Dependencies-20231127-220741.yaml diff --git a/.changes/unreleased/Dependencies-20231204-224210.yaml b/.changes/1.8.0/Dependencies-20231204-224210.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231204-224210.yaml rename to .changes/1.8.0/Dependencies-20231204-224210.yaml diff --git a/.changes/unreleased/Dependencies-20231212-223929.yaml b/.changes/1.8.0/Dependencies-20231212-223929.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20231212-223929.yaml rename to .changes/1.8.0/Dependencies-20231212-223929.yaml diff --git a/.changes/unreleased/Features-20240220-195925.yaml b/.changes/1.8.0/Features-20240220-195925.yaml similarity index 100% rename from .changes/unreleased/Features-20240220-195925.yaml rename to .changes/1.8.0/Features-20240220-195925.yaml diff --git a/.changes/unreleased/Fixes-20231107-134141.yaml b/.changes/1.8.0/Fixes-20231107-134141.yaml similarity index 100% rename from .changes/unreleased/Fixes-20231107-134141.yaml rename to .changes/1.8.0/Fixes-20231107-134141.yaml diff --git a/.changes/unreleased/Fixes-20231221-081949.yaml b/.changes/1.8.0/Fixes-20231221-081949.yaml similarity index 100% rename from .changes/unreleased/Fixes-20231221-081949.yaml rename to .changes/1.8.0/Fixes-20231221-081949.yaml diff --git a/.changes/unreleased/Under the Hood-20230929-161218.yaml b/.changes/1.8.0/Under the Hood-20230929-161218.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20230929-161218.yaml rename to .changes/1.8.0/Under the Hood-20230929-161218.yaml diff --git a/.changes/unreleased/Under the Hood-20231119-132050.yaml b/.changes/1.8.0/Under the Hood-20231119-132050.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20231119-132050.yaml rename to .changes/1.8.0/Under the Hood-20231119-132050.yaml diff --git a/.changes/unreleased/Under the Hood-20231214-134728.yaml b/.changes/1.8.0/Under the Hood-20231214-134728.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20231214-134728.yaml rename to .changes/1.8.0/Under the Hood-20231214-134728.yaml diff --git a/.changes/unreleased/Under the Hood-20240111-114806.yaml b/.changes/1.8.0/Under the Hood-20240111-114806.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20240111-114806.yaml rename to .changes/1.8.0/Under the Hood-20240111-114806.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 902db37fc..d65c50be4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,47 @@ - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). 
For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry) +## dbt-spark 1.8.0-b1 - March 01, 2024 + +### Features + +- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987)) + +### Fixes + +- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935)) +- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) + +### Under the Hood + +- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) +- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949)) +- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962)) +- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972)) + +### Dependencies + +- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903)) +- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904)) +- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905)) +- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914)) +- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925)) +- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926)) +- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927)) +- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942)) +- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946)) +- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947)) +- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951)) +- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952)) +- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953)) +- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956)) +- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959)) + +### Contributors +- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) +- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) + + ## Previous Releases For information on prior major and minor releases, see their changelogs: - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index f15b401d1..6496f3e22 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "1.8.0a1" +version = "1.8.0b1" diff --git a/setup.py b/setup.py index 5f6290d1b..067ca41b1 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def _get_plugin_version_dict(): package_name = "dbt-spark" 
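This automated release commit bumps the version string in lockstep across .bumpversion.cfg, dbt/adapters/spark/__version__.py, setup.py, and the changelog; bumpversion is presumably what keeps them in sync. A small sanity-check sketch (assuming the package built from this commit is installed; the literal matches the version bumped above):

    # the bumped sources should all agree on 1.8.0b1:
    #   .bumpversion.cfg                    current_version = 1.8.0b1
    #   dbt/adapters/spark/__version__.py   version = "1.8.0b1"
    #   setup.py                            package_version = "1.8.0b1"
    from dbt.adapters.spark.__version__ import version

    assert version == "1.8.0b1", f"unexpected dbt-spark version: {version}"
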
-package_version = "1.8.0a1" +package_version = "1.8.0b1" description = """The Apache Spark adapter plugin for dbt""" odbc_extras = ["pyodbc~=4.0.39"] From d568d21736059aa06fd760672f681010e4ae252c Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Fri, 1 Mar 2024 19:36:56 -0500 Subject: [PATCH 12/44] Vendor release-prep from centralized dbt-release, replace tox testing with spark repo testing (#997) * vendor release-prep from centralized dbt-release, replace tox testing with spark repo testing * replace dbt --version --- .github/workflows/main.yml | 6 +- .github/workflows/release-prep.yml | 650 +++++++++++++++++++++++++++++ .github/workflows/release.yml | 4 +- 3 files changed, 655 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/release-prep.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 20f3f88f4..68911710f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -64,7 +64,7 @@ jobs: mypy --version python -m pip install -r requirements.txt python -m pip install -r dev-requirements.txt - dbt --version + python -c "import dbt.adapters.spark" - name: Run pre-commit hooks run: pre-commit run --all-files --show-diff-on-failure @@ -200,10 +200,10 @@ jobs: find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/ - name: Check wheel distributions run: | - dbt --version + python -c "import dbt.adapters.spark" - name: Install source distributions run: | find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/ - name: Check source distributions run: | - dbt --version + python -c "import dbt.adapters.spark" diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml new file mode 100644 index 000000000..1a6e450c4 --- /dev/null +++ b/.github/workflows/release-prep.yml @@ -0,0 +1,650 @@ +# **what?** +# Perform the version bump, generate the changelog and run tests. +# +# Inputs: +# sha: The commit to attach to this release +# version_number: The release version number (i.e. 1.0.0b1, 1.2.3rc2, 1.0.0) +# target_branch: The branch that we will release from +# env_setup_script_path: Path to the environment setup script +# test_run: Test run (The temp branch will be used for release) +# nightly_release: Identifier that this is nightly release +# +# Outputs: +# final_sha: The sha that will actually be released. This can differ from the +# input sha if adding a version bump and/or changelog +# changelog_path: Path to the changelog file (ex .changes/1.2.3-rc1.md) +# +# Branching strategy: +# - During execution workflow execution the temp branch will be generated. +# - For normal runs the temp branch will be removed once changes were merged to target branch; +# - For test runs we will keep temp branch and will use it for release; +# Naming strategy: +# - For normal runs: prep-release/${{ inputs.version_number }}_$GITHUB_RUN_ID +# - For test runs: prep-release/test-run/${{ inputs.version_number }}_$GITHUB_RUN_ID +# - For nightly releases: prep-release/nightly-release/${{ inputs.version_number }}_$GITHUB_RUN_ID +# +# **why?** +# Reusable and consistent GitHub release process. +# +# **when?** +# Call when ready to kick off a build and release +# +# Validation Checks +# +# 1. Bump the version if it has not been bumped +# 2. 
Generate the changelog (via changie) if there is no markdown file for this version +# + +name: Version Bump and Changelog Generation + +on: + workflow_call: + inputs: + sha: + required: true + type: string + version_number: + required: true + type: string + target_branch: + required: true + type: string + env_setup_script_path: + required: false + type: string + default: "" + test_run: + required: false + default: true + type: boolean + nightly_release: + type: boolean + default: false + required: false + outputs: + final_sha: + description: The new commit that includes the changelog and version bump. + value: ${{ jobs.determine-release-sha.outputs.final_sha }} + changelog_path: + description: The path to the changelog for this version + value: ${{ jobs.audit-changelog.outputs.changelog_path }} + secrets: + FISHTOWN_BOT_PAT: + description: "Token to commit/merge changes into branches" + required: true + IT_TEAM_MEMBERSHIP: + description: "Token that can view org level teams" + required: true + +permissions: + contents: write + +defaults: + run: + shell: bash + +env: + PYTHON_TARGET_VERSION: 3.8 + NOTIFICATION_PREFIX: "[Release Preparation]" + +jobs: + log-inputs: + runs-on: ubuntu-latest + + steps: + - name: "[DEBUG] Print Variables" + run: | + # WORKFLOW INPUTS + echo The last commit sha in the release: ${{ inputs.sha }} + echo The release version number: ${{ inputs.version_number }} + echo The branch that we will release from: ${{ inputs.target_branch }} + echo Path to the environment setup script: ${{ inputs.env_setup_script_path }} + echo Test run: ${{ inputs.test_run }} + echo Nightly release: ${{ inputs.nightly_release }} + # ENVIRONMENT VARIABLES + echo Python target version: ${{ env.PYTHON_TARGET_VERSION }} + echo Notification prefix: ${{ env.NOTIFICATION_PREFIX }} + + audit-changelog: + runs-on: ubuntu-latest + + outputs: + changelog_path: ${{ steps.set_path.outputs.changelog_path }} + exists: ${{ steps.set_existence.outputs.exists }} + base_version: ${{ steps.semver.outputs.base-version }} + prerelease: ${{ steps.semver.outputs.pre-release }} + is_prerelease: ${{ steps.semver.outputs.is-pre-release }} + + steps: + - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}" + uses: actions/checkout@v4 + with: + ref: ${{ inputs.sha }} + + - name: "Audit Version And Parse Into Parts" + id: semver + uses: dbt-labs/actions/parse-semver@v1.1.0 + with: + version: ${{ inputs.version_number }} + + - name: "Set Changelog Path" + id: set_path + run: | + path=".changes/" + if [[ ${{ steps.semver.outputs.is-pre-release }} -eq 1 ]] + then + path+="${{ steps.semver.outputs.base-version }}-${{ steps.semver.outputs.pre-release }}.md" + else + path+="${{ steps.semver.outputs.base-version }}.md" + fi + # Send notification + echo "changelog_path=$path" >> $GITHUB_OUTPUT + title="Changelog path" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$changelog_path" + + - name: "Set Changelog Existence For Subsequent Jobs" + id: set_existence + run: | + does_exist=false + if test -f ${{ steps.set_path.outputs.changelog_path }} + then + does_exist=true + fi + echo "exists=$does_exist">> $GITHUB_OUTPUT + + - name: "[Notification] Set Changelog Existence For Subsequent Jobs" + run: | + title="Changelog exists" + if [[ ${{ steps.set_existence.outputs.exists }} == true ]] + then + message="Changelog file ${{ steps.set_path.outputs.changelog_path }} already exists" + else + message="Changelog file ${{ steps.set_path.outputs.changelog_path }} doesn't exist" + fi + echo "::notice title=${{ 
env.NOTIFICATION_PREFIX }}: $title::$message" + + - name: "Spark safety check" + if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }} + run: | + if [[ ${{ steps.set_existence.outputs.exists }} != true ]] + then + title="Spark version-bump.yml check" + message="dbt-spark needs version-bump.yml run before running the release. The changelog is not up to date." + echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + exit 1 + fi + + - name: "[DEBUG] Print Outputs" + run: | + echo changelog_path: ${{ steps.set_path.outputs.changelog_path }} + echo exists: ${{ steps.set_existence.outputs.exists }} + echo base_version: ${{ steps.semver.outputs.base-version }} + echo prerelease: ${{ steps.semver.outputs.pre-release }} + echo is_prerelease: ${{ steps.semver.outputs.is-pre-release }} + + audit-version-in-code: + runs-on: ubuntu-latest + + outputs: + up_to_date: ${{ steps.version-check.outputs.up_to_date }} + + steps: + - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}" + uses: actions/checkout@v4 + with: + ref: ${{ inputs.sha }} + + - name: "Check Current Version In Code" + id: version-check + run: | + is_updated=false + if grep -Fxq "current_version = ${{ inputs.version_number }}" .bumpversion.cfg + then + is_updated=true + fi + echo "up_to_date=$is_updated" >> $GITHUB_OUTPUT + + - name: "[Notification] Check Current Version In Code" + run: | + title="Version check" + if [[ ${{ steps.version-check.outputs.up_to_date }} == true ]] + then + message="The version in the codebase is equal to the provided version" + else + message="The version in the codebase differs from the provided version" + fi + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + - name: "Spark safety check" + if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }} + run: | + if [[ ${{ steps.version-check.outputs.up_to_date }} != true ]] + then + title="Spark version-bump.yml check" + message="dbt-spark needs version-bump.yml run before running the release. The version bump is not up to date." 
+ echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + exit 1 + fi + + - name: "[DEBUG] Print Outputs" + run: | + echo up_to_date: ${{ steps.version-check.outputs.up_to_date }} + + skip-generate-changelog: + runs-on: ubuntu-latest + needs: [audit-changelog] + if: needs.audit-changelog.outputs.exists == 'true' + + steps: + - name: "Changelog Exists, Skip Generating New Changelog" + run: | + # Send notification + title="Skip changelog generation" + message="A changelog file already exists at ${{ needs.audit-changelog.outputs.changelog_path }}, skipping generating changelog" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + skip-version-bump: + runs-on: ubuntu-latest + needs: [audit-version-in-code] + if: needs.audit-version-in-code.outputs.up_to_date == 'true' + + steps: + - name: "Version Already Bumped" + run: | + # Send notification + title="Skip version bump" + message="The version has already been bumped to ${{ inputs.version_number }}, skipping version bump" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + create-temp-branch: + runs-on: ubuntu-latest + needs: [audit-changelog, audit-version-in-code] + if: needs.audit-changelog.outputs.exists == 'false' || needs.audit-version-in-code.outputs.up_to_date == 'false' + + outputs: + branch_name: ${{ steps.variables.outputs.branch_name }} + + steps: + - name: "Checkout ${{ github.repository }} Commit ${{ inputs.sha }}" + uses: actions/checkout@v4 + with: + ref: ${{ inputs.sha }} + + - name: "Generate Branch Name" + id: variables + run: | + name="prep-release/" + if [[ ${{ inputs.nightly_release }} == true ]] + then + name+="nightly-release/" + elif [[ ${{ inputs.test_run }} == true ]] + then + name+="test-run/" + fi + name+="${{ inputs.version_number }}_$GITHUB_RUN_ID" + echo "branch_name=$name" >> $GITHUB_OUTPUT + + - name: "Create Branch - ${{ steps.variables.outputs.branch_name }}" + run: | + git checkout -b ${{ steps.variables.outputs.branch_name }} + git push -u origin ${{ steps.variables.outputs.branch_name }} + + - name: "[Notification] Temp branch created" + run: | + # Send notification + title="Temp branch generated" + message="The ${{ steps.variables.outputs.branch_name }} branch created" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + - name: "[DEBUG] Print Outputs" + run: | + echo branch_name ${{ steps.variables.outputs.branch_name }} + + generate-changelog-bump-version: + runs-on: ubuntu-latest + needs: [audit-changelog, audit-version-in-code, create-temp-branch] + + steps: + - name: "Checkout ${{ github.repository }} Branch ${{ needs.create-temp-branch.outputs.branch_name }}" + uses: actions/checkout@v4 + with: + ref: ${{ needs.create-temp-branch.outputs.branch_name }} + + - name: "Install Spark Dependencies" + if: ${{ contains(github.repository, 'dbt-labs/dbt-spark') }} + run: | + sudo apt-get update + sudo apt-get install libsasl2-dev + + - name: "Add Homebrew To PATH" + run: | + echo "/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin" >> $GITHUB_PATH + + - name: "Install Homebrew Packages" + run: | + brew install pre-commit + brew tap miniscruff/changie https://github.com/miniscruff/changie + brew install changie + + - name: "Set json File Name" + id: json_file + run: | + echo "name=output_$GITHUB_RUN_ID.json" >> $GITHUB_OUTPUT + + - name: "Get Core Team Membership" + run: | + gh api -H "Accept: application/vnd.github+json" orgs/dbt-labs/teams/core-group/members > ${{ steps.json_file.outputs.name }} + env: + 
GH_TOKEN: ${{ secrets.IT_TEAM_MEMBERSHIP }} + + - name: "Set Core Team Membership for Changie Contributors exclusion" + id: set_team_membership + run: | + team_list=$(jq -r '.[].login' ${{ steps.json_file.outputs.name }}) + echo $team_list + team_list_single=$(echo $team_list | tr '\n' ' ') + echo "CHANGIE_CORE_TEAM=$team_list_single" >> $GITHUB_ENV + + - name: "Delete the json File" + run: | + rm ${{ steps.json_file.outputs.name }} + + - name: "Generate Release Changelog" + if: needs.audit-changelog.outputs.exists == 'false' + run: | + if [[ ${{ needs.audit-changelog.outputs.is_prerelease }} -eq 1 ]] + then + changie batch ${{ needs.audit-changelog.outputs.base_version }} --move-dir '${{ needs.audit-changelog.outputs.base_version }}' --prerelease ${{ needs.audit-changelog.outputs.prerelease }} + elif [[ -d ".changes/${{ needs.audit-changelog.outputs.base_version }}" ]] + then + changie batch ${{ needs.audit-changelog.outputs.base_version }} --include '${{ needs.audit-changelog.outputs.base_version }}' --remove-prereleases + else # releasing a final patch with no prereleases + changie batch ${{ needs.audit-changelog.outputs.base_version }} + fi + changie merge + git status + + - name: "Check Changelog Created Successfully" + if: needs.audit-changelog.outputs.exists == 'false' + run: | + title="Changelog" + if [[ -f ${{ needs.audit-changelog.outputs.changelog_path }} ]] + then + message="Changelog file created successfully" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + else + message="Changelog failed to generate" + echo "::error title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + exit 1 + fi + + - name: "Set up Python - ${{ env.PYTHON_TARGET_VERSION }}" + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_TARGET_VERSION }} + + - name: "Install Python Dependencies" + if: needs.audit-version-in-code.outputs.up_to_date == 'false' + run: | + python3 -m venv env + source env/bin/activate + python -m pip install --upgrade pip + + - name: "Bump Version To ${{ inputs.version_number }}" + if: needs.audit-version-in-code.outputs.up_to_date == 'false' + # note: bumpversion is no longer supported, it actually points to bump2version now + run: | + source env/bin/activate + if [ -f "editable-requirements.txt" ] + then + python -m pip install -r dev-requirements.txt -r editable-requirements.txt + else + python -m pip install -r dev-requirements.txt + fi + env/bin/bumpversion --allow-dirty --new-version ${{ inputs.version_number }} major + git status + + - name: "[Notification] Bump Version To ${{ inputs.version_number }}" + if: needs.audit-version-in-code.outputs.up_to_date == 'false' + run: | + title="Version bump" + message="Version successfully bumped in codebase to ${{ inputs.version_number }}" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + # this step will fail on whitespace errors but also correct them + - name: "Remove Trailing Whitespace Via Pre-commit" + continue-on-error: true + run: | + pre-commit run trailing-whitespace --files .bumpversion.cfg CHANGELOG.md .changes/* + git status + + # this step will fail on newline errors but also correct them + - name: "Removing Extra Newlines Via Pre-commit" + continue-on-error: true + run: | + pre-commit run end-of-file-fixer --files .bumpversion.cfg CHANGELOG.md .changes/* + git status + + - name: "Commit & Push Changes" + run: | + #Data for commit + user="Github Build Bot" + email="buildbot@fishtownanalytics.com" + commit_message="Bumping version to ${{ 
inputs.version_number }} and generate changelog" + #Commit changes to branch + git config user.name "$user" + git config user.email "$email" + git pull + git add . + git commit -m "$commit_message" + git push + + run-unit-tests: + name: unit test / python ${{ matrix.python-version }} + + runs-on: ubuntu-latest + timeout-minutes: 10 + + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - name: Check out the repository + uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install python dependencies + run: | + sudo apt-get update + sudo apt-get install libsasl2-dev + python -m pip install --user --upgrade pip + python -m pip --version + python -m pip install -r requirements.txt + python -m pip install -r dev-requirements.txt + python -m pip install -e . + + - name: Run unit tests + run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit + + run-integration-tests: + name: ${{ matrix.test }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + test: + - "apache_spark" + - "spark_session" + - "databricks_sql_endpoint" + - "databricks_cluster" + - "databricks_http_cluster" + + env: + DBT_INVOCATION_ENV: github-actions + DD_CIVISIBILITY_AGENTLESS_ENABLED: true + DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} + DD_SITE: datadoghq.com + DD_ENV: ci + DD_SERVICE: ${{ github.event.repository.name }} + DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }} + DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }} + DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} + DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} + DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }} + DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" + DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" + DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" + + steps: + - name: Check out the repository + if: github.event_name != 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + + # explicitly checkout the branch for the PR, + # this is necessary for the `pull_request` event + - name: Check out the repository (PR) + if: github.event_name == 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + ref: ${{ github.event.pull_request.head.sha }} + + # the python version used here is not what is used in the tests themselves + - name: Set up Python for dagger + uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: Install python dependencies + run: | + python -m pip install --user --upgrade pip + python -m pip --version + python -m pip install -r dagger/requirements.txt + + - name: Run tests for ${{ matrix.test }} + run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }} + + merge-changes-into-target-branch: + runs-on: ubuntu-latest + needs: [run-unit-tests, run-integration-tests, create-temp-branch, audit-version-in-code, audit-changelog] + if: | + !failure() && !cancelled() && + inputs.test_run == false && + ( + needs.audit-changelog.outputs.exists == 'false' || + needs.audit-version-in-code.outputs.up_to_date == 'false' + ) + + steps: + - name: "[Debug] Print Variables" + run: | + echo target_branch: ${{ inputs.target_branch }} + echo branch_name: ${{ needs.create-temp-branch.outputs.branch_name }} + echo inputs.test_run: ${{ inputs.test_run }} + echo 
needs.audit-changelog.outputs.exists: ${{ needs.audit-changelog.outputs.exists }} + echo needs.audit-version-in-code.outputs.up_to_date: ${{ needs.audit-version-in-code.outputs.up_to_date }} + + - name: "Checkout Repo ${{ github.repository }}" + uses: actions/checkout@v4 + + - name: "Merge Changes Into ${{ inputs.target_branch }}" + uses: everlytic/branch-merge@1.1.5 + with: + source_ref: ${{ needs.create-temp-branch.outputs.branch_name }} + target_branch: ${{ inputs.target_branch }} + github_token: ${{ secrets.FISHTOWN_BOT_PAT }} + commit_message_template: "[Automated] Merged {source_ref} into target {target_branch} during release process" + + - name: "[Notification] Changes Merged into ${{ inputs.target_branch }}" + run: | + title="Changelog and Version Bump Branch Merge" + message="The ${{ needs.create-temp-branch.outputs.branch_name }} branch was merged into ${{ inputs.target_branch }}" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + determine-release-sha: + runs-on: ubuntu-latest + needs: + [ + create-temp-branch, + merge-changes-into-target-branch, + audit-changelog, + audit-version-in-code, + ] + # always run this job, regardless of if the dependant jobs were skipped + if: ${{ !failure() && !cancelled() }} + + # Get the sha that will be released. If the changelog already exists on the input sha and the version has already been bumped, + # then it is what we will release. Otherwise we generated a changelog and did the version bump in this workflow and there is a + # new sha to use from the merge we just did. Grab that here instead. + outputs: + final_sha: ${{ steps.resolve_commit_sha.outputs.release_sha }} + + steps: + - name: "[Debug] Print Variables" + run: | + echo target_branch: ${{ inputs.target_branch }} + echo new_branch: ${{ needs.create-temp-branch.outputs.branch_name }} + echo changelog_exists: ${{ needs.audit-changelog.outputs.exists }} + echo up_to_date: ${{ needs.audit-version-in-code.outputs.up_to_date }} + + - name: "Resolve Branch To Checkout" + id: resolve_branch + run: | + branch="" + if [[ ${{ inputs.test_run == true }} ]] + then + branch=${{ needs.create-temp-branch.outputs.branch_name }} + else + branch=${{ inputs.target_branch }} + fi + echo "target_branch=$branch" >> $GITHUB_OUTPUT + + - name: "[Notification] Resolve Branch To Checkout" + run: | + title="Branch pick" + message="The ${{ steps.resolve_branch.outputs.target_branch }} branch will be used for release" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + - name: "Checkout Resolved Branch - ${{ steps.resolve_branch.outputs.target_branch }}" + uses: actions/checkout@v4 + with: + ref: ${{ steps.resolve_branch.outputs.target_branch }} + + - name: "[Debug] Log Branch" + run: git status + + - name: "Resolve Commit SHA For Release" + id: resolve_commit_sha + run: | + commit_sha="" + if [[ ${{ needs.audit-changelog.outputs.exists }} == false ]] || [[ ${{ needs.audit-version-in-code.outputs.up_to_date }} == false ]] + then + commit_sha=$(git rev-parse HEAD) + else + commit_sha=${{ inputs.sha }} + fi + echo "release_sha=$commit_sha" >> $GITHUB_OUTPUT + + - name: "[Notification] Resolve Commit SHA For Release" + run: | + title="Release commit pick" + message="The ${{ steps.resolve_commit_sha.outputs.release_sha }} commit will be used for release" + echo "::notice title=${{ env.NOTIFICATION_PREFIX }}: $title::$message" + + - name: "Remove Temp Branch - ${{ needs.create-temp-branch.outputs.branch_name }}" + if: ${{ inputs.test_run == false && 
needs.create-temp-branch.outputs.branch_name != '' }} + run: | + git push origin -d ${{ needs.create-temp-branch.outputs.branch_name }} + + - name: "[Debug] Print Outputs" + run: | + echo release_sha: ${{ steps.resolve_commit_sha.outputs.release_sha }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index df30809a9..9b2774f17 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -44,7 +44,7 @@ on: package_test_command: description: "Package test command" type: string - default: "dbt --version" + default: "python -c \"import dbt.adapters.spark\"" required: true env_setup_script_path: description: "Environment setup script path" @@ -89,7 +89,7 @@ jobs: audit-version-and-changelog: name: Bump package version, Generate changelog - uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@main + uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main with: sha: ${{ inputs.sha }} From a2c487111d57c9331620613ec83813ff6f0d7d96 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:53:49 -0800 Subject: [PATCH 13/44] add tox file so release workflows succeed (#996) * add tox file so release workflows succeed * add empty tox file --- tox.ini | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tox.ini diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..62bb9c5b0 --- /dev/null +++ b/tox.ini @@ -0,0 +1,3 @@ +[tox] +skipsdist = True +envlist = unit, flake8, integration-spark-thrift From 748c7f615ec482cfd63eb05cbcf7b26f3b3655de Mon Sep 17 00:00:00 2001 From: Mila Page <67295367+VersusFacit@users.noreply.github.com> Date: Mon, 18 Mar 2024 18:15:56 -0700 Subject: [PATCH 14/44] Add workflow for spark and changelog (#998) Co-authored-by: Mila Page --- .../unreleased/Features-20240318-033621.yaml | 6 ++ .github/workflows/release-internal.yml | 64 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 .changes/unreleased/Features-20240318-033621.yaml create mode 100644 .github/workflows/release-internal.yml diff --git a/.changes/unreleased/Features-20240318-033621.yaml b/.changes/unreleased/Features-20240318-033621.yaml new file mode 100644 index 000000000..a1a1b9c5b --- /dev/null +++ b/.changes/unreleased/Features-20240318-033621.yaml @@ -0,0 +1,6 @@ +kind: Features +body: ': Add new workflow for internal patch releases' +time: 2024-03-18T03:36:21.634918-07:00 +custom: + Author: versusfacit + Issue: "38" diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml new file mode 100644 index 000000000..462e14819 --- /dev/null +++ b/.github/workflows/release-internal.yml @@ -0,0 +1,64 @@ +name: Release internal patch + +on: + workflow_dispatch: + inputs: + version_number: + description: "The release version number (i.e. 1.0.0b1)" + type: string + required: true + sha: + description: "The sha to use (leave empty to use latest on main)" + type: string + required: false + package_test_command: + description: "Package test command" + type: string + default: "python -c \"import dbt.adapters.spark\"" + required: true + dbms_name: + description: "The name of the warehouse the adapter connects to." + type: string + default: "spark" + required: true + workflow_call: + inputs: + version_number: + description: "The release version number (i.e. 
1.0.0b1)" + type: string + required: true + sha: + description: "The sha to use (leave empty to use latest on main)" + type: string + required: false + package_test_command: + description: "Package test command" + type: string + default: "python -c \"import dbt.adapters.spark\"" + required: true + dbms_name: + description: "The name of the warehouse the adapter connects to." + type: string + default: "spark" + required: true + +defaults: + run: + shell: bash + +env: + PYTHON_TARGET_VERSION: 3.11 + +jobs: + invoke-reusable-workflow: + name: Build and Release Internally + + uses: VersusFacit/dbt-release/.github/workflows/internal-archive-release.yml@main + + with: + version_number: ${{ inputs.version_number }} + package_test_command: ${{ inputs.package_test_command }} + dbms_name: ${{ inputs.dbms_name }} + sha: ${{ inputs.sha }} + + secrets: inherit From 9fe1a0613dac7ccd717783b8d78081f7a3059560 Mon Sep 17 00:00:00 2001 From: Mila Page <67295367+VersusFacit@users.noreply.github.com> Date: Tue, 26 Mar 2024 17:15:52 -0700 Subject: [PATCH 15/44] Finish internal build workflow (#999) * Add workflow for spark * Shape up workflow. * Modify range of acceptable semvers to include a build tag. * Fix action name by making into a string * add tests to workflow * Change python version to match Cloud. * Pare down spark testing. * Change branch reference of workflow to main. --------- Co-authored-by: Mila Page --- .github/workflows/release-internal.yml | 153 +++++++++++++++++++------ setup.py | 3 +- 2 files changed, 118 insertions(+), 38 deletions(-) diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml index 462e14819..6334fd282 100644 --- a/.github/workflows/release-internal.yml +++ b/.github/workflows/release-internal.yml @@ -1,4 +1,16 @@ -name: Release internal patch +# What? +# +# Tag and release an arbitrary ref. Uploads to an internal archive for further processing. +# +# How? +# +# After checking out and testing the provided ref, the image is built and uploaded. +# +# When? +# +# Manual trigger. + +name: "Release internal patch" on: workflow_dispatch: @@ -7,58 +19,125 @@ on: description: "The release version number (i.e. 1.0.0b1)" type: string required: true - sha: - description: "The sha to use (leave empty to use latest on main)" - type: string - required: false - package_test_command: - description: "Package test command" - type: string - default: "python -c \"import dbt.adapters.spark\"" - required: true - dbms_name: - description: "The name of the warehouse the adapter connects to." + ref: + description: "The ref (sha or branch name) to use" type: string - default: "spark" + default: "main" required: true - workflow_call: - inputs: - version_number: - description: "The release version number (i.e. 1.0.0b1)" - type: string - required: true - sha: - description: "The sha to use (leave empty to use latest on main)" - type: string - required: false package_test_command: description: "Package test command" type: string default: "python -c \"import dbt.adapters.spark\"" required: true - dbms_name: - description: "The name of the warehouse the adapter connects to." 
- type: string - default: "spark" - required: true defaults: run: - shell: bash + shell: "bash" env: - PYTHON_TARGET_VERSION: 3.11 + PYTHON_TARGET_VERSION: 3.8 jobs: + run-unit-tests: + name: "Unit tests" + + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: "Check out the repository" + uses: actions/checkout@v3 + + - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}" + uses: actions/setup-python@v4 + with: + python-version: "${{ env.PYTHON_TARGET_VERSION }}" + + - name: Install python dependencies + run: | + sudo apt-get update + sudo apt-get install libsasl2-dev + python -m pip install --user --upgrade pip + python -m pip --version + python -m pip install -r requirements.txt + python -m pip install -r dev-requirements.txt + python -m pip install -e . + + - name: Run unit tests + run: python -m pytest --color=yes --csv unit_results.csv -v tests/unit + + run-integration-tests: + name: "${{ matrix.test }}" + needs: [run-unit-tests] + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + test: + - "apache_spark" + - "spark_session" + - "databricks_sql_endpoint" + - "databricks_cluster" + - "databricks_http_cluster" + + env: + DBT_INVOCATION_ENV: github-actions + DD_CIVISIBILITY_AGENTLESS_ENABLED: true + DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} + DD_SITE: datadoghq.com + DD_ENV: ci + DD_SERVICE: ${{ github.event.repository.name }} + DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }} + DBT_DATABRICKS_HOST_NAME: ${{ secrets.DBT_DATABRICKS_HOST_NAME }} + DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }} + DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }} + DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USERNAME }} + DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com" + DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com" + DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com" + + steps: + - name: Check out the repository + if: github.event_name != 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + + # explicitly checkout the branch for the PR, + # this is necessary for the `pull_request` event + - name: Check out the repository (PR) + if: github.event_name == 'pull_request_target' + uses: actions/checkout@v3 + with: + persist-credentials: false + ref: ${{ github.event.pull_request.head.ref }} + + # the python version used here is not what is used in the tests themselves + - name: Set up Python for dagger + uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: Install python dependencies + run: | + python -m pip install --user --upgrade pip + python -m pip --version + python -m pip install -r dagger/requirements.txt + + - name: "Run tests for ${{ matrix.test }}" + run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }} + invoke-reusable-workflow: - name: Build and Release Internally + name: "Build and Release Internally" + needs: [run-integration-tests] - uses: VersusFacit/dbt-release/.github/workflows/internal-archive-release.yml@main + uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main" with: - version_number: ${{ inputs.version_number }} - package_test_command: ${{ inputs.package_test_command }} - dbms_name: ${{ inputs.dbms_name }} - sha: ${{ inputs.sha }} + version_number: "${{ inputs.version_number }}" + package_test_command: "${{ inputs.package_test_command }}" + dbms_name: "spark" + ref: "${{ inputs.ref }}" - secrets: inherit + secrets: "inherit" diff --git a/setup.py b/setup.py index 
067ca41b1..48339e7c0 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,8 @@ def _get_plugin_version_dict():
     _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py")
     _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
     _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
-    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}["']"""
+    _build = r"""(\+build[0-9]+)?"""
+    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}{_build}["']"""
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:
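
A minimal sketch of what the new `_build` fragment buys: with `(\+build[0-9]+)?` appended, the version regex also accepts an internal build tag such as `1.8.0b2+build3`. This is an illustration only, not the package's helper; the named groups follow the fragments in the hunk above and are assumptions where the diff obscures them.

import re

_semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
_pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
_build = r"""(\+build[0-9]+)?"""  # the fragment added by this patch
_version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}{_build}["']"""

for line in ('version = "1.8.0b2"', 'version = "1.8.0b2+build3"'):
    match = re.search(_version_pattern, line)
    # both lines match; groupdict() exposes major/minor/patch plus any prerelease parts
    print(line, "->", match.groupdict() if match else "no match")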

From 7129f59e123dc572721840926860dd350105aab7 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 28 Mar 2024 13:44:15 -0400
Subject: [PATCH 16/44] Pin `black>=24.3` (#1000)

---
 .changes/unreleased/Security-20240327-193553.yaml | 6 ++++++
 dev-requirements.txt                              | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Security-20240327-193553.yaml

diff --git a/.changes/unreleased/Security-20240327-193553.yaml b/.changes/unreleased/Security-20240327-193553.yaml
new file mode 100644
index 000000000..daee50fd6
--- /dev/null
+++ b/.changes/unreleased/Security-20240327-193553.yaml
@@ -0,0 +1,6 @@
+kind: Security
+body: Pin `black>=24.3` in `dev-requirements.txt`
+time: 2024-03-27T19:35:53.102377-04:00
+custom:
+  Author: mikealfare
+  PR: "1000"
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8f674d84b..26522411a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,7 +7,7 @@ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.12
+black>=24.3
 bumpversion~=0.6.0
 click~=8.1
 flake8~=6.1;python_version>="3.8"

From 45b904cb06179f15c9b04c08b1bb7948ae3d5035 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 2 Apr 2024 14:39:48 -0400
Subject: [PATCH 17/44] Pin `beartype<0.18.0` (#1001)

* Pin `beartype<0.18.0`
---
 dagger/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dagger/requirements.txt b/dagger/requirements.txt
index b50c448d3..f150e3093 100644
--- a/dagger/requirements.txt
+++ b/dagger/requirements.txt
@@ -1,2 +1,3 @@
+beartype<0.18.0
 dagger-io~=0.9.7
 python-dotenv

From 377c65fb02f57001a52302291653ceef2081b97e Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 3 Apr 2024 15:45:13 -0400
Subject: [PATCH 18/44] Add `dbt-core~=1.8.0a1` as convenience dep (#1002)

* add `dbt-core~=1.8.0a1` as convenience dep
---
 .changes/unreleased/Dependencies-20240403-135436.yaml | 6 ++++++
 setup.py                                              | 2 ++
 2 files changed, 8 insertions(+)
 create mode 100644 .changes/unreleased/Dependencies-20240403-135436.yaml

diff --git a/.changes/unreleased/Dependencies-20240403-135436.yaml b/.changes/unreleased/Dependencies-20240403-135436.yaml
new file mode 100644
index 000000000..345b70740
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240403-135436.yaml
@@ -0,0 +1,6 @@
+kind: Dependencies
+body: Add `dbt-core` as a dependency to preserve backwards compatibility for installation
+time: 2024-04-03T13:54:36.667724-04:00
+custom:
+  Author: mikealfare
+  PR: "1002"
diff --git a/setup.py b/setup.py
index 48339e7c0..325d31ccd 100644
--- a/setup.py
+++ b/setup.py
@@ -67,6 +67,8 @@ def _get_plugin_version_dict():
         "sqlparams>=3.0.0",
         "dbt-common>=0.1.0a1,<2.0",
         "dbt-adapters>=0.1.0a1,<2.0",
+        # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
+        "dbt-core>=1.8.0a1",
     ],
     extras_require={
         "ODBC": odbc_extras,

From da885394070094b6c632b06dabf7a2a4a50e5c0b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 3 Apr 2024 16:01:05 -0400
Subject: [PATCH 19/44] [create-pull-request] automated change (#1003)

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .bumpversion.cfg                                |  2 +-
 .changes/1.8.0-b2.md                            | 13 +++++++++++++
 .../Dependencies-20240403-135436.yaml           |  0
 .../Features-20240318-033621.yaml               |  0
 .../Security-20240327-193553.yaml               |  0
 CHANGELOG.md                                    | 17 ++++++++++++++++-
 dbt/adapters/spark/__version__.py               |  2 +-
 setup.py                                        |  2 +-
 8 files changed, 32 insertions(+), 4 deletions(-)
 create mode 100644 .changes/1.8.0-b2.md
 rename .changes/{unreleased => 1.8.0}/Dependencies-20240403-135436.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Features-20240318-033621.yaml (100%)
 rename .changes/{unreleased => 1.8.0}/Security-20240327-193553.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 595914b21..aa22c8214 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0b1
+current_version = 1.8.0b2
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.8.0-b2.md b/.changes/1.8.0-b2.md
new file mode 100644
index 000000000..806d61158
--- /dev/null
+++ b/.changes/1.8.0-b2.md
@@ -0,0 +1,13 @@
+## dbt-spark 1.8.0-b2 - April 03, 2024
+
+### Features
+
+- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
+
+### Dependencies
+
+- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
+
+### Security
+
+- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
diff --git a/.changes/unreleased/Dependencies-20240403-135436.yaml b/.changes/1.8.0/Dependencies-20240403-135436.yaml
similarity index 100%
rename from .changes/unreleased/Dependencies-20240403-135436.yaml
rename to .changes/1.8.0/Dependencies-20240403-135436.yaml
diff --git a/.changes/unreleased/Features-20240318-033621.yaml b/.changes/1.8.0/Features-20240318-033621.yaml
similarity index 100%
rename from .changes/unreleased/Features-20240318-033621.yaml
rename to .changes/1.8.0/Features-20240318-033621.yaml
diff --git a/.changes/unreleased/Security-20240327-193553.yaml b/.changes/1.8.0/Security-20240327-193553.yaml
similarity index 100%
rename from .changes/unreleased/Security-20240327-193553.yaml
rename to .changes/1.8.0/Security-20240327-193553.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d65c50be4..81f0575dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,22 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
+## dbt-spark 1.8.0-b2 - April 03, 2024
+
+### Features
+
+- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
+
+### Dependencies
+
+- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
+
+### Security
+
+- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
+
+
+
 ## dbt-spark 1.8.0-b1 - March 01, 2024
 
 ### Features
@@ -45,7 +61,6 @@
 - [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
 - [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
 
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 6496f3e22..7d16c28f0 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.8.0b1"
+version = "1.8.0b2"
diff --git a/setup.py b/setup.py
index 325d31ccd..55112e3f2 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.8.0b1"
+package_version = "1.8.0b2"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=4.0.39"]

From 0646c2403b97be2f83e70cac7233f22b35d496d7 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:09:34 -0500
Subject: [PATCH 20/44] test name change (#1005)

---
 tests/functional/adapter/dbt_clone/test_dbt_clone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/adapter/dbt_clone/test_dbt_clone.py b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
index a5e8d70e0..80e919a24 100644
--- a/tests/functional/adapter/dbt_clone/test_dbt_clone.py
+++ b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
@@ -15,7 +15,7 @@
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session")
-class TestSparkBigqueryClonePossible(BaseClonePossible):
+class TestSparkClonePossible(BaseClonePossible):
     @pytest.fixture(scope="class")
     def models(self):
         return {

From e93deea64800d9c60a014d5e2a366b19ef630e17 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 18 Apr 2024 22:47:20 -0400
Subject: [PATCH 21/44] Update dependabot config to cover GHA and Docker
 (#1006)

* Update dependabot config to cover GHA and Docker
---
 .../unreleased/Under the Hood-20240410-183535.yaml    |  6 ++++++
 .github/dependabot.yml                                | 11 ++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240410-183535.yaml

diff --git a/.changes/unreleased/Under the Hood-20240410-183535.yaml b/.changes/unreleased/Under the Hood-20240410-183535.yaml
new file mode 100644
index 000000000..54b69f285
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240410-183535.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Update dependabot config to cover GHA and Docker
+time: 2024-04-10T18:35:35.093246-04:00
+custom:
+  Author: mikealfare
+  Issue: "1006"
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 2a6f34492..ae2be43aa 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,8 +1,17 @@
 version: 2
 updates:
-  # python dependencies
   - package-ecosystem: "pip"
     directory: "/"
     schedule:
       interval: "daily"
     rebase-strategy: "disabled"
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"
+  - package-ecosystem: "docker"
+    directory: "/docker"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"

From 080ba71b2689a67d9195dcae2cb6e9b0116e76dc Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 19 Apr 2024 12:48:09 -0400
Subject: [PATCH 22/44] add pre-commit check for dbt-core (#1015)

---
 .pre-commit-config.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5e7fdbd04..882a32769 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,6 +14,10 @@ repos:
   - id: end-of-file-fixer
   - id: trailing-whitespace
   - id: check-case-conflict
+- repo: https://github.com/dbt-labs/pre-commit-hooks
+  rev: v0.1.0a1
+  hooks:
+  - id: dbt-core-in-adapters-check
 - repo: https://github.com/psf/black
   rev: 23.1.0
   hooks:

From c5742f225f20938f16c4ce95ba7e71d6874a198c Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Thu, 25 Apr 2024 10:20:44 -0400
Subject: [PATCH 23/44] pin macos test runners to macos-12 (#1016)

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 68911710f..189a8847c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -173,7 +173,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-12, windows-latest]
         python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:

From c0c3f4f0f81004d49ab7aa7209aae096d6b8eaea Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 25 Apr 2024 23:48:22 +0000
Subject: [PATCH 24/44] Bump actions/checkout from 3 to 4 (#1014)

* Bump actions/checkout from 3 to 4

Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240419-024916.yaml | 6 ++++++
 .github/workflows/integration.yml                     | 4 ++--
 .github/workflows/main.yml                            | 6 +++---
 .github/workflows/release-internal.yml                | 6 +++---
 .github/workflows/release-prep.yml                    | 6 +++---
 5 files changed, 17 insertions(+), 11 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024916.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024916.yaml b/.changes/unreleased/Dependencies-20240419-024916.yaml
new file mode 100644
index 000000000..1ef46465e
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024916.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/checkout from 3 to 4"
+time: 2024-04-19T02:49:16.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1014
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 94dece350..6e5f42af7 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -77,7 +77,7 @@ jobs:
     steps:
       - name: Check out the repository
         if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -85,7 +85,7 @@ jobs:
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
         if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
           ref: ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 189a8847c..ebd0a3bd7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -42,7 +42,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -82,7 +82,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -123,7 +123,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v4
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index 6334fd282..d8b1e4592 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -46,7 +46,7 @@ jobs:
 
     steps:
       - name: "Check out the repository"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}"
         uses: actions/setup-python@v4
@@ -100,7 +100,7 @@ jobs:
     steps:
       - name: Check out the repository
         if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -108,7 +108,7 @@ jobs:
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
         if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
           ref: ${{ github.event.pull_request.head.ref }}
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index 1a6e450c4..e214de828 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -452,7 +452,7 @@ jobs:
 
     steps:
       - name: Check out the repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
@@ -505,7 +505,7 @@ jobs:
     steps:
       - name: Check out the repository
         if: github.event_name != 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
 
@@ -513,7 +513,7 @@ jobs:
       # this is necessary for the `pull_request` event
       - name: Check out the repository (PR)
         if: github.event_name == 'pull_request_target'
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           persist-credentials: false
           ref: ${{ github.event.pull_request.head.sha }}

From 8dfd12c16b6d0f2a6abf2213dc33cb7cf61ce45c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 25 Apr 2024 23:59:09 +0000
Subject: [PATCH 25/44] Bump actions/setup-python from 4 to 5 (#1013)

* Bump actions/setup-python from 4 to 5

Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] 

* Add automated changelog yaml from template for bot PR

---------

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .changes/unreleased/Dependencies-20240419-024912.yaml | 6 ++++++
 .github/workflows/integration.yml                     | 2 +-
 .github/workflows/main.yml                            | 8 ++++----
 .github/workflows/release-internal.yml                | 4 ++--
 .github/workflows/release-prep.yml                    | 4 ++--
 5 files changed, 15 insertions(+), 9 deletions(-)
 create mode 100644 .changes/unreleased/Dependencies-20240419-024912.yaml

diff --git a/.changes/unreleased/Dependencies-20240419-024912.yaml b/.changes/unreleased/Dependencies-20240419-024912.yaml
new file mode 100644
index 000000000..ea5b75aa9
--- /dev/null
+++ b/.changes/unreleased/Dependencies-20240419-024912.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/setup-python from 4 to 5"
+time: 2024-04-19T02:49:12.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1013
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6e5f42af7..e82a83c4d 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -92,7 +92,7 @@ jobs:
 
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ebd0a3bd7..c71df970a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -47,7 +47,7 @@ jobs:
           persist-credentials: false
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.8'
 
@@ -85,7 +85,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -126,7 +126,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.8'
 
@@ -178,7 +178,7 @@ jobs:
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index d8b1e4592..eb892415c 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -49,7 +49,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "${{ env.PYTHON_TARGET_VERSION }}"
 
@@ -115,7 +115,7 @@ jobs:
 
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml
index e214de828..0061a8602 100644
--- a/.github/workflows/release-prep.yml
+++ b/.github/workflows/release-prep.yml
@@ -455,7 +455,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -520,7 +520,7 @@ jobs:
 
       # the python version used here is not what is used in the tests themselves
       - name: Set up Python for dagger
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 

From 804567cea3f64af2a3b19bb51db76b637ed38ae5 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 3 May 2024 15:36:45 -0400
Subject: [PATCH 26/44] skip tests on `main` to unblock CI, create issues to
 fix them (#1034)

---
 tests/functional/adapter/test_constraints.py  | 11 +++++++++++
 tests/functional/adapter/test_python_model.py |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 41b50ef81..654bbdb5f 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -358,6 +358,17 @@ def models(self):
             "constraints_schema.yml": constraints_yml,
         }
 
+    @pytest.mark.skip(
+        "Databricks now raises an exception, which gets raised prior to the `expected_pass` check."
+        "See https://github.com/dbt-labs/dbt-spark/issues/1009"
+    )
+    def test__constraints_enforcement_rollback(
+        self, project, expected_color, expected_error_messages, null_model_sql
+    ):
+        super().test__constraints_enforcement_rollback(
+            project, expected_color, expected_error_messages, null_model_sql
+        )
+
 
 # TODO: Like the tests above, this does test that model-level constraints don't
 # result in errors, but it does not verify that they are actually present in
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3e..05e25c5f4 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -24,6 +24,13 @@ class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     def project_config_update(self):
         return {}
 
+    @pytest.mark.skip(
+        "Databricks can't find the transaction log"
+        "See https://github.com/dbt-labs/dbt-spark/issues/1033"
+    )
+    def test_incremental(self, project):
+        super().test_incremental(project)
+
 
 models__simple_python_model = """
 import pandas
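
A minimal sketch of the override-to-skip pattern used in this patch: the failing tests live in base classes shipped by dbt-tests-adapter, so the adapter overrides each inherited test only to attach a skip marker and delegate to super(). The base class below is a stand-in for the upstream import, not the real one.

import pytest

class BasePythonIncrementalTests:  # stand-in for the upstream base class
    def test_incremental(self, project):
        ...  # real assertions live upstream

class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
    @pytest.mark.skip(
        "Databricks can't find the transaction log; "
        "see https://github.com/dbt-labs/dbt-spark/issues/1033"
    )
    def test_incremental(self, project):
        super().test_incremental(project)  # never executed while the skip is in place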

From bec191158fa3a5586c2a8f2005ac3619d3e3b3a9 Mon Sep 17 00:00:00 2001
From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com>
Date: Mon, 6 May 2024 17:42:37 -0400
Subject: [PATCH 27/44] Cleanup main after cutting new 1.8.latest branch
 (#1032)

* Clean up changelog on main

* Bumping version to 1.9.0a1

* Code quality cleanup

---------

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .bumpversion.cfg                              |  2 +-
 .changes/1.8.0-b1.md                          | 39 -------------
 .changes/1.8.0-b2.md                          | 13 -----
 .../1.8.0/Dependencies-20231009-220732.yaml   |  6 --
 .../1.8.0/Dependencies-20231010-222853.yaml   |  6 --
 .../1.8.0/Dependencies-20231010-222910.yaml   |  6 --
 .../1.8.0/Dependencies-20231013-223750.yaml   |  6 --
 .../1.8.0/Dependencies-20231027-230251.yaml   |  6 --
 .../1.8.0/Dependencies-20231027-230254.yaml   |  6 --
 .../1.8.0/Dependencies-20231027-230301.yaml   |  6 --
 .../1.8.0/Dependencies-20231108-222326.yaml   |  6 --
 .../1.8.0/Dependencies-20231110-224056.yaml   |  6 --
 .../1.8.0/Dependencies-20231113-224111.yaml   |  6 --
 .../1.8.0/Dependencies-20231127-220733.yaml   |  6 --
 .../1.8.0/Dependencies-20231127-220737.yaml   |  6 --
 .../1.8.0/Dependencies-20231127-220741.yaml   |  6 --
 .../1.8.0/Dependencies-20231204-224210.yaml   |  6 --
 .../1.8.0/Dependencies-20231212-223929.yaml   |  6 --
 .../1.8.0/Dependencies-20240403-135436.yaml   |  6 --
 .changes/1.8.0/Features-20240220-195925.yaml  |  6 --
 .changes/1.8.0/Features-20240318-033621.yaml  |  6 --
 .changes/1.8.0/Fixes-20231107-134141.yaml     |  6 --
 .changes/1.8.0/Fixes-20231221-081949.yaml     |  6 --
 .changes/1.8.0/Security-20240327-193553.yaml  |  6 --
 .../1.8.0/Under the Hood-20230929-161218.yaml |  6 --
 .../1.8.0/Under the Hood-20231119-132050.yaml |  6 --
 .../1.8.0/Under the Hood-20231214-134728.yaml |  6 --
 .../1.8.0/Under the Hood-20240111-114806.yaml |  6 --
 .../Dependencies-20240419-024912.yaml         |  6 --
 .../Dependencies-20240419-024916.yaml         |  6 --
 .../Under the Hood-20240410-183535.yaml       |  6 --
 CHANGELOG.md                                  | 56 -------------------
 dbt/adapters/spark/__version__.py             |  2 +-
 setup.py                                      |  2 +-
 34 files changed, 3 insertions(+), 279 deletions(-)
 delete mode 100644 .changes/1.8.0-b1.md
 delete mode 100644 .changes/1.8.0-b2.md
 delete mode 100644 .changes/1.8.0/Dependencies-20231009-220732.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231010-222853.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231010-222910.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231013-223750.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231027-230251.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231027-230254.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231027-230301.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231108-222326.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231110-224056.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231113-224111.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231127-220733.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231127-220737.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231127-220741.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231204-224210.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20231212-223929.yaml
 delete mode 100644 .changes/1.8.0/Dependencies-20240403-135436.yaml
 delete mode 100644 .changes/1.8.0/Features-20240220-195925.yaml
 delete mode 100644 .changes/1.8.0/Features-20240318-033621.yaml
 delete mode 100644 .changes/1.8.0/Fixes-20231107-134141.yaml
 delete mode 100644 .changes/1.8.0/Fixes-20231221-081949.yaml
 delete mode 100644 .changes/1.8.0/Security-20240327-193553.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20230929-161218.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20231119-132050.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20231214-134728.yaml
 delete mode 100644 .changes/1.8.0/Under the Hood-20240111-114806.yaml
 delete mode 100644 .changes/unreleased/Dependencies-20240419-024912.yaml
 delete mode 100644 .changes/unreleased/Dependencies-20240419-024916.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20240410-183535.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index aa22c8214..77a3f463f 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0b2
+current_version = 1.9.0a1
 parse = (?P<major>[\d]+) # major version number
 	\.(?P<minor>[\d]+) # minor version number
 	\.(?P<patch>[\d]+) # patch version number
diff --git a/.changes/1.8.0-b1.md b/.changes/1.8.0-b1.md
deleted file mode 100644
index 4f4091a91..000000000
--- a/.changes/1.8.0-b1.md
+++ /dev/null
@@ -1,39 +0,0 @@
-## dbt-spark 1.8.0-b1 - March 01, 2024
-
-### Features
-
-- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987))
-
-### Fixes
-
-- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935))
-- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
-
-### Under the Hood
-
-- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949))
-- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962))
-- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972))
-
-### Dependencies
-
-- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903))
-- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904))
-- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905))
-- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914))
-- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925))
-- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926))
-- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927))
-- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942))
-- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946))
-- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947))
-- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951))
-- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952))
-- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953))
-- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956))
-- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959))
-
-### Contributors
-- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
diff --git a/.changes/1.8.0-b2.md b/.changes/1.8.0-b2.md
deleted file mode 100644
index 806d61158..000000000
--- a/.changes/1.8.0-b2.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## dbt-spark 1.8.0-b2 - April 03, 2024
-
-### Features
-
-- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
-
-### Dependencies
-
-- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
-
-### Security
-
-- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
diff --git a/.changes/1.8.0/Dependencies-20231009-220732.yaml b/.changes/1.8.0/Dependencies-20231009-220732.yaml
deleted file mode 100644
index 4b9073fae..000000000
--- a/.changes/1.8.0/Dependencies-20231009-220732.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit-hooks requirement from ~=4.4 to ~=4.5"
-time: 2023-10-09T22:07:32.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 903
diff --git a/.changes/1.8.0/Dependencies-20231010-222853.yaml b/.changes/1.8.0/Dependencies-20231010-222853.yaml
deleted file mode 100644
index 43b481edb..000000000
--- a/.changes/1.8.0/Dependencies-20231010-222853.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.5.1 to 1.6.0"
-time: 2023-10-10T22:28:53.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 904
diff --git a/.changes/1.8.0/Dependencies-20231010-222910.yaml b/.changes/1.8.0/Dependencies-20231010-222910.yaml
deleted file mode 100644
index c6dbe582a..000000000
--- a/.changes/1.8.0/Dependencies-20231010-222910.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pyodbc requirement from ~=4.0.39 to ~=5.0.0"
-time: 2023-10-10T22:29:10.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 905
diff --git a/.changes/1.8.0/Dependencies-20231013-223750.yaml b/.changes/1.8.0/Dependencies-20231013-223750.yaml
deleted file mode 100644
index 2cea8c6dd..000000000
--- a/.changes/1.8.0/Dependencies-20231013-223750.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pre-commit requirement from ~=3.4 to ~=3.5"
-time: 2023-10-13T22:37:50.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 914
diff --git a/.changes/1.8.0/Dependencies-20231027-230251.yaml b/.changes/1.8.0/Dependencies-20231027-230251.yaml
deleted file mode 100644
index 4ad0d65f8..000000000
--- a/.changes/1.8.0/Dependencies-20231027-230251.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pyodbc requirement from ~=5.0.0 to ~=5.0.1"
-time: 2023-10-27T23:02:51.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 925
diff --git a/.changes/1.8.0/Dependencies-20231027-230254.yaml b/.changes/1.8.0/Dependencies-20231027-230254.yaml
deleted file mode 100644
index f1b745210..000000000
--- a/.changes/1.8.0/Dependencies-20231027-230254.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.6.0 to 1.6.1"
-time: 2023-10-27T23:02:54.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 926
diff --git a/.changes/1.8.0/Dependencies-20231027-230301.yaml b/.changes/1.8.0/Dependencies-20231027-230301.yaml
deleted file mode 100644
index 74548ddea..000000000
--- a/.changes/1.8.0/Dependencies-20231027-230301.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.9 to ~=23.10"
-time: 2023-10-27T23:03:01.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 927
diff --git a/.changes/1.8.0/Dependencies-20231108-222326.yaml b/.changes/1.8.0/Dependencies-20231108-222326.yaml
deleted file mode 100644
index fdd35004a..000000000
--- a/.changes/1.8.0/Dependencies-20231108-222326.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.10 to ~=23.11"
-time: 2023-11-08T22:23:26.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 942
diff --git a/.changes/1.8.0/Dependencies-20231110-224056.yaml b/.changes/1.8.0/Dependencies-20231110-224056.yaml
deleted file mode 100644
index efdf9a549..000000000
--- a/.changes/1.8.0/Dependencies-20231110-224056.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.6.1 to 1.7.0"
-time: 2023-11-10T22:40:56.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 946
diff --git a/.changes/1.8.0/Dependencies-20231113-224111.yaml b/.changes/1.8.0/Dependencies-20231113-224111.yaml
deleted file mode 100644
index b2fb306c7..000000000
--- a/.changes/1.8.0/Dependencies-20231113-224111.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest-xdist requirement from ~=3.3 to ~=3.4"
-time: 2023-11-13T22:41:11.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 947
diff --git a/.changes/1.8.0/Dependencies-20231127-220733.yaml b/.changes/1.8.0/Dependencies-20231127-220733.yaml
deleted file mode 100644
index be23a48f7..000000000
--- a/.changes/1.8.0/Dependencies-20231127-220733.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update pytest-xdist requirement from ~=3.4 to ~=3.5"
-time: 2023-11-27T22:07:33.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 951
diff --git a/.changes/1.8.0/Dependencies-20231127-220737.yaml b/.changes/1.8.0/Dependencies-20231127-220737.yaml
deleted file mode 100644
index 60e2be67f..000000000
--- a/.changes/1.8.0/Dependencies-20231127-220737.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update wheel requirement from ~=0.41 to ~=0.42"
-time: 2023-11-27T22:07:37.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 952
diff --git a/.changes/1.8.0/Dependencies-20231127-220741.yaml b/.changes/1.8.0/Dependencies-20231127-220741.yaml
deleted file mode 100644
index 63d572f2b..000000000
--- a/.changes/1.8.0/Dependencies-20231127-220741.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump mypy from 1.7.0 to 1.7.1"
-time: 2023-11-27T22:07:41.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 953
diff --git a/.changes/1.8.0/Dependencies-20231204-224210.yaml b/.changes/1.8.0/Dependencies-20231204-224210.yaml
deleted file mode 100644
index c415934db..000000000
--- a/.changes/1.8.0/Dependencies-20231204-224210.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update freezegun requirement from ~=1.2 to ~=1.3"
-time: 2023-12-04T22:42:10.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 956
diff --git a/.changes/1.8.0/Dependencies-20231212-223929.yaml b/.changes/1.8.0/Dependencies-20231212-223929.yaml
deleted file mode 100644
index 65f308f75..000000000
--- a/.changes/1.8.0/Dependencies-20231212-223929.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Update black requirement from ~=23.11 to ~=23.12"
-time: 2023-12-12T22:39:29.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 959
diff --git a/.changes/1.8.0/Dependencies-20240403-135436.yaml b/.changes/1.8.0/Dependencies-20240403-135436.yaml
deleted file mode 100644
index 345b70740..000000000
--- a/.changes/1.8.0/Dependencies-20240403-135436.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Dependencies
-body: Add `dbt-core` as a dependency to preserve backwards compatibility for installation
-time: 2024-04-03T13:54:36.667724-04:00
-custom:
-  Author: mikealfare
-  PR: "1002"
diff --git a/.changes/1.8.0/Features-20240220-195925.yaml b/.changes/1.8.0/Features-20240220-195925.yaml
deleted file mode 100644
index c5d86ab7c..000000000
--- a/.changes/1.8.0/Features-20240220-195925.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: Implement spark__safe_cast and add functional tests for unit testing
-time: 2024-02-20T19:59:25.907821-05:00
-custom:
-  Author: michelleark
-  Issue: "987"
diff --git a/.changes/1.8.0/Features-20240318-033621.yaml b/.changes/1.8.0/Features-20240318-033621.yaml
deleted file mode 100644
index a1a1b9c5b..000000000
--- a/.changes/1.8.0/Features-20240318-033621.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Features
-body: ': Add new workflow for internal patch releases'
-time: 2024-03-18T03:36:21.634918-07:00
-custom:
-  Author: versusfacit
-  Issue: "38"
diff --git a/.changes/1.8.0/Fixes-20231107-134141.yaml b/.changes/1.8.0/Fixes-20231107-134141.yaml
deleted file mode 100644
index d4f56eaca..000000000
--- a/.changes/1.8.0/Fixes-20231107-134141.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Support new agate Integer type and empty seed test
-time: 2023-11-07T13:41:41.033441-05:00
-custom:
-  Author: gshank
-  Issue: "935"
diff --git a/.changes/1.8.0/Fixes-20231221-081949.yaml b/.changes/1.8.0/Fixes-20231221-081949.yaml
deleted file mode 100644
index b10c8141e..000000000
--- a/.changes/1.8.0/Fixes-20231221-081949.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Fixes
-body: Fix hardcoded file format for python models
-time: 2023-12-21T08:19:49.630806+02:00
-custom:
-  Author: ben-schreiber
-  Issue: "803"
diff --git a/.changes/1.8.0/Security-20240327-193553.yaml b/.changes/1.8.0/Security-20240327-193553.yaml
deleted file mode 100644
index daee50fd6..000000000
--- a/.changes/1.8.0/Security-20240327-193553.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Security
-body: Pin `black>=24.3` in `dev-requirements.txt`
-time: 2024-03-27T19:35:53.102377-04:00
-custom:
-  Author: mikealfare
-  PR: "1000"
diff --git a/.changes/1.8.0/Under the Hood-20230929-161218.yaml b/.changes/1.8.0/Under the Hood-20230929-161218.yaml
deleted file mode 100644
index 9b5c6818b..000000000
--- a/.changes/1.8.0/Under the Hood-20230929-161218.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow.
-time: 2023-09-29T16:12:18.968755+02:00
-custom:
-  Author: JCZuurmond, colin-rogers-dbt
-  Issue: "719"
diff --git a/.changes/1.8.0/Under the Hood-20231119-132050.yaml b/.changes/1.8.0/Under the Hood-20231119-132050.yaml
deleted file mode 100644
index 61c7f8ab8..000000000
--- a/.changes/1.8.0/Under the Hood-20231119-132050.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Add tests for --empty flag
-time: 2023-11-19T13:20:50.076459-05:00
-custom:
-  Author: michelleark
-  Issue: "949"
diff --git a/.changes/1.8.0/Under the Hood-20231214-134728.yaml b/.changes/1.8.0/Under the Hood-20231214-134728.yaml
deleted file mode 100644
index b1de2ddb7..000000000
--- a/.changes/1.8.0/Under the Hood-20231214-134728.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Remove unused `invalid_insert_overwrite_delta_msg` message
-time: 2023-12-14T13:47:28.444107-07:00
-custom:
-  Author: dbeatty10
-  Issue: "962"
diff --git a/.changes/1.8.0/Under the Hood-20240111-114806.yaml b/.changes/1.8.0/Under the Hood-20240111-114806.yaml
deleted file mode 100644
index 31705f468..000000000
--- a/.changes/1.8.0/Under the Hood-20240111-114806.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Update import paths and list_relations to support decoupling adapters/core
-time: 2024-01-11T11:48:06.120111-08:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "972"
diff --git a/.changes/unreleased/Dependencies-20240419-024912.yaml b/.changes/unreleased/Dependencies-20240419-024912.yaml
deleted file mode 100644
index ea5b75aa9..000000000
--- a/.changes/unreleased/Dependencies-20240419-024912.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump actions/setup-python from 4 to 5"
-time: 2024-04-19T02:49:12.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1013
diff --git a/.changes/unreleased/Dependencies-20240419-024916.yaml b/.changes/unreleased/Dependencies-20240419-024916.yaml
deleted file mode 100644
index 1ef46465e..000000000
--- a/.changes/unreleased/Dependencies-20240419-024916.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: "Dependencies"
-body: "Bump actions/checkout from 3 to 4"
-time: 2024-04-19T02:49:16.00000Z
-custom:
-  Author: dependabot[bot]
-  PR: 1014
diff --git a/.changes/unreleased/Under the Hood-20240410-183535.yaml b/.changes/unreleased/Under the Hood-20240410-183535.yaml
deleted file mode 100644
index 54b69f285..000000000
--- a/.changes/unreleased/Under the Hood-20240410-183535.yaml	
+++ /dev/null
@@ -1,6 +0,0 @@
-kind: Under the Hood
-body: Update dependabot config to cover GHA and Docker
-time: 2024-04-10T18:35:35.093246-04:00
-custom:
-  Author: mikealfare
-  Issue: "1006"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 81f0575dd..902db37fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,62 +5,6 @@
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-## dbt-spark 1.8.0-b2 - April 03, 2024
-
-### Features
-
-- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38))
-
-### Dependencies
-
-- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002))
-
-### Security
-
-- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000))
-
-
-
-## dbt-spark 1.8.0-b1 - March 01, 2024
-
-### Features
-
-- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987))
-
-### Fixes
-
-- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935))
-- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
-
-### Under the Hood
-
-- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949))
-- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962))
-- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972))
-
-### Dependencies
-
-- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903))
-- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904))
-- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905))
-- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914))
-- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925))
-- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926))
-- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927))
-- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942))
-- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946))
-- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947))
-- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951))
-- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952))
-- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953))
-- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956))
-- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959))
-
-### Contributors
-- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719))
-- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803))
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 7d16c28f0..6698ed64c 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.8.0b2"
+version = "1.9.0a1"
diff --git a/setup.py b/setup.py
index 55112e3f2..a7621d587 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def _get_plugin_version_dict():
 
 
 package_name = "dbt-spark"
-package_version = "1.8.0b2"
+package_version = "1.9.0a1"
 description = """The Apache Spark adapter plugin for dbt"""
 
 odbc_extras = ["pyodbc~=4.0.39"]

From 70f6060a5377da83360b6f6664012b03ca1a23c3 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Tue, 7 May 2024 08:09:45 -0700
Subject: [PATCH 28/44] Add missing changelog backlinks. (#1039)

Co-authored-by: Mila Page 
---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 902db37fc..36a3ea69a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
 
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.8](https://github.com/dbt-labs/dbt-spark/blob/1.8.latest/CHANGELOG.md)
+- [1.7](https://github.com/dbt-labs/dbt-spark/blob/1.7.latest/CHANGELOG.md)
 - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md)
 - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md)
 - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md)

From b65776724fe552963c7291b94be286cb69366c46 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 8 May 2024 10:37:19 -0400
Subject: [PATCH 29/44] Update CODEOWNERS (#1040)

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index f6283d123..02ed72d45 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,3 @@
 # This codeowners file is used to ensure all PRs require reviews from the adapters team
 
-* @dbt-labs/core-adapters
+* @dbt-labs/adapters

From 94bfcd942b989c7cba12135ce441edffc0f9c8a2 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 8 May 2024 11:18:05 -0400
Subject: [PATCH 30/44] Move to `pre-commit` only (#1026)

* move linter and typechecker reqs and config to .pre-commit-config.yaml
* update linters and typecheckers
* make updates from running linters and typecheckers
* remove old make recipes
---
 .flake8                           |  14 ----
 .github/workflows/integration.yml |   1 -
 .github/workflows/main.yml        |   3 -
 .pre-commit-config.yaml           | 115 ++++++++++++++----------------
 Makefile                          |  27 +------
 dbt/adapters/spark/__init__.py    |   2 +-
 dbt/adapters/spark/column.py      |   2 +-
 dbt/adapters/spark/connections.py |   6 +-
 dbt/adapters/spark/impl.py        |   2 +-
 dev-requirements.txt              |  30 +++-----
 tests/unit/utils.py               |   1 +
 11 files changed, 73 insertions(+), 130 deletions(-)
 delete mode 100644 .flake8

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index bbc3202a0..000000000
--- a/.flake8
+++ /dev/null
@@ -1,14 +0,0 @@
-[flake8]
-select =
-    E
-    W
-    F
-ignore =
-    # makes Flake8 work like black
-    W503,
-    W504,
-    # makes Flake8 work like black
-    E203,
-    E741,
-    E501,
-exclude = test
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index e82a83c4d..1e058aa49 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -19,7 +19,6 @@ on:
   pull_request_target:
     paths-ignore:
       - ".changes/**"
-      - ".flake8"
       - ".gitignore"
       - "**.md"
 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c71df970a..cbbb7f72a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -59,9 +59,6 @@ jobs:
           python -m pip --version
           python -m pip install pre-commit
           pre-commit --version
-          python -m pip install mypy==0.942
-          python -m pip install types-requests
-          mypy --version
           python -m pip install -r requirements.txt
           python -m pip install -r dev-requirements.txt
           python -c "import dbt.adapters.spark"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 882a32769..e228e7d97 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,67 +1,58 @@
 # For more on configuring pre-commit hooks (see https://pre-commit.com/)
-
-# Force all unspecified python hooks to run python 3.8
 default_language_version:
-  python: python3
+    python: python3
 
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.4.0
-  hooks:
-  - id: check-yaml
-    args: [--unsafe]
-  - id: check-json
-  - id: end-of-file-fixer
-  - id: trailing-whitespace
-  - id: check-case-conflict
-- repo: https://github.com/dbt-labs/pre-commit-hooks
-  rev: v0.1.0a1
-  hooks:
-  - id: dbt-core-in-adapters-check
-- repo: https://github.com/psf/black
-  rev: 23.1.0
-  hooks:
-  - id: black
-    additional_dependencies: ['click~=8.1']
-    args:
-    - "--line-length=99"
-    - "--target-version=py38"
-  - id: black
-    alias: black-check
-    stages: [manual]
-    additional_dependencies: ['click~=8.1']
-    args:
-    - "--line-length=99"
-    - "--target-version=py38"
-    - "--check"
-    - "--diff"
-- repo: https://github.com/pycqa/flake8
-  rev: 6.0.0
-  hooks:
-  - id: flake8
-  - id: flake8
-    alias: flake8-check
-    stages: [manual]
-- repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.2.0
-  hooks:
-  - id: mypy
-    # N.B.: Mypy is... a bit fragile.
-    #
-    # By using `language: system` we run this hook in the local
-    # environment instead of a pre-commit isolated one.  This is needed
-    # to ensure mypy correctly parses the project.
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+    -   id: check-yaml
+        args: [--unsafe]
+    -   id: check-json
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: check-case-conflict
+
+-   repo: https://github.com/dbt-labs/pre-commit-hooks
+    rev: v0.1.0a1
+    hooks:
+    -   id: dbt-core-in-adapters-check
+
+-   repo: https://github.com/psf/black
+    rev: 24.4.2
+    hooks:
+    -   id: black
+        args:
+        -   --line-length=99
+        -   --target-version=py38
+        -   --target-version=py39
+        -   --target-version=py310
+        -   --target-version=py311
+        additional_dependencies: [flaky]
+
+-   repo: https://github.com/pycqa/flake8
+    rev: 7.0.0
+    hooks:
+    -   id: flake8
+        exclude: tests/
+        args:
+        -   --max-line-length=99
+        -   --select=E,F,W
+        -   --ignore=E203,E501,E741,W503,W504
+        -   --per-file-ignores=*/__init__.py:F401
 
-    # It may cause trouble in that it adds environmental variables out
-    # of our control to the mix.  Unfortunately, there's nothing we can
-    # do about per pre-commit's author.
-    # See https://github.com/pre-commit/pre-commit/issues/730 for details.
-    args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores, --disallow-untyped-defs]
-    files: ^dbt/adapters/.*
-    language: system
-  - id: mypy
-    alias: mypy-check
-    stages: [manual]
-    args: [--show-error-codes, --pretty, --ignore-missing-imports, --explicit-package-bases]
-    files: ^dbt/adapters
-    language: system
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.10.0
+    hooks:
+    -   id: mypy
+        args:
+        -   --show-error-codes
+        -   --ignore-missing-imports
+        -   --explicit-package-bases
+        -   --warn-unused-ignores
+        -   --disallow-untyped-defs
+        -   --pretty
+        files: ^dbt/adapters
+        additional_dependencies:
+        -   types-pytz
+        -   types-requests
diff --git a/Makefile b/Makefile
index 2bd1055fa..af3a51541 100644
--- a/Makefile
+++ b/Makefile
@@ -11,31 +11,10 @@ dev-uninstall: ## Uninstalls all packages while maintaining the virtual environm
 	pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y
 	pip uninstall -y dbt-spark
 
-.PHONY: mypy
-mypy: ## Runs mypy against staged changes for static type checking.
-	@\
-	pre-commit run --hook-stage manual mypy-check | grep -v "INFO"
-
-.PHONY: flake8
-flake8: ## Runs flake8 against staged changes to enforce style guide.
-	@\
-	pre-commit run --hook-stage manual flake8-check | grep -v "INFO"
-
-.PHONY: black
-black: ## Runs black  against staged changes to enforce style guide.
-	@\
-	pre-commit run --hook-stage manual black-check -v | grep -v "INFO"
-
 .PHONY: lint
 lint: ## Runs flake8 and mypy code checks against staged changes.
 	@\
-	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
-	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
-
-.PHONY: linecheck
-linecheck: ## Checks for all Python lines 100 characters or more
-	@\
-	find dbt -type f -name "*.py" -exec grep -I -r -n '.\{100\}' {} \;
+	pre-commit run --all-files
 
 .PHONY: unit
 unit: ## Runs unit tests with py38.
@@ -47,9 +26,7 @@ test: ## Runs unit tests with py38 and code checks against staged changes.
 	@\
 	python -m pytest tests/unit; \
 	python dagger/run_dbt_spark_tests.py --profile spark_session \
-	pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
-	pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
-	pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
+	pre-commit run --all-files
 
 .PHONY: clean
 	@echo "cleaning repo"
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index c25ba40d5..6ecc5eccf 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -8,5 +8,5 @@
 from dbt.include import spark
 
 Plugin = AdapterPlugin(
-    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH  # type: ignore
+    adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH
 )
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index 39f6f529e..98fa24a17 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -21,7 +21,7 @@ class SparkColumn(dbtClassMixin, Column):
     def translate_type(cls, dtype: str) -> str:
         return dtype
 
-    def can_expand_to(self: Self, other_column: Self) -> bool:  # type: ignore
+    def can_expand_to(self: Self, other_column: Self) -> bool:
         """returns True if both columns are strings"""
         return self.is_string() and other_column.is_string()
 
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 83048f921..0405eaf5b 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -65,9 +65,9 @@ class SparkConnectionMethod(StrEnum):
 @dataclass
 class SparkCredentials(Credentials):
     host: Optional[str] = None
-    schema: Optional[str] = None  # type: ignore
+    schema: Optional[str] = None
     method: SparkConnectionMethod = None  # type: ignore
-    database: Optional[str] = None  # type: ignore
+    database: Optional[str] = None
     driver: Optional[str] = None
     cluster: Optional[str] = None
     endpoint: Optional[str] = None
@@ -568,7 +568,7 @@ def open(cls, connection: Connection) -> Connection:
         return connection
 
     @classmethod
-    def data_type_code_to_name(cls, type_code: Union[type, str]) -> str:  # type: ignore
+    def data_type_code_to_name(cls, type_code: Union[type, str]) -> str:
         """
         :param Union[type, str] type_code: The sql to execute.
             * type_code is a python type (!) in pyodbc https://github.com/mkleehammer/pyodbc/wiki/Cursor#description, and a string for other spark runtimes.
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 9a1a7ec06..255ab7806 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -151,7 +151,7 @@ def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
     def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str:
         return "timestamp"
 
-    def quote(self, identifier: str) -> str:  # type: ignore
+    def quote(self, identifier: str) -> str:
         return "`{}`".format(identifier)
 
     def _get_relation_information(self, row: agate.Row) -> RelationInfo:
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 26522411a..f86fb5ef4 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,30 +5,22 @@ git+https://github.com/dbt-labs/dbt-common.git
 git+https://github.com/dbt-labs/dbt-adapters.git
 git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
-# if version 1.x or greater -> pin to major version
-# if version 0.x -> pin to minor
-black>=24.3
-bumpversion~=0.6.0
-click~=8.1
-flake8~=6.1;python_version>="3.8"
-flaky~=3.7
-freezegun~=1.3
+# dev
 ipdb~=0.13.13
-mypy==1.7.1  # patch updates have historically introduced breaking changes
-pip-tools~=7.3
-pre-commit~=3.5
-pre-commit-hooks~=4.5
+pre-commit==3.7.0;python_version >="3.9"
+pre-commit==3.5.0;python_version <"3.9"
+
+# test
+freezegun~=1.3
+mock~=5.1
 pytest~=7.4
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.5
-pytz~=2023.3
-types-pytz~=2023.3
-types-requests~=2.31
+thrift_sasl~=0.4.3
+
+# build
+bumpversion~=0.6.0
 twine~=4.0
 wheel~=0.42
-
-# Adapter specific dependencies
-mock~=5.1
-thrift_sasl~=0.4.3
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 17cd3ee78..d080242cc 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -2,6 +2,7 @@
 Note that all imports should be inside the functions to avoid import/mocking
 issues.
 """
+
 import string
 import os
 from unittest import mock

From 1f4442a4dcf9719b1670278760772a07506681fc Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 10 May 2024 11:37:55 -0400
Subject: [PATCH 31/44] [Bug] Fix constraints enforcement rollover test (#1041)

* unskip offending test case
* update help in example test env file
* install pyodbc from source to support arm machines
* allow for both col and column
---
 requirements.txt                             |  2 +-
 test.env.example                             | 16 ++++++----------
 tests/functional/adapter/test_constraints.py | 13 +------------
 3 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 18ccc77fd..40335fb8a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=5.0.1
+pyodbc~=5.0.1 --no-binary pyodbc
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/test.env.example b/test.env.example
index e69f700b7..140da4042 100644
--- a/test.env.example
+++ b/test.env.example
@@ -1,13 +1,9 @@
-# Cluster ID
-DBT_DATABRICKS_CLUSTER_NAME=
-# SQL Endpoint
-DBT_DATABRICKS_ENDPOINT=
-# Server Hostname value
-DBT_DATABRICKS_HOST_NAME=
-# personal token
-DBT_DATABRICKS_TOKEN=
-# file path to local ODBC driver
-ODBC_DRIVER=
+# databricks credentials
+DBT_DATABRICKS_HOST_NAME=<{this value}.cloud.databricks.com>.cloud.databricks.com
+DBT_DATABRICKS_TOKEN=
+DBT_DATABRICKS_CLUSTER_NAME=
+ODBC_DRIVER=
+DBT_DATABRICKS_ENDPOINT=
 
 # users for testing 'grants' functionality
 DBT_TEST_USER_1=
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
index 654bbdb5f..e35a13a64 100644
--- a/tests/functional/adapter/test_constraints.py
+++ b/tests/functional/adapter/test_constraints.py
@@ -316,7 +316,7 @@ def expected_error_messages(self):
             "violate the new NOT NULL constraint",
             "(id > 0) violated by row with values:",  # incremental mats
             "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES",  # incremental mats
-            "NOT NULL constraint violated for column",
+            "NOT NULL constraint violated for col",
         ]
 
     def assert_expected_error_messages(self, error_message, expected_error_messages):
@@ -358,17 +358,6 @@ def models(self):
             "constraints_schema.yml": constraints_yml,
         }
 
-    @pytest.mark.skip(
-        "Databricks now raises an exception, which gets raised prior to the `expected_pass` check."
-        "See https://github.com/dbt-labs/dbt-spark/issues/1009"
-    )
-    def test__constraints_enforcement_rollback(
-        self, project, expected_color, expected_error_messages, null_model_sql
-    ):
-        super().test__constraints_enforcement_rollback(
-            project, expected_color, expected_error_messages, null_model_sql
-        )
-
 
 # TODO: Like the tests above, this does test that model-level constraints don't
 # result in errors, but it does not verify that they are actually present in

From 94af50e683c7c4466708e40e29ce38a51ee102b8 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 13 May 2024 17:24:38 -0600
Subject: [PATCH 32/44] Cross-database `date` macro (#1030)

* Cross-database `date` macro

* Temporary dev requirements for testing

* Update changelog entry

* Revert "Temporary dev requirements for testing"

This reverts commit 529461f475c5a639a6d545ff61ab32d91616fd2a.
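
The macro added here builds an ISO-8601 literal with Python's `datetime` module and wraps it in Spark's `to_date`. As a rough illustration of what the rendered SQL looks like, here is a small Python sketch; `render_spark_date` is a hypothetical helper for illustration only, not part of the adapter:

```python
from datetime import date


def render_spark_date(year: int, month: int, day: int) -> str:
    """Hypothetical helper mirroring what the spark__date macro renders."""
    # Format the date as an ISO-8601 string, as the macro does via modules.datetime...
    iso_8601_formatted_date = date(year, month, day).strftime("%Y-%m-%d")
    # ...then wrap it in to_date() so Spark parses the literal consistently.
    return f"to_date('{iso_8601_formatted_date}', 'yyyy-MM-dd')"


# e.g. {{ date(2024, 5, 1) }} should compile to: to_date('2024-05-01', 'yyyy-MM-dd')
print(render_spark_date(2024, 5, 1))
```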
---
 .changes/unreleased/Features-20240501-151904.yaml | 6 ++++++
 dbt/include/spark/macros/utils/date.sql           | 5 +++++
 tests/functional/adapter/utils/test_utils.py      | 5 +++++
 3 files changed, 16 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240501-151904.yaml
 create mode 100644 dbt/include/spark/macros/utils/date.sql

diff --git a/.changes/unreleased/Features-20240501-151904.yaml b/.changes/unreleased/Features-20240501-151904.yaml
new file mode 100644
index 000000000..ed08f1251
--- /dev/null
+++ b/.changes/unreleased/Features-20240501-151904.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Cross-database `date` macro
+time: 2024-05-01T15:19:04.822157-06:00
+custom:
+  Author: dbeatty10
+  Issue: 1031
diff --git a/dbt/include/spark/macros/utils/date.sql b/dbt/include/spark/macros/utils/date.sql
new file mode 100644
index 000000000..a76e995b3
--- /dev/null
+++ b/dbt/include/spark/macros/utils/date.sql
@@ -0,0 +1,5 @@
+{% macro spark__date(year, month, day) -%}
+    {%- set dt = modules.datetime.date(year, month, day) -%}
+    {%- set iso_8601_formatted_date = dt.strftime('%Y-%m-%d') -%}
+    to_date('{{ iso_8601_formatted_date }}', 'yyyy-MM-dd')
+{%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 0dc526564..e8ebb087f 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -8,6 +8,7 @@
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
 from dbt.tests.adapter.utils.test_concat import BaseConcat
 from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive
+from dbt.tests.adapter.utils.test_date import BaseDate
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
@@ -80,6 +81,10 @@ class TestCurrentTimestamp(BaseCurrentTimestampNaive):
     pass
 
 
+class TestDate(BaseDate):
+    pass
+
+
 class TestDateAdd(BaseDateAdd):
     pass
 

From 629ea94098cea5f4c182b123048086cd2a48c767 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 14 May 2024 12:28:51 -0400
Subject: [PATCH 33/44] [Bug] Fix incremental python tables - dbt can't find
 temporary table transaction logs (#1042)

* unskip test
* align test pyodbc version with build deps; add pyspark from build deps
* revert change to exclude database and schema from temp relations
* retain existing behavior for unit tests by vendoring temp relation into the incremental materialization
* changelog entry
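
The vendored temp relation keeps the target's database and schema and only suffixes the identifier, whereas `make_temp_relation(this)` produced a relation with neither, which left Databricks unable to locate the Delta transaction log for Python models that write a real temp table. A minimal Python sketch of the naming behaviour, assuming a simplified `Relation` stand-in (illustrative only, not the adapter's relation class):

```python
from dataclasses import dataclass, replace
from typing import Optional


@dataclass(frozen=True)
class Relation:
    """Simplified stand-in for a dbt relation, for illustration only."""

    database: Optional[str]
    schema: Optional[str]
    identifier: str

    def render(self) -> str:
        return ".".join(part for part in (self.database, self.schema, self.identifier) if part)


target = Relation(database="hive_metastore", schema="analytics", identifier="orders")

# Old behaviour: temp relation without database/schema (fine for SQL temp views,
# but a Python model writes a real table whose transaction log then can't be found).
old_tmp = Relation(database=None, schema=None, identifier="orders__dbt_tmp")

# New behaviour: keep database and schema and only suffix the identifier, mirroring
# this.incorporate(path={"identifier": this.identifier ~ '__dbt_tmp'}).
new_tmp = replace(target, identifier=f"{target.identifier}__dbt_tmp")

print(old_tmp.render())  # orders__dbt_tmp
print(new_tmp.render())  # hive_metastore.analytics.orders__dbt_tmp
```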
---
 .changes/unreleased/Fixes-20240513-160121.yaml             | 7 +++++++
 .../macros/materializations/incremental/incremental.sql    | 2 +-
 requirements.txt                                           | 3 ++-
 tests/functional/adapter/test_python_model.py              | 7 -------
 4 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20240513-160121.yaml

diff --git a/.changes/unreleased/Fixes-20240513-160121.yaml b/.changes/unreleased/Fixes-20240513-160121.yaml
new file mode 100644
index 000000000..fd7072954
--- /dev/null
+++ b/.changes/unreleased/Fixes-20240513-160121.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Fix incremental python models error where Databricks could not find the temp
+  table transaction logs
+time: 2024-05-13T16:01:21.255833-04:00
+custom:
+  Author: mikealfare
+  Issue: "1033"
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 10d4f3ed8..9a66bab51 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -16,7 +16,7 @@
   {%- set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) -%}
   {%- set target_relation = this -%}
   {%- set existing_relation = load_relation(this) -%}
-  {%- set tmp_relation = make_temp_relation(this) -%}
+  {% set tmp_relation = this.incorporate(path = {"identifier": this.identifier ~ '__dbt_tmp'}) -%}
 
   {#-- for SQL model we will create temp view that doesn't have database and schema --#}
   {%- if language == 'sql'-%}
diff --git a/requirements.txt b/requirements.txt
index 40335fb8a..b32884c43 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,10 @@
 pyhive[hive_pure_sasl]~=0.7.0
 requests>=2.28.1
 
-pyodbc~=5.0.1 --no-binary pyodbc
+pyodbc~=4.0.39 --no-binary pyodbc
 sqlparams>=3.0.0
 thrift>=0.13.0
+pyspark>=3.0.0,<4.0.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability
 
 types-PyYAML
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 05e25c5f4..1195cbd3e 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -24,13 +24,6 @@ class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     def project_config_update(self):
         return {}
 
-    @pytest.mark.skip(
-        "Databricks can't find the transaction log"
-        "See https://github.com/dbt-labs/dbt-spark/issues/1033"
-    )
-    def test_incremental(self, project):
-        super().test_incremental(project)
-
 
 models__simple_python_model = """
 import pandas

From 9b3dd3f7eb6d0f5379239342cdf25052bfb7eb9a Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 14 May 2024 13:04:41 -0600
Subject: [PATCH 34/44] Import relevant pytest(s) for cross-database `cast`
 macro (#1029)

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 .changes/unreleased/Features-20240430-185723.yaml | 6 ++++++
 tests/functional/adapter/utils/test_utils.py      | 5 +++++
 2 files changed, 11 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240430-185723.yaml

diff --git a/.changes/unreleased/Features-20240430-185723.yaml b/.changes/unreleased/Features-20240430-185723.yaml
new file mode 100644
index 000000000..cb86e7966
--- /dev/null
+++ b/.changes/unreleased/Features-20240430-185723.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add tests for cross-database `cast` macro
+time: 2024-04-30T18:57:23.881246-06:00
+custom:
+  Author: dbeatty10
+  Issue: "1028"
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index e8ebb087f..0285f7c4e 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -5,6 +5,7 @@
 from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct
 from dbt.tests.adapter.utils.test_any_value import BaseAnyValue
 from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
+from dbt.tests.adapter.utils.test_cast import BaseCast
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
 from dbt.tests.adapter.utils.test_concat import BaseConcat
 from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive
@@ -67,6 +68,10 @@ class TestBoolOr(BaseBoolOr):
     pass
 
 
+class TestCast(BaseCast):
+    pass
+
+
 class TestCastBoolToText(BaseCastBoolToText):
     pass
 

From 6f8ff606926b5bb3f986a0eb0b6007bff35476da Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Tue, 14 May 2024 23:53:46 +0200
Subject: [PATCH 35/44] Bump deps on common, adapters, core (#1045)

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index a7621d587..fd985eba4 100644
--- a/setup.py
+++ b/setup.py
@@ -65,10 +65,10 @@ def _get_plugin_version_dict():
     include_package_data=True,
     install_requires=[
         "sqlparams>=3.0.0",
-        "dbt-common>=0.1.0a1,<2.0",
-        "dbt-adapters>=0.1.0a1,<2.0",
+        "dbt-common>=1.0.4,<2.0",
+        "dbt-adapters>=1.1.1,<2.0",
         # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency
-        "dbt-core>=1.8.0a1",
+        "dbt-core>=1.8.0",
     ],
     extras_require={
         "ODBC": odbc_extras,

From 944dbea08de9a8069a39ad92f4beecb2cbec608c Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 21 May 2024 00:57:20 -0400
Subject: [PATCH 36/44]  Add docker release to the full release process for
 final releases (#1004)

* add docker release to release pipeline
* update docker release to align with other adapters, add dev docker
* remove defaulted input for docker package, override default for docker release image
* fix docker release dependent steps
* only release docker when not testing, allow to only release to docker
* remove dev container
* remove test script
* rename the spark Dockerfile to make space for the release Dockerfile
* move the release Dockerfile into ./docker

---------

Co-authored-by: Emily Rockman 
---
 .github/dependabot.yml        |  5 +++
 .github/workflows/release.yml | 64 ++++++++++++++++---------------
 Makefile                      |  4 ++
 docker-compose.yml            |  4 +-
 docker/Dockerfile             | 72 ++++++++++++++++++++---------------
 docker/README.md              | 70 ++++++++++++++++++++++++++++++++++
 docker/spark.Dockerfile       | 30 +++++++++++++++
 7 files changed, 187 insertions(+), 62 deletions(-)
 create mode 100644 docker/README.md
 create mode 100644 docker/spark.Dockerfile

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index ae2be43aa..fc44c9fe5 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -15,3 +15,8 @@ updates:
     schedule:
       interval: "weekly"
     rebase-strategy: "disabled"
+  - package-ecosystem: "docker"
+    directory: "/docker-dev"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9b2774f17..cdbdaa13f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -13,8 +13,8 @@
 # This will only run manually. Run this workflow only after the
 # version bump workflow is completed and related changes are reviewed and merged.
 #
-
-name: Release to GitHub and PyPI
+name: "Release to GitHub, PyPI, and Docker"
+run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker"
 
 on:
   workflow_dispatch:
@@ -56,6 +56,11 @@ on:
         type: boolean
         default: true
         required: false
+      only_docker:
+        description: "Only release Docker image, skip GitHub & PyPI"
+        type: boolean
+        default: false
+        required: false
 
 permissions:
   contents: write # this is the permission that allows creating a new release
@@ -66,7 +71,7 @@ defaults:
 
 jobs:
   log-inputs:
-    name: Log Inputs
+    name: "Log Inputs"
     runs-on: ubuntu-latest
     steps:
       - name: "[DEBUG] Print Variables"
@@ -79,6 +84,7 @@ jobs:
           echo AWS S3 bucket name:                 ${{ inputs.s3_bucket_name }}
           echo Package test command:               ${{ inputs.package_test_command }}
           echo Test run:                           ${{ inputs.test_run }}
+          echo Only Docker:                        ${{ inputs.only_docker }}
 
   # The Spark repository uses CircleCI to run integration tests.
   # Because of this, the process of version bumps will be manual
@@ -87,27 +93,21 @@ jobs:
   # We are passing `env_setup_script_path` as an empty string
   # so that the integration tests stage will be skipped.
   audit-version-and-changelog:
-    name: Bump package version, Generate changelog
-
+    name: "Bump package version, Generate changelog"
     uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main
-
     with:
       sha: ${{ inputs.sha }}
       version_number: ${{ inputs.version_number }}
       target_branch: ${{ inputs.target_branch }}
       env_setup_script_path: ""
       test_run: ${{ inputs.test_run }}
-
     secrets: inherit
 
   log-outputs-audit-version-and-changelog:
     name: "[Log output] Bump package version, Generate changelog"
-    if: ${{ !failure() && !cancelled() }}
-
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [audit-version-and-changelog]
-
     runs-on: ubuntu-latest
-
     steps:
       - name: Print variables
         run: |
@@ -115,12 +115,10 @@ jobs:
           echo Changelog path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
 
   build-test-package:
-    name: Build, Test, Package
-    if: ${{ !failure() && !cancelled() }}
+    name: "Build, Test, Package"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [audit-version-and-changelog]
-
     uses: dbt-labs/dbt-release/.github/workflows/build.yml@main
-
     with:
       sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
       version_number: ${{ inputs.version_number }}
@@ -129,19 +127,15 @@ jobs:
       s3_bucket_name: ${{ inputs.s3_bucket_name }}
       package_test_command: ${{ inputs.package_test_command }}
       test_run: ${{ inputs.test_run }}
-
     secrets:
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 
   github-release:
-    name: GitHub Release
-    if: ${{ !failure() && !cancelled() }}
-
+    name: "GitHub Release"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [audit-version-and-changelog, build-test-package]
-
     uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main
-
     with:
       sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
       version_number: ${{ inputs.version_number }}
@@ -149,35 +143,43 @@ jobs:
       test_run: ${{ inputs.test_run }}
 
   pypi-release:
-    name: PyPI Release
-
+    name: "PyPI Release"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
     needs: [github-release]
-
     uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main
-
     with:
       version_number: ${{ inputs.version_number }}
       test_run: ${{ inputs.test_run }}
-
     secrets:
       PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
       TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}
 
+  docker-release:
+    name: "Docker Release"
+    # We cannot release to docker on a test run because it uses the tag in GitHub as
+    # what we need to release, but draft releases don't actually tag the commit, so it
+    # finds nothing to release
+    if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }}
+    needs: [github-release]
+    permissions:
+      packages: write
+    uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main
+    with:
+      version_number: ${{ inputs.version_number }}
+      dockerfile: "docker/Dockerfile"
+      test_run: ${{ inputs.test_run }}
+
   slack-notification:
     name: Slack Notification
     if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }}
-
     needs:
       [
-        audit-version-and-changelog,
-        build-test-package,
         github-release,
         pypi-release,
+        docker-release,
       ]
-
     uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
     with:
       status: "failure"
-
     secrets:
       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}
diff --git a/Makefile b/Makefile
index af3a51541..ff4c0fc1b 100644
--- a/Makefile
+++ b/Makefile
@@ -38,3 +38,7 @@ help: ## Show this help message.
 	@echo
 	@echo 'targets:'
 	@grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: docker-prod
+docker-prod:
+	docker build -f docker/Dockerfile -t dbt-spark .
diff --git a/docker-compose.yml b/docker-compose.yml
index ad083eaf4..cd3e1c776 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,9 @@ version: "3.7"
 services:
 
   dbt-spark3-thrift:
-    build: docker/
+    build:
+      context: ./docker
+      dockerfile: spark.Dockerfile
     ports:
       - "10000:10000"
       - "4040:4040"
diff --git a/docker/Dockerfile b/docker/Dockerfile
index bb4d378ed..ef4574ddd 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,30 +1,42 @@
-ARG OPENJDK_VERSION=8
-FROM eclipse-temurin:${OPENJDK_VERSION}-jre
-
-ARG BUILD_DATE
-ARG SPARK_VERSION=3.3.2
-ARG HADOOP_VERSION=3
-
-LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
-      org.label-schema.build-date=$BUILD_DATE \
-      org.label-schema.version=$SPARK_VERSION
-
-ENV SPARK_HOME /usr/spark
-ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
-
-RUN apt-get update && \
-    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
-    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
-    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
-    apt-get remove -y wget && \
-    apt-get autoremove -y && \
-    apt-get clean
-
-COPY entrypoint.sh /scripts/
-RUN chmod +x /scripts/entrypoint.sh
-
-ENTRYPOINT ["/scripts/entrypoint.sh"]
-CMD ["--help"]
+# this image gets published to GHCR for production use
+ARG py_version=3.11.2
+
+FROM python:$py_version-slim-bullseye as base
+
+RUN apt-get update \
+  && apt-get dist-upgrade -y \
+  && apt-get install -y --no-install-recommends \
+    build-essential=12.9 \
+    ca-certificates=20210119 \
+    gcc=4:10.2.1-1 \
+    git=1:2.30.2-1+deb11u2 \
+    libpq-dev=13.14-0+deb11u1 \
+    libsasl2-dev=2.1.27+dfsg-2.1+deb11u1 \
+    make=4.3-4.1 \
+    openssh-client=1:8.4p1-5+deb11u3 \
+    python-dev-is-python2=2.7.18-9 \
+    software-properties-common=0.96.20.2-2.1 \
+    unixodbc-dev=2.3.6-0.1+b1 \
+  && apt-get clean \
+  && rm -rf \
+    /var/lib/apt/lists/* \
+    /tmp/* \
+    /var/tmp/*
+
+ENV PYTHONIOENCODING=utf-8
+ENV LANG=C.UTF-8
+
+RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir
+
+
+FROM base as dbt-spark
+
+ARG commit_ref=main
+ARG extras=all
+
+HEALTHCHECK CMD dbt --version || exit 1
+
+WORKDIR /usr/app/dbt/
+ENTRYPOINT ["dbt"]
+
+RUN python -m pip install --no-cache-dir "dbt-spark[${extras}] @ git+https://github.com/dbt-labs/dbt-spark@${commit_ref}"
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 000000000..42ca5e227
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,70 @@
+# Docker for dbt
+`Dockerfile` is suitable for building dbt Docker images locally or for use with CI/CD to automate populating a container registry.
+
+## Building an image:
+This Dockerfile can create images for the following target: `dbt-spark`
+
+In order to build a new image, run the following docker command.
+```shell
+docker build --tag  --target dbt-spark 
+```
+---
+> **Note:**  Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly!
+
+---
+
+By default the image will be populated with the latest version of `dbt-spark` on `main`.
+If you need to use a different version you can specify it by git ref using the `--build-arg` flag:
+```shell
+docker build --tag  \
+  --target dbt-spark \
+  --build-arg commit_ref= \
+  
+```
+
+### Examples:
+To build an image named "my-dbt" that supports Spark using the latest releases:
+```shell
+cd dbt-spark/docker
+docker build --tag my-dbt --target dbt-spark .
+```
+
+To build an image named "my-other-dbt" that supports Spark using the adapter version 1.0.0b1:
+```shell
+cd dbt-spark/docker
+docker build \
+  --tag my-other-dbt \
+  --target dbt-spark \
+  --build-arg commit_ref=v1.0.0b1 \
+ .
+```
+
+## Special cases
+There are a few special cases worth noting:
+* The `dbt-spark` adapter image can be built with one of three install extras named `PyHive`, `ODBC`, and the default `all`.
+If you wish to override this you can use the `--build-arg` flag with the value of `extras=`.
+See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information.
+```shell
+docker build --tag my_dbt \
+  --target dbt-spark \
+  --build-arg commit_ref=v1.0.0b1 \
+  --build-arg extras=PyHive \
+  
+```
+
+## Running an image in a container:
+The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal:
+```shell
+docker run \
+  --network=host \
+  --mount type=bind,source=path/to/project,target=/usr/app \
+  --mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \
+  my-dbt \
+  ls
+```
+---
+**Notes:**
+* Bind-mount sources _must_ be an absolute path
+* You may need to make adjustments to the docker networking setting depending on the specifics of your data warehouse/database host.
+
+---
diff --git a/docker/spark.Dockerfile b/docker/spark.Dockerfile
new file mode 100644
index 000000000..bb4d378ed
--- /dev/null
+++ b/docker/spark.Dockerfile
@@ -0,0 +1,30 @@
+ARG OPENJDK_VERSION=8
+FROM eclipse-temurin:${OPENJDK_VERSION}-jre
+
+ARG BUILD_DATE
+ARG SPARK_VERSION=3.3.2
+ARG HADOOP_VERSION=3
+
+LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.version=$SPARK_VERSION
+
+ENV SPARK_HOME /usr/spark
+ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
+
+RUN apt-get update && \
+    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
+    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
+    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
+    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
+    apt-get remove -y wget && \
+    apt-get autoremove -y && \
+    apt-get clean
+
+COPY entrypoint.sh /scripts/
+RUN chmod +x /scripts/entrypoint.sh
+
+ENTRYPOINT ["/scripts/entrypoint.sh"]
+CMD ["--help"]

From 7850da3d542608128ad80c41e03972649943d449 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Fri, 14 Jun 2024 05:00:32 +0000
Subject: [PATCH 37/44] Adap 1049/lazy load agate (#1050)

* Add changelog

* Lazy load agate.

* More comments on types and lint.

---------

Co-authored-by: Mila Page 
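
The pattern applied throughout `impl.py` is the usual recipe for deferring a heavy import: keep `agate` behind a `TYPE_CHECKING` guard (with string annotations) so mypy still sees the types, and import it locally inside the few methods that need it at runtime. A minimal, self-contained sketch of that pattern, not the adapter code itself:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by type checkers; agate stays off the runtime import path.
    import agate


def convert_number_type(agate_table: "agate.Table", col_idx: int) -> str:
    # Pay the import cost only when the method actually runs.
    import agate

    decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
    return "double" if decimals else "bigint"
```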
---
 .../Under the Hood-20240612-195629.yaml       |  6 +++
 dbt/adapters/spark/impl.py                    | 39 ++++++++++++-------
 2 files changed, 30 insertions(+), 15 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240612-195629.yaml

diff --git a/.changes/unreleased/Under the Hood-20240612-195629.yaml b/.changes/unreleased/Under the Hood-20240612-195629.yaml
new file mode 100644
index 000000000..c90ebcdab
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240612-195629.yaml	
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Lazy load agate to improve performance
+time: 2024-06-12T19:56:29.943204-07:00
+custom:
+  Author: versusfacit
+  Issue: "1049"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 255ab7806..d33ebde20 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -14,6 +14,7 @@
     Callable,
     Set,
     FrozenSet,
+    TYPE_CHECKING,
 )
 
 from dbt.adapters.base.relation import InformationSchema
@@ -24,7 +25,10 @@
 
 from typing_extensions import TypeAlias
 
-import agate
+if TYPE_CHECKING:
+    # Indirectly imported via agate_helper, which is lazy loaded further downfile.
+    # Used by mypy for earlier type hints.
+    import agate
 
 from dbt.adapters.base import AdapterConfig, PythonJobHelper
 from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport
@@ -127,34 +131,36 @@ def date_function(cls) -> str:
         return "current_timestamp()"
 
     @classmethod
-    def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_text_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "string"
 
     @classmethod
-    def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_number_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
+        import agate
+
         decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
         return "double" if decimals else "bigint"
 
     @classmethod
-    def convert_integer_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_integer_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "bigint"
 
     @classmethod
-    def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_date_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "date"
 
     @classmethod
-    def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_time_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "time"
 
     @classmethod
-    def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str:
+    def convert_datetime_type(cls, agate_table: "agate.Table", col_idx: int) -> str:
         return "timestamp"
 
     def quote(self, identifier: str) -> str:
         return "`{}`".format(identifier)
 
-    def _get_relation_information(self, row: agate.Row) -> RelationInfo:
+    def _get_relation_information(self, row: "agate.Row") -> RelationInfo:
         """relation info was fetched with SHOW TABLES EXTENDED"""
         try:
             _schema, name, _, information = row
@@ -165,7 +171,7 @@ def _get_relation_information(self, row: agate.Row) -> RelationInfo:
 
         return _schema, name, information
 
-    def _get_relation_information_using_describe(self, row: agate.Row) -> RelationInfo:
+    def _get_relation_information_using_describe(self, row: "agate.Row") -> RelationInfo:
         """Relation info fetched using SHOW TABLES and an auxiliary DESCRIBE statement"""
         try:
             _schema, name, _ = row
@@ -193,8 +199,8 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn
 
     def _build_spark_relation_list(
         self,
-        row_list: agate.Table,
-        relation_info_func: Callable[[agate.Row], RelationInfo],
+        row_list: "agate.Table",
+        relation_info_func: Callable[["agate.Row"], RelationInfo],
     ) -> List[BaseRelation]:
         """Aggregate relations with format metadata included."""
         relations = []
@@ -370,7 +376,7 @@ def get_catalog(
         self,
         relation_configs: Iterable[RelationConfig],
         used_schemas: FrozenSet[Tuple[str, str]],
-    ) -> Tuple[agate.Table, List[Exception]]:
+    ) -> Tuple["agate.Table", List[Exception]]:
         schema_map = self._get_catalog_schemas(relation_configs)
         if len(schema_map) > 1:
             raise CompilationError(
@@ -378,7 +384,7 @@ def get_catalog(
             )
 
         with executor(self.config) as tpe:
-            futures: List[Future[agate.Table]] = []
+            futures: List[Future["agate.Table"]] = []
             for info, schemas in schema_map.items():
                 for schema in schemas:
                     futures.append(
@@ -399,7 +405,7 @@ def _get_one_catalog(
         information_schema: InformationSchema,
         schemas: Set[str],
         used_schemas: FrozenSet[Tuple[str, str]],
-    ) -> agate.Table:
+    ) -> "agate.Table":
         if len(schemas) != 1:
             raise CompilationError(
                 f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}"
@@ -412,6 +418,9 @@ def _get_one_catalog(
         for relation in self.list_relations(database, schema):
             logger.debug("Getting table schema for relation {}", str(relation))
             columns.extend(self._get_columns_for_catalog(relation))
+
+        import agate
+
         return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER)
 
     def check_schema_exists(self, database: str, schema: str) -> bool:
@@ -486,7 +495,7 @@ def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
             "all_purpose_cluster": AllPurposeClusterPythonJobHelper,
         }
 
-    def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
+    def standardize_grants_dict(self, grants_table: "agate.Table") -> dict:
         grants_dict: Dict[str, List[str]] = {}
         for row in grants_table:
             grantee = row["Principal"]

From ba5b3f05bb699c9be59b39661cb8186ce7175587 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 21 Jun 2024 16:53:38 -0400
Subject: [PATCH 38/44] skip broken tests, link to the issue for resolution
 (#1056)

---
 tests/functional/adapter/test_python_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3e..957361cb7 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -15,7 +15,9 @@ class TestPythonModelSpark(BasePythonModelTests):
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
-    pass
+    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
+    def test_different_dataframes(self, project):
+        return super().test_different_dataframes(project)
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
@@ -69,6 +71,7 @@ class TestChangingSchemaSpark:
     def models(self):
         return {"simple_python_model.py": models__simple_python_model}
 
+    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_changing_schema_with_log_validation(self, project, logs_dir):
         run_dbt(["run"])
         write_file(

From cd6efba4f006f7bc3de761a02717ff9261b736a1 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Fri, 21 Jun 2024 16:50:48 -0500
Subject: [PATCH 39/44] update user docs-issue workflow (#1051)

* update user docs-issue workflow

* pre-commit fix

* update workflow based on feedback

* whitespace

* update to match bigquery

* pin numpy below the new 2.0 release

* remove numpy pin for its own pr
---
 .github/workflows/docs-issues.yml | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml
index 00a098df8..f49cf517c 100644
--- a/.github/workflows/docs-issues.yml
+++ b/.github/workflows/docs-issues.yml
@@ -1,19 +1,18 @@
 # **what?**
-# Open an issue in docs.getdbt.com when a PR is labeled `user docs`
+# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed
 
 # **why?**
 # To reduce barriers for keeping docs up to date
 
 # **when?**
-# When a PR is labeled `user docs` and is merged.  Runs on pull_request_target to run off the workflow already merged,
-# not the workflow that existed on the PR branch.  This allows old PRs to get comments.
+# When an issue is labeled `user docs` and is closed as completed.  Can be labeled before or after the issue is closed.
 
 
-name: Open issues in docs.getdbt.com repo when a PR is labeled
-run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}"
+name: Open issues in docs.getdbt.com repo when an issue is labeled
+run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}"
 
 on:
-  pull_request_target:
+  issues:
     types: [labeled, closed]
 
 defaults:
@@ -21,23 +20,22 @@ defaults:
     shell: bash
 
 permissions:
-    issues: write # opens new issues
-    pull-requests: write # comments on PRs
-
+    issues: write # comments on issues
 
 jobs:
   open_issues:
-    # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`.  Otherwise it runs the
+    # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned.
+    # If this logic does not exist in this workflow, it runs the
     # risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having
     # generated the comment before the other runs.  This lives here instead of the shared workflow because this is where we
     # decide if it should run or not.
     if: |
-      (github.event.pull_request.merged == true) &&
-      ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) ||
+      (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && (
+      (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) ||
       (github.event.action == 'labeled' && github.event.label.name == 'user docs'))
     uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
     with:
         issue_repository: "dbt-labs/docs.getdbt.com"
-        issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}"
+        issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
         issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
     secrets: inherit
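
For readers tracing the gate above, here is a rough Python mirror of the workflow's `if:` expression. It is a sketch only: the function name and event-payload shape are illustrative, not part of this repo or of the Actions expression engine.

def should_open_docs_issue(event, action, label_name):
    """Approximate the `if:` gate: fire only for issues closed as completed that carry the `user docs` label."""
    issue = event["issue"]
    closed_as_completed = issue["state"] == "closed" and issue.get("state_reason") == "completed"
    label_names = {label["name"] for label in issue.get("labels", [])}
    return closed_as_completed and (
        (action == "closed" and "user docs" in label_names)
        or (action == "labeled" and label_name == "user docs")
    )

Note that under this gate only one of the two events (labeled or closed) passes for a given ordering of labeling and closing, which is how the duplication risk described in the job comment is avoided.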

From 50634b9c6038016f888ea84be542d9a84e52a141 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Mon, 24 Jun 2024 09:09:25 -0700
Subject: [PATCH 40/44] update spark internal-release workflow (#1052)

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .github/workflows/release-internal.yml | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index eb892415c..d4e7a3c93 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -10,15 +10,12 @@
 #
 # Manual trigger.
 
-name: "Release internal patch"
+name: "Release to Cloud"
+run-name: "Release to Cloud off of ${{ inputs.ref }}"
 
 on:
   workflow_dispatch:
     inputs:
-      version_number:
-        description: "The release version number (i.e. 1.0.0b1)"
-        type: string
-        required: true
       ref:
         description: "The ref (sha or branch name) to use"
         type: string
@@ -29,6 +26,11 @@ on:
         type: string
         default: "python -c \"import dbt.adapters.spark\""
         required: true
+      skip_tests:
+        description: "Should the tests be skipped? (defaults to false)"
+        type: boolean
+        required: true
+        default: false
 
 defaults:
   run:
@@ -129,15 +131,14 @@ jobs:
         run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}
 
   invoke-reusable-workflow:
-    name: "Build and Release Internally"
+    name: "Create cloud release"
     needs: [run-integration-tests]
-
     uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main"
 
     with:
-      version_number: "${{ inputs.version_number }}"
       package_test_command: "${{ inputs.package_test_command }}"
       dbms_name: "spark"
       ref: "${{ inputs.ref }}"
+      skip_tests: "${{ inputs.skip_tests }}"
 
     secrets: "inherit"

From 824ca0f2249d145234f21d7e4066e033a273e2e2 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 26 Jun 2024 12:06:32 -0400
Subject: [PATCH 41/44] Update the spark version to the current version (#1055)

* update the spark version to the current version
* update pin for pydantic to resolve https://github.com/explosion/spaCy/issues/12659
* exclude koalas dataframes from test
---
 tests/functional/adapter/test_python_model.py | 35 ++++++++++++++++---
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 957361cb7..cd798d1da 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -15,9 +15,22 @@ class TestPythonModelSpark(BasePythonModelTests):
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
-    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_different_dataframes(self, project):
-        return super().test_different_dataframes(project)
+        """
+        Test that python models are supported using dataframes from:
+        - pandas
+        - pyspark
+        - pyspark.pandas (formerly databricks.koalas)
+
+        Note:
+            The CI environment is on Apache Spark >3.1, which includes koalas as pyspark.pandas.
+            The only Databricks runtime that supports Apache Spark <=3.1 is 9.1 LTS, which is EOL 2024-09-23.
+            For more information, see:
+            - https://github.com/databricks/koalas
+            - https://docs.databricks.com/en/release-notes/runtime/index.html
+        """
+        results = run_dbt(["run", "--exclude", "koalas_df"])
+        assert len(results) == 3
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
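
For context on what `test_different_dataframes` exercises, a minimal sketch of a dbt Python model returning a pyspark.pandas dataframe follows; the model body and column names are hypothetical and not taken from this repo.

import pandas
import pyspark.pandas  # ships with Apache Spark >= 3.2; formerly the standalone databricks.koalas package

def model(dbt, spark):
    dbt.config(materialized="table")
    # build a plain pandas frame, then hand dbt a pyspark.pandas frame;
    # the python materialization is expected to persist whichever supported dataframe type is returned
    pdf = pandas.DataFrame({"test": [1] * 10, "test2": [2] * 10})
    return pyspark.pandas.from_pandas(pdf)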
@@ -37,7 +50,7 @@ def model(dbt, spark):
         materialized='table',
         submission_method='job_cluster',
         job_cluster_config={
-            "spark_version": "7.3.x-scala2.12",
+            "spark_version": "12.2.x-scala2.12",
             "node_type_id": "i3.xlarge",
             "num_workers": 0,
             "spark_conf": {
@@ -48,7 +61,7 @@ def model(dbt, spark):
                 "ResourceClass": "SingleNode"
             }
         },
-        packages=['spacy', 'torch', 'pydantic<1.10.3']
+        packages=['spacy', 'torch', 'pydantic>=1.10.8']
     )
     data = [[1,2]] * 10
     return spark.createDataFrame(data, schema=['test', 'test2'])
@@ -67,11 +80,23 @@ def model(dbt, spark):
 
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestChangingSchemaSpark:
+    """
+    Confirm that we can set up a spot instance and pass required packages into the Databricks job.
+
+    Notes:
+        - This test generates a spot instance on demand using the settings from `job_cluster_config`
+        in `models__simple_python_model` above. It takes several minutes to run due to creating the cluster.
+        The job can be monitored via "Data Engineering > Job Runs" or "Workflows > Job Runs"
+        in the Databricks UI (instead of via the normal cluster).
+        - The `spark_version` argument will need to periodically be updated. It will eventually become
+        unsupported and start experiencing issues.
+        - See https://github.com/explosion/spaCy/issues/12659 for why we're pinning pydantic
+    """
+
     @pytest.fixture(scope="class")
     def models(self):
         return {"simple_python_model.py": models__simple_python_model}
 
-    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_changing_schema_with_log_validation(self, project, logs_dir):
         run_dbt(["run"])
         write_file(

From 034cb6118e808c1c9ad81d3553a136ac94b77781 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Mon, 8 Jul 2024 12:11:09 -0400
Subject: [PATCH 42/44] Base 207/add test (#1057)

* Add test for upstream change.
* Skip the spark_session profile since it doesn't handle this test.
* Import pytest to fix skip error.
* Dial in tests to reflect error messages from spark.

---------

Co-authored-by: Mila Page 
Co-authored-by: Mike Alfare 
---
 .../adapter/dbt_show/test_dbt_show.py         | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 tests/functional/adapter/dbt_show/test_dbt_show.py

diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/tests/functional/adapter/dbt_show/test_dbt_show.py
new file mode 100644
index 000000000..bc56fd908
--- /dev/null
+++ b/tests/functional/adapter/dbt_show/test_dbt_show.py
@@ -0,0 +1,22 @@
+import pytest
+
+from dbt.tests.adapter.dbt_show.test_dbt_show import (
+    BaseShowSqlHeader,
+    BaseShowLimit,
+    BaseShowDoesNotHandleDoubleLimit,
+)
+
+
+class TestSparkShowLimit(BaseShowLimit):
+    pass
+
+
+class TestSparkShowSqlHeader(BaseShowSqlHeader):
+    pass
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_http_cluster")
+class TestSparkShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit):
+    """The syntax message is quite variable across clusters, but this hits two at once."""
+
+    DATABASE_ERROR_MESSAGE = "limit"
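
The `DATABASE_ERROR_MESSAGE` override pins only a substring because the full syntax error text differs across cluster types. As a self-contained illustration of that matching style (not the dbt-tests-adapter implementation), consider:

import pytest

class FakeDatabaseError(Exception):
    """Stand-in for the adapter's database error type."""

def run_show_with_double_limit():
    # A real `dbt show` with a doubled LIMIT would surface the engine's syntax error;
    # the exact wording varies by cluster, but it mentions the offending keyword.
    raise FakeDatabaseError("ParseException: syntax error near 'limit'")

def test_double_limit_raises_with_expected_substring():
    with pytest.raises(FakeDatabaseError) as excinfo:
        run_show_with_double_limit()
    # assert only on the stable substring, mirroring DATABASE_ERROR_MESSAGE = "limit"
    assert "limit" in str(excinfo.value).lower()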

From 81a7cf25e9880fe1d8114e7e27cd6eacac01093e Mon Sep 17 00:00:00 2001
From: Colin 
Date: Wed, 10 Jul 2024 16:54:38 -0700
Subject: [PATCH 43/44] delete duplicate fixture

---
 tests/functional/adapter/test_python_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3e..1e40344ec 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -28,7 +28,6 @@ def project_config_update(self):
 models__simple_python_model = """
 import pandas
 import torch
-import spacy
 
 def model(dbt, spark):
     dbt.config(

From 2a31d3f28cc5e2fe6542a62e35b8c654c312f1ab Mon Sep 17 00:00:00 2001
From: Colin 
Date: Wed, 10 Jul 2024 16:56:47 -0700
Subject: [PATCH 44/44] Revert "delete duplicate fixture"

This reverts commit 81a7cf25e9880fe1d8114e7e27cd6eacac01093e.
---
 tests/functional/adapter/test_python_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 99c9fc955..cd798d1da 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -43,6 +43,7 @@ def project_config_update(self):
 models__simple_python_model = """
 import pandas
 import torch
+import spacy
 
 def model(dbt, spark):
     dbt.config(