From 5210d0a9746443c8a2cf217c16c919a73f3ba543 Mon Sep 17 00:00:00 2001
From: Ben Schreiber <74134279+ben-schreiber@users.noreply.github.com>
Date: Thu, 21 Dec 2023 19:45:14 +0200
Subject: [PATCH 01/44] Fix hardcoded file format in python materializations
(#955)
* Fix hardcoded file format in python materializations
* Add changelog
---
.changes/unreleased/Fixes-20231221-081949.yaml | 6 ++++++
dbt/include/spark/macros/materializations/table.sql | 2 +-
2 files changed, 7 insertions(+), 1 deletion(-)
create mode 100644 .changes/unreleased/Fixes-20231221-081949.yaml
diff --git a/.changes/unreleased/Fixes-20231221-081949.yaml b/.changes/unreleased/Fixes-20231221-081949.yaml
new file mode 100644
index 000000000..b10c8141e
--- /dev/null
+++ b/.changes/unreleased/Fixes-20231221-081949.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix hardcoded file format for python models
+time: 2023-12-21T08:19:49.630806+02:00
+custom:
+ Author: ben-schreiber
+ Issue: "803"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 39a9caba9..b7c56c424 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -98,7 +98,7 @@ else:
msg = f"{type(df)} is not a supported type for dbt Python materialization"
raise Exception(msg)
-df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
+df.write.mode("overwrite").format("{{ config.get('file_format', 'delta') }}").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
{%- endmacro -%}
{%macro py_script_comment()%}
From f9f75e92a3177979cd745440297d33536c93a348 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Wed, 10 Jan 2024 13:36:17 -0800
Subject: [PATCH 02/44] Migrate Off Circle CI / To Github Actions + dagger.io
(#923)
* Add Github action for integration test
* Update tox
* Fetch spark from https link
* Use Spark version 3.1.2
* Separate running Spark session and thrift
* Use Spark 3.1.2 and Hadoop 3.2
* Reset tox.ini
* Remove base pythons in tox.ini
* Fix reference to Docker compose file
* Remove timeout
* Remove artifact steps
* Bump Spark and Hadoop versions
* Reset Spark and Hadoop version
* Update comment
* Add changie
* add databricks and PR execution protections
* use single quotes
* remove `_target` suffix
* add comment to test
* specify container user as root
* formatting
* remove python setup for pre-existing container
* download simba
* fix curl call
* fix curl call
* fix curl call
* fix curl call
* fix curl call
* fix curl call
* fix db test naming
* confirm ODBC driver installed
* add odbc driver env var
* add odbc driver env var
* specify platform
* check odbc driver integrity
* add dbt user env var
* add dbt user env var
* fix host_name env var
* try removing architecture arg
* swap back to pull_request_target
* try running on host instead of container
* Update .github/workflows/integration.yml
Co-authored-by: Emily Rockman
\d+))?""" - _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}["']""" + _build = r"""(\+build[0-9]+)?""" + _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}{_build}["']""" with open(_version_path) as f: match = re.search(_version_pattern, f.read().strip()) if match is None: From 7129f59e123dc572721840926860dd350105aab7 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Thu, 28 Mar 2024 13:44:15 -0400 Subject: [PATCH 16/44] Pin `black>=24.3` (#1000) --- .changes/unreleased/Security-20240327-193553.yaml | 6 ++++++ dev-requirements.txt | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Security-20240327-193553.yaml diff --git a/.changes/unreleased/Security-20240327-193553.yaml b/.changes/unreleased/Security-20240327-193553.yaml new file mode 100644 index 000000000..daee50fd6 --- /dev/null +++ b/.changes/unreleased/Security-20240327-193553.yaml @@ -0,0 +1,6 @@ +kind: Security +body: Pin `black>=24.3` in `dev-requirements.txt` +time: 2024-03-27T19:35:53.102377-04:00 +custom: + Author: mikealfare + PR: "1000" diff --git a/dev-requirements.txt b/dev-requirements.txt index 8f674d84b..26522411a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -7,7 +7,7 @@ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor -black~=23.12 +black>=24.3 bumpversion~=0.6.0 click~=8.1 flake8~=6.1;python_version>="3.8" From 45b904cb06179f15c9b04c08b1bb7948ae3d5035 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:39:48 -0400 Subject: [PATCH 17/44] Pin `beartype<0.18.0` (#1001) * Pin `beartype<0.18.0` --- dagger/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dagger/requirements.txt b/dagger/requirements.txt index b50c448d3..f150e3093 100644 --- a/dagger/requirements.txt +++ 
b/dagger/requirements.txt @@ -1,2 +1,3 @@ +beartype<0.18.0 dagger-io~=0.9.7 python-dotenv From 377c65fb02f57001a52302291653ceef2081b97e Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:45:13 -0400 Subject: [PATCH 18/44] Add `dbt-core~=1.8.0a1` as convenience dep (#1002) * add `dbt-core~=1.8.0a1` as convenience dep --- .changes/unreleased/Dependencies-20240403-135436.yaml | 6 ++++++ setup.py | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 .changes/unreleased/Dependencies-20240403-135436.yaml diff --git a/.changes/unreleased/Dependencies-20240403-135436.yaml b/.changes/unreleased/Dependencies-20240403-135436.yaml new file mode 100644 index 000000000..345b70740 --- /dev/null +++ b/.changes/unreleased/Dependencies-20240403-135436.yaml @@ -0,0 +1,6 @@ +kind: Dependencies +body: Add `dbt-core` as a dependency to preserve backwards compatibility for installation +time: 2024-04-03T13:54:36.667724-04:00 +custom: + Author: mikealfare + PR: "1002" diff --git a/setup.py b/setup.py index 48339e7c0..325d31ccd 100644 --- a/setup.py +++ b/setup.py @@ -67,6 +67,8 @@ def _get_plugin_version_dict(): "sqlparams>=3.0.0", "dbt-common>=0.1.0a1,<2.0", "dbt-adapters>=0.1.0a1,<2.0", + # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency + "dbt-core>=1.8.0a1", ], extras_require={ "ODBC": odbc_extras, From da885394070094b6c632b06dabf7a2a4a50e5c0b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 16:01:05 -0400 Subject: [PATCH 19/44] [create-pull-request] automated change (#1003) Co-authored-by: Github Build Bot--- .bumpversion.cfg | 2 +- .changes/1.8.0-b2.md | 13 +++++++++++++ .../Dependencies-20240403-135436.yaml | 0 .../Features-20240318-033621.yaml | 0 .../Security-20240327-193553.yaml | 0 CHANGELOG.md | 17 ++++++++++++++++- dbt/adapters/spark/__version__.py | 2 +- setup.py | 2 +- 8 
files changed, 32 insertions(+), 4 deletions(-) create mode 100644 .changes/1.8.0-b2.md rename .changes/{unreleased => 1.8.0}/Dependencies-20240403-135436.yaml (100%) rename .changes/{unreleased => 1.8.0}/Features-20240318-033621.yaml (100%) rename .changes/{unreleased => 1.8.0}/Security-20240327-193553.yaml (100%) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 595914b21..aa22c8214 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.8.0b1 +current_version = 1.8.0b2 parse = (?P [\d]+) # major version number \.(?P [\d]+) # minor version number \.(?P [\d]+) # patch version number diff --git a/.changes/1.8.0-b2.md b/.changes/1.8.0-b2.md new file mode 100644 index 000000000..806d61158 --- /dev/null +++ b/.changes/1.8.0-b2.md @@ -0,0 +1,13 @@ +## dbt-spark 1.8.0-b2 - April 03, 2024 + +### Features + +- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38)) + +### Dependencies + +- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002)) + +### Security + +- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000)) diff --git a/.changes/unreleased/Dependencies-20240403-135436.yaml b/.changes/1.8.0/Dependencies-20240403-135436.yaml similarity index 100% rename from .changes/unreleased/Dependencies-20240403-135436.yaml rename to .changes/1.8.0/Dependencies-20240403-135436.yaml diff --git a/.changes/unreleased/Features-20240318-033621.yaml b/.changes/1.8.0/Features-20240318-033621.yaml similarity index 100% rename from .changes/unreleased/Features-20240318-033621.yaml rename to .changes/1.8.0/Features-20240318-033621.yaml diff --git a/.changes/unreleased/Security-20240327-193553.yaml b/.changes/1.8.0/Security-20240327-193553.yaml similarity index 100% rename from .changes/unreleased/Security-20240327-193553.yaml rename to 
.changes/1.8.0/Security-20240327-193553.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index d65c50be4..81f0575dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry) +## dbt-spark 1.8.0-b2 - April 03, 2024 + +### Features + +- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38)) + +### Dependencies + +- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002)) + +### Security + +- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000)) + + + ## dbt-spark 1.8.0-b1 - March 01, 2024 ### Features @@ -45,7 +61,6 @@ - [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) - [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) - ## Previous Releases For information on prior major and minor releases, see their changelogs: - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 6496f3e22..7d16c28f0 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "1.8.0b1" +version = "1.8.0b2" diff --git a/setup.py b/setup.py index 325d31ccd..55112e3f2 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def _get_plugin_version_dict(): package_name = "dbt-spark" -package_version = "1.8.0b1" +package_version = "1.8.0b2" 
description = """The Apache Spark adapter plugin for dbt""" odbc_extras = ["pyodbc~=4.0.39"] From 0646c2403b97be2f83e70cac7233f22b35d496d7 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Thu, 11 Apr 2024 16:09:34 -0500 Subject: [PATCH 20/44] test name change (#1005) --- tests/functional/adapter/dbt_clone/test_dbt_clone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/adapter/dbt_clone/test_dbt_clone.py b/tests/functional/adapter/dbt_clone/test_dbt_clone.py index a5e8d70e0..80e919a24 100644 --- a/tests/functional/adapter/dbt_clone/test_dbt_clone.py +++ b/tests/functional/adapter/dbt_clone/test_dbt_clone.py @@ -15,7 +15,7 @@ @pytest.mark.skip_profile("apache_spark", "spark_session") -class TestSparkBigqueryClonePossible(BaseClonePossible): +class TestSparkClonePossible(BaseClonePossible): @pytest.fixture(scope="class") def models(self): return { From e93deea64800d9c60a014d5e2a366b19ef630e17 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Thu, 18 Apr 2024 22:47:20 -0400 Subject: [PATCH 21/44] Update dependabot config to cover GHA and Docker (#1006) * Update dependabot config to cover GHA and Docker --- .../unreleased/Under the Hood-20240410-183535.yaml | 6 ++++++ .github/dependabot.yml | 11 ++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Under the Hood-20240410-183535.yaml diff --git a/.changes/unreleased/Under the Hood-20240410-183535.yaml b/.changes/unreleased/Under the Hood-20240410-183535.yaml new file mode 100644 index 000000000..54b69f285 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240410-183535.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Update dependabot config to cover GHA and Docker +time: 2024-04-10T18:35:35.093246-04:00 +custom: + Author: mikealfare + Issue: "1006" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 
2a6f34492..ae2be43aa 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,8 +1,17 @@ version: 2 updates: - # python dependencies - package-ecosystem: "pip" directory: "/" schedule: interval: "daily" rebase-strategy: "disabled" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + rebase-strategy: "disabled" + - package-ecosystem: "docker" + directory: "/docker" + schedule: + interval: "weekly" + rebase-strategy: "disabled" From 080ba71b2689a67d9195dcae2cb6e9b0116e76dc Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Fri, 19 Apr 2024 12:48:09 -0400 Subject: [PATCH 22/44] add pre-commit check for dbt-core (#1015) --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e7fdbd04..882a32769 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,6 +14,10 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - id: check-case-conflict +- repo: https://github.com/dbt-labs/pre-commit-hooks + rev: v0.1.0a1 + hooks: + - id: dbt-core-in-adapters-check - repo: https://github.com/psf/black rev: 23.1.0 hooks: From c5742f225f20938f16c4ce95ba7e71d6874a198c Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:20:44 -0400 Subject: [PATCH 23/44] pin macos test runners to macos-12 (#1016) --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 68911710f..189a8847c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -173,7 +173,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-12, windows-latest] python-version: ["3.8", "3.9", "3.10", "3.11"] steps: From c0c3f4f0f81004d49ab7aa7209aae096d6b8eaea Mon Sep 17 00:00:00 2001 
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 23:48:22 +0000 Subject: [PATCH 24/44] Bump actions/checkout from 3 to 4 (#1014) * Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> --- .changes/unreleased/Dependencies-20240419-024916.yaml | 6 ++++++ .github/workflows/integration.yml | 4 ++-- .github/workflows/main.yml | 6 +++--- .github/workflows/release-internal.yml | 6 +++--- .github/workflows/release-prep.yml | 6 +++--- 5 files changed, 17 insertions(+), 11 deletions(-) create mode 100644 .changes/unreleased/Dependencies-20240419-024916.yaml diff --git a/.changes/unreleased/Dependencies-20240419-024916.yaml b/.changes/unreleased/Dependencies-20240419-024916.yaml new file mode 100644 index 000000000..1ef46465e --- /dev/null +++ b/.changes/unreleased/Dependencies-20240419-024916.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Bump actions/checkout from 3 to 4" +time: 2024-04-19T02:49:16.00000Z +custom: + Author: dependabot[bot] + PR: 1014 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 94dece350..6e5f42af7 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -77,7 +77,7 @@ jobs: steps: - name: Check out the repository if: 
github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -85,7 +85,7 @@ jobs: # this is necessary for the `pull_request` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 189a8847c..ebd0a3bd7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,7 +42,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -82,7 +82,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -123,7 +123,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml index 6334fd282..d8b1e4592 100644 --- a/.github/workflows/release-internal.yml +++ b/.github/workflows/release-internal.yml @@ -46,7 +46,7 @@ jobs: steps: - name: "Check out the repository" - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}" uses: actions/setup-python@v4 @@ -100,7 +100,7 @@ jobs: steps: - name: Check out the repository if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -108,7 +108,7 @@ jobs: # this is necessary for the `pull_request` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false 
ref: ${{ github.event.pull_request.head.ref }} diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml index 1a6e450c4..e214de828 100644 --- a/.github/workflows/release-prep.yml +++ b/.github/workflows/release-prep.yml @@ -452,7 +452,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -505,7 +505,7 @@ jobs: steps: - name: Check out the repository if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -513,7 +513,7 @@ jobs: # this is necessary for the `pull_request` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} From 8dfd12c16b6d0f2a6abf2213dc33cb7cf61ce45c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 23:59:09 +0000 Subject: [PATCH 25/44] Bump actions/setup-python from 4 to 5 (#1013) * Bump actions/setup-python from 4 to 5 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> --- .changes/unreleased/Dependencies-20240419-024912.yaml | 6 ++++++ .github/workflows/integration.yml | 2 +- .github/workflows/main.yml | 8 ++++---- .github/workflows/release-internal.yml | 4 ++-- .github/workflows/release-prep.yml | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 .changes/unreleased/Dependencies-20240419-024912.yaml diff --git a/.changes/unreleased/Dependencies-20240419-024912.yaml b/.changes/unreleased/Dependencies-20240419-024912.yaml new file mode 100644 index 000000000..ea5b75aa9 --- /dev/null +++ b/.changes/unreleased/Dependencies-20240419-024912.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Bump actions/setup-python from 4 to 5" +time: 2024-04-19T02:49:12.00000Z +custom: + Author: dependabot[bot] + PR: 1013 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6e5f42af7..e82a83c4d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -92,7 +92,7 @@ jobs: # the python version used here is not what is used in the tests themselves - name: Set up Python for dagger - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ebd0a3bd7..c71df970a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -47,7 +47,7 @@ jobs: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' @@ -85,7 +85,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: 
actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -126,7 +126,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' @@ -178,7 +178,7 @@ jobs: steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml index d8b1e4592..eb892415c 100644 --- a/.github/workflows/release-internal.yml +++ b/.github/workflows/release-internal.yml @@ -49,7 +49,7 @@ jobs: uses: actions/checkout@v4 - name: "Set up Python ${{ env.PYTHON_TARGET_VERSION }}" - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "${{ env.PYTHON_TARGET_VERSION }}" @@ -115,7 +115,7 @@ jobs: # the python version used here is not what is used in the tests themselves - name: Set up Python for dagger - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml index e214de828..0061a8602 100644 --- a/.github/workflows/release-prep.yml +++ b/.github/workflows/release-prep.yml @@ -455,7 +455,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -520,7 +520,7 @@ jobs: # the python version used here is not what is used in the tests themselves - name: Set up Python for dagger - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" From 804567cea3f64af2a3b19bb51db76b637ed38ae5 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Fri, 3 May 2024 15:36:45 -0400 Subject: [PATCH 26/44] skip tests on `main` to unblock CI, create 
issues to fix them (#1034) --- tests/functional/adapter/test_constraints.py | 11 +++++++++++ tests/functional/adapter/test_python_model.py | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index 41b50ef81..654bbdb5f 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -358,6 +358,17 @@ def models(self): "constraints_schema.yml": constraints_yml, } + @pytest.mark.skip( + "Databricks now raises an exception, which gets raised prior to the `expected_pass` check." + "See https://github.com/dbt-labs/dbt-spark/issues/1009" + ) + def test__constraints_enforcement_rollback( + self, project, expected_color, expected_error_messages, null_model_sql + ): + super().test__constraints_enforcement_rollback( + project, expected_color, expected_error_messages, null_model_sql + ) + # TODO: Like the tests above, this does test that model-level constraints don't # result in errors, but it does not verify that they are actually present in diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 1195cbd3e..05e25c5f4 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -24,6 +24,13 @@ class TestPythonIncrementalModelSpark(BasePythonIncrementalTests): def project_config_update(self): return {} + @pytest.mark.skip( + "Databricks can't find the transaction log" + "See https://github.com/dbt-labs/dbt-spark/issues/1033" + ) + def test_incremental(self, project): + super().test_incremental(project) + models__simple_python_model = """ import pandas From bec191158fa3a5586c2a8f2005ac3619d3e3b3a9 Mon Sep 17 00:00:00 2001 From: FishtownBuildBot <77737458+FishtownBuildBot@users.noreply.github.com> Date: Mon, 6 May 2024 17:42:37 -0400 Subject: [PATCH 27/44] Cleanup main after cutting new 1.8.latest branch (#1032) * Clean up 
changelog on main * Bumping version to 1.9.0a1 * Code quality cleanup --------- Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> --- .bumpversion.cfg | 2 +- .changes/1.8.0-b1.md | 39 ------------- .changes/1.8.0-b2.md | 13 ----- .../1.8.0/Dependencies-20231009-220732.yaml | 6 -- .../1.8.0/Dependencies-20231010-222853.yaml | 6 -- .../1.8.0/Dependencies-20231010-222910.yaml | 6 -- .../1.8.0/Dependencies-20231013-223750.yaml | 6 -- .../1.8.0/Dependencies-20231027-230251.yaml | 6 -- .../1.8.0/Dependencies-20231027-230254.yaml | 6 -- .../1.8.0/Dependencies-20231027-230301.yaml | 6 -- .../1.8.0/Dependencies-20231108-222326.yaml | 6 -- .../1.8.0/Dependencies-20231110-224056.yaml | 6 -- .../1.8.0/Dependencies-20231113-224111.yaml | 6 -- .../1.8.0/Dependencies-20231127-220733.yaml | 6 -- .../1.8.0/Dependencies-20231127-220737.yaml | 6 -- .../1.8.0/Dependencies-20231127-220741.yaml | 6 -- .../1.8.0/Dependencies-20231204-224210.yaml | 6 -- .../1.8.0/Dependencies-20231212-223929.yaml | 6 -- .../1.8.0/Dependencies-20240403-135436.yaml | 6 -- .changes/1.8.0/Features-20240220-195925.yaml | 6 -- .changes/1.8.0/Features-20240318-033621.yaml | 6 -- .changes/1.8.0/Fixes-20231107-134141.yaml | 6 -- .changes/1.8.0/Fixes-20231221-081949.yaml | 6 -- .changes/1.8.0/Security-20240327-193553.yaml | 6 -- .../1.8.0/Under the Hood-20230929-161218.yaml | 6 -- .../1.8.0/Under the Hood-20231119-132050.yaml | 6 -- .../1.8.0/Under the Hood-20231214-134728.yaml | 6 -- .../1.8.0/Under the Hood-20240111-114806.yaml | 6 -- .../Dependencies-20240419-024912.yaml | 6 -- .../Dependencies-20240419-024916.yaml | 6 -- .../Under the Hood-20240410-183535.yaml | 6 -- CHANGELOG.md | 56 ------------------- dbt/adapters/spark/__version__.py | 2 +- setup.py | 2 +- 34 files changed, 3 insertions(+), 279 deletions(-) delete mode 100644 .changes/1.8.0-b1.md delete mode 100644 .changes/1.8.0-b2.md delete mode 100644 .changes/1.8.0/Dependencies-20231009-220732.yaml delete mode 100644 
.changes/1.8.0/Dependencies-20231010-222853.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231010-222910.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231013-223750.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231027-230251.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231027-230254.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231027-230301.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231108-222326.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231110-224056.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231113-224111.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231127-220733.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231127-220737.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231127-220741.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231204-224210.yaml delete mode 100644 .changes/1.8.0/Dependencies-20231212-223929.yaml delete mode 100644 .changes/1.8.0/Dependencies-20240403-135436.yaml delete mode 100644 .changes/1.8.0/Features-20240220-195925.yaml delete mode 100644 .changes/1.8.0/Features-20240318-033621.yaml delete mode 100644 .changes/1.8.0/Fixes-20231107-134141.yaml delete mode 100644 .changes/1.8.0/Fixes-20231221-081949.yaml delete mode 100644 .changes/1.8.0/Security-20240327-193553.yaml delete mode 100644 .changes/1.8.0/Under the Hood-20230929-161218.yaml delete mode 100644 .changes/1.8.0/Under the Hood-20231119-132050.yaml delete mode 100644 .changes/1.8.0/Under the Hood-20231214-134728.yaml delete mode 100644 .changes/1.8.0/Under the Hood-20240111-114806.yaml delete mode 100644 .changes/unreleased/Dependencies-20240419-024912.yaml delete mode 100644 .changes/unreleased/Dependencies-20240419-024916.yaml delete mode 100644 .changes/unreleased/Under the Hood-20240410-183535.yaml diff --git a/.bumpversion.cfg b/.bumpversion.cfg index aa22c8214..77a3f463f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 
1.8.0b2 +current_version = 1.9.0a1 parse = (?P [\d]+) # major version number \.(?P [\d]+) # minor version number \.(?P [\d]+) # patch version number diff --git a/.changes/1.8.0-b1.md b/.changes/1.8.0-b1.md deleted file mode 100644 index 4f4091a91..000000000 --- a/.changes/1.8.0-b1.md +++ /dev/null @@ -1,39 +0,0 @@ -## dbt-spark 1.8.0-b1 - March 01, 2024 - -### Features - -- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987)) - -### Fixes - -- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935)) -- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) - -### Under the Hood - -- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) -- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949)) -- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962)) -- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972)) - -### Dependencies - -- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903)) -- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904)) -- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905)) -- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914)) -- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925)) -- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926)) -- Update black requirement from ~=23.9 to ~=23.10 
([#927](https://github.com/dbt-labs/dbt-spark/pull/927)) -- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942)) -- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946)) -- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947)) -- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951)) -- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952)) -- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953)) -- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956)) -- Update black requirement from ~=23.11 to ~=23.12 ([#959](https://github.com/dbt-labs/dbt-spark/pull/959)) - -### Contributors -- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) -- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) diff --git a/.changes/1.8.0-b2.md b/.changes/1.8.0-b2.md deleted file mode 100644 index 806d61158..000000000 --- a/.changes/1.8.0-b2.md +++ /dev/null @@ -1,13 +0,0 @@ -## dbt-spark 1.8.0-b2 - April 03, 2024 - -### Features - -- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38)) - -### Dependencies - -- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002)) - -### Security - -- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000)) diff --git a/.changes/1.8.0/Dependencies-20231009-220732.yaml b/.changes/1.8.0/Dependencies-20231009-220732.yaml deleted file mode 100644 index 4b9073fae..000000000 --- a/.changes/1.8.0/Dependencies-20231009-220732.yaml +++ /dev/null @@ 
-1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pre-commit-hooks requirement from ~=4.4 to ~=4.5" -time: 2023-10-09T22:07:32.00000Z -custom: - Author: dependabot[bot] - PR: 903 diff --git a/.changes/1.8.0/Dependencies-20231010-222853.yaml b/.changes/1.8.0/Dependencies-20231010-222853.yaml deleted file mode 100644 index 43b481edb..000000000 --- a/.changes/1.8.0/Dependencies-20231010-222853.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.5.1 to 1.6.0" -time: 2023-10-10T22:28:53.00000Z -custom: - Author: dependabot[bot] - PR: 904 diff --git a/.changes/1.8.0/Dependencies-20231010-222910.yaml b/.changes/1.8.0/Dependencies-20231010-222910.yaml deleted file mode 100644 index c6dbe582a..000000000 --- a/.changes/1.8.0/Dependencies-20231010-222910.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pyodbc requirement from ~=4.0.39 to ~=5.0.0" -time: 2023-10-10T22:29:10.00000Z -custom: - Author: dependabot[bot] - PR: 905 diff --git a/.changes/1.8.0/Dependencies-20231013-223750.yaml b/.changes/1.8.0/Dependencies-20231013-223750.yaml deleted file mode 100644 index 2cea8c6dd..000000000 --- a/.changes/1.8.0/Dependencies-20231013-223750.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pre-commit requirement from ~=3.4 to ~=3.5" -time: 2023-10-13T22:37:50.00000Z -custom: - Author: dependabot[bot] - PR: 914 diff --git a/.changes/1.8.0/Dependencies-20231027-230251.yaml b/.changes/1.8.0/Dependencies-20231027-230251.yaml deleted file mode 100644 index 4ad0d65f8..000000000 --- a/.changes/1.8.0/Dependencies-20231027-230251.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pyodbc requirement from ~=5.0.0 to ~=5.0.1" -time: 2023-10-27T23:02:51.00000Z -custom: - Author: dependabot[bot] - PR: 925 diff --git a/.changes/1.8.0/Dependencies-20231027-230254.yaml b/.changes/1.8.0/Dependencies-20231027-230254.yaml deleted file mode 100644 index f1b745210..000000000 --- 
a/.changes/1.8.0/Dependencies-20231027-230254.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.6.0 to 1.6.1" -time: 2023-10-27T23:02:54.00000Z -custom: - Author: dependabot[bot] - PR: 926 diff --git a/.changes/1.8.0/Dependencies-20231027-230301.yaml b/.changes/1.8.0/Dependencies-20231027-230301.yaml deleted file mode 100644 index 74548ddea..000000000 --- a/.changes/1.8.0/Dependencies-20231027-230301.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.9 to ~=23.10" -time: 2023-10-27T23:03:01.00000Z -custom: - Author: dependabot[bot] - PR: 927 diff --git a/.changes/1.8.0/Dependencies-20231108-222326.yaml b/.changes/1.8.0/Dependencies-20231108-222326.yaml deleted file mode 100644 index fdd35004a..000000000 --- a/.changes/1.8.0/Dependencies-20231108-222326.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.10 to ~=23.11" -time: 2023-11-08T22:23:26.00000Z -custom: - Author: dependabot[bot] - PR: 942 diff --git a/.changes/1.8.0/Dependencies-20231110-224056.yaml b/.changes/1.8.0/Dependencies-20231110-224056.yaml deleted file mode 100644 index efdf9a549..000000000 --- a/.changes/1.8.0/Dependencies-20231110-224056.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.6.1 to 1.7.0" -time: 2023-11-10T22:40:56.00000Z -custom: - Author: dependabot[bot] - PR: 946 diff --git a/.changes/1.8.0/Dependencies-20231113-224111.yaml b/.changes/1.8.0/Dependencies-20231113-224111.yaml deleted file mode 100644 index b2fb306c7..000000000 --- a/.changes/1.8.0/Dependencies-20231113-224111.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest-xdist requirement from ~=3.3 to ~=3.4" -time: 2023-11-13T22:41:11.00000Z -custom: - Author: dependabot[bot] - PR: 947 diff --git a/.changes/1.8.0/Dependencies-20231127-220733.yaml b/.changes/1.8.0/Dependencies-20231127-220733.yaml deleted file mode 100644 index 
be23a48f7..000000000 --- a/.changes/1.8.0/Dependencies-20231127-220733.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest-xdist requirement from ~=3.4 to ~=3.5" -time: 2023-11-27T22:07:33.00000Z -custom: - Author: dependabot[bot] - PR: 951 diff --git a/.changes/1.8.0/Dependencies-20231127-220737.yaml b/.changes/1.8.0/Dependencies-20231127-220737.yaml deleted file mode 100644 index 60e2be67f..000000000 --- a/.changes/1.8.0/Dependencies-20231127-220737.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update wheel requirement from ~=0.41 to ~=0.42" -time: 2023-11-27T22:07:37.00000Z -custom: - Author: dependabot[bot] - PR: 952 diff --git a/.changes/1.8.0/Dependencies-20231127-220741.yaml b/.changes/1.8.0/Dependencies-20231127-220741.yaml deleted file mode 100644 index 63d572f2b..000000000 --- a/.changes/1.8.0/Dependencies-20231127-220741.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.7.0 to 1.7.1" -time: 2023-11-27T22:07:41.00000Z -custom: - Author: dependabot[bot] - PR: 953 diff --git a/.changes/1.8.0/Dependencies-20231204-224210.yaml b/.changes/1.8.0/Dependencies-20231204-224210.yaml deleted file mode 100644 index c415934db..000000000 --- a/.changes/1.8.0/Dependencies-20231204-224210.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update freezegun requirement from ~=1.2 to ~=1.3" -time: 2023-12-04T22:42:10.00000Z -custom: - Author: dependabot[bot] - PR: 956 diff --git a/.changes/1.8.0/Dependencies-20231212-223929.yaml b/.changes/1.8.0/Dependencies-20231212-223929.yaml deleted file mode 100644 index 65f308f75..000000000 --- a/.changes/1.8.0/Dependencies-20231212-223929.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.11 to ~=23.12" -time: 2023-12-12T22:39:29.00000Z -custom: - Author: dependabot[bot] - PR: 959 diff --git a/.changes/1.8.0/Dependencies-20240403-135436.yaml b/.changes/1.8.0/Dependencies-20240403-135436.yaml 
deleted file mode 100644 index 345b70740..000000000 --- a/.changes/1.8.0/Dependencies-20240403-135436.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Dependencies -body: Add `dbt-core` as a dependency to preserve backwards compatibility for installation -time: 2024-04-03T13:54:36.667724-04:00 -custom: - Author: mikealfare - PR: "1002" diff --git a/.changes/1.8.0/Features-20240220-195925.yaml b/.changes/1.8.0/Features-20240220-195925.yaml deleted file mode 100644 index c5d86ab7c..000000000 --- a/.changes/1.8.0/Features-20240220-195925.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Implement spark__safe_cast and add functional tests for unit testing -time: 2024-02-20T19:59:25.907821-05:00 -custom: - Author: michelleark - Issue: "987" diff --git a/.changes/1.8.0/Features-20240318-033621.yaml b/.changes/1.8.0/Features-20240318-033621.yaml deleted file mode 100644 index a1a1b9c5b..000000000 --- a/.changes/1.8.0/Features-20240318-033621.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: ': Add new workflow for internal patch releases' -time: 2024-03-18T03:36:21.634918-07:00 -custom: - Author: versusfacit - Issue: "38" diff --git a/.changes/1.8.0/Fixes-20231107-134141.yaml b/.changes/1.8.0/Fixes-20231107-134141.yaml deleted file mode 100644 index d4f56eaca..000000000 --- a/.changes/1.8.0/Fixes-20231107-134141.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Support new agate Integer type and empty seed test -time: 2023-11-07T13:41:41.033441-05:00 -custom: - Author: gshank - Issue: "935" diff --git a/.changes/1.8.0/Fixes-20231221-081949.yaml b/.changes/1.8.0/Fixes-20231221-081949.yaml deleted file mode 100644 index b10c8141e..000000000 --- a/.changes/1.8.0/Fixes-20231221-081949.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix hardcoded file format for python models -time: 2023-12-21T08:19:49.630806+02:00 -custom: - Author: ben-schreiber - Issue: "803" diff --git a/.changes/1.8.0/Security-20240327-193553.yaml 
b/.changes/1.8.0/Security-20240327-193553.yaml deleted file mode 100644 index daee50fd6..000000000 --- a/.changes/1.8.0/Security-20240327-193553.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Security -body: Pin `black>=24.3` in `dev-requirements.txt` -time: 2024-03-27T19:35:53.102377-04:00 -custom: - Author: mikealfare - PR: "1000" diff --git a/.changes/1.8.0/Under the Hood-20230929-161218.yaml b/.changes/1.8.0/Under the Hood-20230929-161218.yaml deleted file mode 100644 index 9b5c6818b..000000000 --- a/.changes/1.8.0/Under the Hood-20230929-161218.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. -time: 2023-09-29T16:12:18.968755+02:00 -custom: - Author: JCZuurmond, colin-rogers-dbt - Issue: "719" diff --git a/.changes/1.8.0/Under the Hood-20231119-132050.yaml b/.changes/1.8.0/Under the Hood-20231119-132050.yaml deleted file mode 100644 index 61c7f8ab8..000000000 --- a/.changes/1.8.0/Under the Hood-20231119-132050.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Add tests for --empty flag -time: 2023-11-19T13:20:50.076459-05:00 -custom: - Author: michelleark - Issue: "949" diff --git a/.changes/1.8.0/Under the Hood-20231214-134728.yaml b/.changes/1.8.0/Under the Hood-20231214-134728.yaml deleted file mode 100644 index b1de2ddb7..000000000 --- a/.changes/1.8.0/Under the Hood-20231214-134728.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Remove unused `invalid_insert_overwrite_delta_msg` message -time: 2023-12-14T13:47:28.444107-07:00 -custom: - Author: dbeatty10 - Issue: "962" diff --git a/.changes/1.8.0/Under the Hood-20240111-114806.yaml b/.changes/1.8.0/Under the Hood-20240111-114806.yaml deleted file mode 100644 index 31705f468..000000000 --- a/.changes/1.8.0/Under the Hood-20240111-114806.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Update import paths and list_relations to support decoupling 
adapters/core -time: 2024-01-11T11:48:06.120111-08:00 -custom: - Author: colin-rogers-dbt - Issue: "972" diff --git a/.changes/unreleased/Dependencies-20240419-024912.yaml b/.changes/unreleased/Dependencies-20240419-024912.yaml deleted file mode 100644 index ea5b75aa9..000000000 --- a/.changes/unreleased/Dependencies-20240419-024912.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump actions/setup-python from 4 to 5" -time: 2024-04-19T02:49:12.00000Z -custom: - Author: dependabot[bot] - PR: 1013 diff --git a/.changes/unreleased/Dependencies-20240419-024916.yaml b/.changes/unreleased/Dependencies-20240419-024916.yaml deleted file mode 100644 index 1ef46465e..000000000 --- a/.changes/unreleased/Dependencies-20240419-024916.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump actions/checkout from 3 to 4" -time: 2024-04-19T02:49:16.00000Z -custom: - Author: dependabot[bot] - PR: 1014 diff --git a/.changes/unreleased/Under the Hood-20240410-183535.yaml b/.changes/unreleased/Under the Hood-20240410-183535.yaml deleted file mode 100644 index 54b69f285..000000000 --- a/.changes/unreleased/Under the Hood-20240410-183535.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Update dependabot config to cover GHA and Docker -time: 2024-04-10T18:35:35.093246-04:00 -custom: - Author: mikealfare - Issue: "1006" diff --git a/CHANGELOG.md b/CHANGELOG.md index 81f0575dd..902db37fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,62 +5,6 @@ - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). 
For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry) -## dbt-spark 1.8.0-b2 - April 03, 2024 - -### Features - -- : Add new workflow for internal patch releases ([#38](https://github.com/dbt-labs/dbt-spark/issues/38)) - -### Dependencies - -- Add `dbt-core` as a dependency to preserve backwards compatibility for installation ([#1002](https://github.com/dbt-labs/dbt-spark/pull/1002)) - -### Security - -- Pin `black>=24.3` in `dev-requirements.txt` ([#1000](https://github.com/dbt-labs/dbt-spark/pull/1000)) - - - -## dbt-spark 1.8.0-b1 - March 01, 2024 - -### Features - -- Implement spark__safe_cast and add functional tests for unit testing ([#987](https://github.com/dbt-labs/dbt-spark/issues/987)) - -### Fixes - -- Support new agate Integer type and empty seed test ([#935](https://github.com/dbt-labs/dbt-spark/issues/935)) -- Fix hardcoded file format for python models ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) - -### Under the Hood - -- Add GitHub action for integration testing and use dagger-io to run tests. Remove CircleCI workflow. 
([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) -- Add tests for --empty flag ([#949](https://github.com/dbt-labs/dbt-spark/issues/949)) -- Remove unused `invalid_insert_overwrite_delta_msg` message ([#962](https://github.com/dbt-labs/dbt-spark/issues/962)) -- Update import paths and list_relations to support decoupling adapters/core ([#972](https://github.com/dbt-labs/dbt-spark/issues/972)) - -### Dependencies - -- Update pre-commit-hooks requirement from ~=4.4 to ~=4.5 ([#903](https://github.com/dbt-labs/dbt-spark/pull/903)) -- Bump mypy from 1.5.1 to 1.6.0 ([#904](https://github.com/dbt-labs/dbt-spark/pull/904)) -- Update pyodbc requirement from ~=4.0.39 to ~=5.0.0 ([#905](https://github.com/dbt-labs/dbt-spark/pull/905)) -- Update pre-commit requirement from ~=3.4 to ~=3.5 ([#914](https://github.com/dbt-labs/dbt-spark/pull/914)) -- Update pyodbc requirement from ~=5.0.0 to ~=5.0.1 ([#925](https://github.com/dbt-labs/dbt-spark/pull/925)) -- Bump mypy from 1.6.0 to 1.6.1 ([#926](https://github.com/dbt-labs/dbt-spark/pull/926)) -- Update black requirement from ~=23.9 to ~=23.10 ([#927](https://github.com/dbt-labs/dbt-spark/pull/927)) -- Update black requirement from ~=23.10 to ~=23.11 ([#942](https://github.com/dbt-labs/dbt-spark/pull/942)) -- Bump mypy from 1.6.1 to 1.7.0 ([#946](https://github.com/dbt-labs/dbt-spark/pull/946)) -- Update pytest-xdist requirement from ~=3.3 to ~=3.4 ([#947](https://github.com/dbt-labs/dbt-spark/pull/947)) -- Update pytest-xdist requirement from ~=3.4 to ~=3.5 ([#951](https://github.com/dbt-labs/dbt-spark/pull/951)) -- Update wheel requirement from ~=0.41 to ~=0.42 ([#952](https://github.com/dbt-labs/dbt-spark/pull/952)) -- Bump mypy from 1.7.0 to 1.7.1 ([#953](https://github.com/dbt-labs/dbt-spark/pull/953)) -- Update freezegun requirement from ~=1.2 to ~=1.3 ([#956](https://github.com/dbt-labs/dbt-spark/pull/956)) -- Update black requirement from ~=23.11 to ~=23.12 
([#959](https://github.com/dbt-labs/dbt-spark/pull/959)) - -### Contributors -- [@JCZuurmond,](https://github.com/JCZuurmond,) ([#719](https://github.com/dbt-labs/dbt-spark/issues/719)) -- [@ben-schreiber](https://github.com/ben-schreiber) ([#803](https://github.com/dbt-labs/dbt-spark/issues/803)) - ## Previous Releases For information on prior major and minor releases, see their changelogs: - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 7d16c28f0..6698ed64c 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "1.8.0b2" +version = "1.9.0a1" diff --git a/setup.py b/setup.py index 55112e3f2..a7621d587 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def _get_plugin_version_dict(): package_name = "dbt-spark" -package_version = "1.8.0b2" +package_version = "1.9.0a1" description = """The Apache Spark adapter plugin for dbt""" odbc_extras = ["pyodbc~=4.0.39"] From 70f6060a5377da83360b6f6664012b03ca1a23c3 Mon Sep 17 00:00:00 2001 From: Mila Page <67295367+VersusFacit@users.noreply.github.com> Date: Tue, 7 May 2024 08:09:45 -0700 Subject: [PATCH 28/44] Add missing changelog backlinks. 
(#1039) Co-authored-by: Mila Page --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 902db37fc..36a3ea69a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.8](https://github.com/dbt-labs/dbt-spark/blob/1.8.latest/CHANGELOG.md) +- [1.7](https://github.com/dbt-labs/dbt-spark/blob/1.7.latest/CHANGELOG.md) - [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md) - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md) From b65776724fe552963c7291b94be286cb69366c46 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Wed, 8 May 2024 10:37:19 -0400 Subject: [PATCH 29/44] Update CODEOWNERS (#1040) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f6283d123..02ed72d45 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1,3 @@ # This codeowners file is used to ensure all PRs require reviews from the adapters team -* @dbt-labs/core-adapters +* @dbt-labs/adapters From 94bfcd942b989c7cba12135ce441edffc0f9c8a2 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Wed, 8 May 2024 11:18:05 -0400 Subject: [PATCH 30/44] Move to `pre-commit` only (#1026) * move linter and typechecker reqs and config to .pre-commit-config.yaml * update linters and typecheckers * make updates from running linters and typecheckers * remove old make recipes --- .flake8 | 14 ---- .github/workflows/integration.yml | 1 - .github/workflows/main.yml | 3 - .pre-commit-config.yaml | 115 ++++++++++++++---------------- Makefile | 27 +------ dbt/adapters/spark/__init__.py | 2 +- dbt/adapters/spark/column.py | 2 +- dbt/adapters/spark/connections.py | 6 +- 
dbt/adapters/spark/impl.py | 2 +- dev-requirements.txt | 30 +++----- tests/unit/utils.py | 1 + 11 files changed, 73 insertions(+), 130 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index bbc3202a0..000000000 --- a/.flake8 +++ /dev/null @@ -1,14 +0,0 @@ -[flake8] -select = - E - W - F -ignore = - # makes Flake8 work like black - W503, - W504, - # makes Flake8 work like black - E203, - E741, - E501, -exclude = test diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index e82a83c4d..1e058aa49 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -19,7 +19,6 @@ on: pull_request_target: paths-ignore: - ".changes/**" - - ".flake8" - ".gitignore" - "**.md" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c71df970a..cbbb7f72a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,9 +59,6 @@ jobs: python -m pip --version python -m pip install pre-commit pre-commit --version - python -m pip install mypy==0.942 - python -m pip install types-requests - mypy --version python -m pip install -r requirements.txt python -m pip install -r dev-requirements.txt python -c "import dbt.adapters.spark" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 882a32769..e228e7d97 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,67 +1,58 @@ # For more on configuring pre-commit hooks (see https://pre-commit.com/) - -# Force all unspecified python hooks to run python 3.8 default_language_version: - python: python3 + python: python3 repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - args: [--unsafe] - - id: check-json - - id: end-of-file-fixer - - id: trailing-whitespace - - id: check-case-conflict -- repo: https://github.com/dbt-labs/pre-commit-hooks - rev: v0.1.0a1 - hooks: - - id: dbt-core-in-adapters-check -- repo: 
https://github.com/psf/black - rev: 23.1.0 - hooks: - - id: black - additional_dependencies: ['click~=8.1'] - args: - - "--line-length=99" - - "--target-version=py38" - - id: black - alias: black-check - stages: [manual] - additional_dependencies: ['click~=8.1'] - args: - - "--line-length=99" - - "--target-version=py38" - - "--check" - - "--diff" -- repo: https://github.com/pycqa/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - - id: flake8 - alias: flake8-check - stages: [manual] -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 - hooks: - - id: mypy - # N.B.: Mypy is... a bit fragile. - # - # By using `language: system` we run this hook in the local - # environment instead of a pre-commit isolated one. This is needed - # to ensure mypy correctly parses the project. +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + args: [--unsafe] + - id: check-json + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-case-conflict + +- repo: https://github.com/dbt-labs/pre-commit-hooks + rev: v0.1.0a1 + hooks: + - id: dbt-core-in-adapters-check + +- repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black + args: + - --line-length=99 + - --target-version=py38 + - --target-version=py39 + - --target-version=py310 + - --target-version=py311 + additional_dependencies: [flaky] + +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + exclude: tests/ + args: + - --max-line-length=99 + - --select=E,F,W + - --ignore=E203,E501,E741,W503,W504 + - --per-file-ignores=*/__init__.py:F401 - # It may cause trouble in that it adds environmental variables out - # of our control to the mix. Unfortunately, there's nothing we can - # do about per pre-commit's author. - # See https://github.com/pre-commit/pre-commit/issues/730 for details. 
- args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores, --disallow-untyped-defs] - files: ^dbt/adapters/.* - language: system - - id: mypy - alias: mypy-check - stages: [manual] - args: [--show-error-codes, --pretty, --ignore-missing-imports, --explicit-package-bases] - files: ^dbt/adapters - language: system +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.10.0 + hooks: + - id: mypy + args: + - --show-error-codes + - --ignore-missing-imports + - --explicit-package-bases + - --warn-unused-ignores + - --disallow-untyped-defs + - --pretty + files: ^dbt/adapters + additional_dependencies: + - types-pytz + - types-requests diff --git a/Makefile b/Makefile index 2bd1055fa..af3a51541 100644 --- a/Makefile +++ b/Makefile @@ -11,31 +11,10 @@ dev-uninstall: ## Uninstalls all packages while maintaining the virtual environm pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y pip uninstall -y dbt-spark -.PHONY: mypy -mypy: ## Runs mypy against staged changes for static type checking. - @\ - pre-commit run --hook-stage manual mypy-check | grep -v "INFO" - -.PHONY: flake8 -flake8: ## Runs flake8 against staged changes to enforce style guide. - @\ - pre-commit run --hook-stage manual flake8-check | grep -v "INFO" - -.PHONY: black -black: ## Runs black against staged changes to enforce style guide. - @\ - pre-commit run --hook-stage manual black-check -v | grep -v "INFO" - .PHONY: lint lint: ## Runs flake8 and mypy code checks against staged changes. @\ - pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \ - pre-commit run mypy-check --hook-stage manual | grep -v "INFO" - -.PHONY: linecheck -linecheck: ## Checks for all Python lines 100 characters or more - @\ - find dbt -type f -name "*.py" -exec grep -I -r -n '.\{100\}' {} \; + pre-commit run --all-files .PHONY: unit unit: ## Runs unit tests with py38. 
@@ -47,9 +26,7 @@ test: ## Runs unit tests with py38 and code checks against staged changes. @\ python -m pytest tests/unit; \ python dagger/run_dbt_spark_tests.py --profile spark_session \ - pre-commit run black-check --hook-stage manual | grep -v "INFO"; \ - pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \ - pre-commit run mypy-check --hook-stage manual | grep -v "INFO" + pre-commit run --all-files .PHONY: clean @echo "cleaning repo" diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py index c25ba40d5..6ecc5eccf 100644 --- a/dbt/adapters/spark/__init__.py +++ b/dbt/adapters/spark/__init__.py @@ -8,5 +8,5 @@ from dbt.include import spark Plugin = AdapterPlugin( - adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH # type: ignore + adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH ) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 39f6f529e..98fa24a17 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -21,7 +21,7 @@ class SparkColumn(dbtClassMixin, Column): def translate_type(cls, dtype: str) -> str: return dtype - def can_expand_to(self: Self, other_column: Self) -> bool: # type: ignore + def can_expand_to(self: Self, other_column: Self) -> bool: """returns True if both columns are strings""" return self.is_string() and other_column.is_string() diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 83048f921..0405eaf5b 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -65,9 +65,9 @@ class SparkConnectionMethod(StrEnum): @dataclass class SparkCredentials(Credentials): host: Optional[str] = None - schema: Optional[str] = None # type: ignore + schema: Optional[str] = None method: SparkConnectionMethod = None # type: ignore - database: Optional[str] = None # type: ignore + database: Optional[str] = None driver: Optional[str] = 
None cluster: Optional[str] = None endpoint: Optional[str] = None @@ -568,7 +568,7 @@ def open(cls, connection: Connection) -> Connection: return connection @classmethod - def data_type_code_to_name(cls, type_code: Union[type, str]) -> str: # type: ignore + def data_type_code_to_name(cls, type_code: Union[type, str]) -> str: """ :param Union[type, str] type_code: The sql to execute. * type_code is a python type (!) in pyodbc https://github.com/mkleehammer/pyodbc/wiki/Cursor#description, and a string for other spark runtimes. diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 9a1a7ec06..255ab7806 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -151,7 +151,7 @@ def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str: def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str: return "timestamp" - def quote(self, identifier: str) -> str: # type: ignore + def quote(self, identifier: str) -> str: return "`{}`".format(identifier) def _get_relation_information(self, row: agate.Row) -> RelationInfo: diff --git a/dev-requirements.txt b/dev-requirements.txt index 26522411a..f86fb5ef4 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,30 +5,22 @@ git+https://github.com/dbt-labs/dbt-common.git git+https://github.com/dbt-labs/dbt-adapters.git git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter -# if version 1.x or greater -> pin to major version -# if version 0.x -> pin to minor -black>=24.3 -bumpversion~=0.6.0 -click~=8.1 -flake8~=6.1;python_version>="3.8" -flaky~=3.7 -freezegun~=1.3 +# dev ipdb~=0.13.13 -mypy==1.7.1 # patch updates have historically introduced breaking changes -pip-tools~=7.3 -pre-commit~=3.5 -pre-commit-hooks~=4.5 +pre-commit==3.7.0;python_version >="3.9" +pre-commit==3.5.0;python_version <"3.9" + +# test +freezegun~=1.3 +mock~=5.1 pytest~=7.4 pytest-csv~=3.0 pytest-dotenv~=0.5.2 pytest-logbook~=1.2 pytest-xdist~=3.5 
-pytz~=2023.3 -types-pytz~=2023.3 -types-requests~=2.31 +thrift_sasl~=0.4.3 + +# build +bumpversion~=0.6.0 twine~=4.0 wheel~=0.42 - -# Adapter specific dependencies -mock~=5.1 -thrift_sasl~=0.4.3 diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 17cd3ee78..d080242cc 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -2,6 +2,7 @@ Note that all imports should be inside the functions to avoid import/mocking issues. """ + import string import os from unittest import mock From 1f4442a4dcf9719b1670278760772a07506681fc Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Fri, 10 May 2024 11:37:55 -0400 Subject: [PATCH 31/44] [Bug] Fix constraints enforcement rollover test (#1041) * unskip offending test case * update help in example test env file * install pyodbc from source to support arm machines * allow for both col and column --- requirements.txt | 2 +- test.env.example | 16 ++++++---------- tests/functional/adapter/test_constraints.py | 13 +------------ 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index 18ccc77fd..40335fb8a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pyhive[hive_pure_sasl]~=0.7.0 requests>=2.28.1 -pyodbc~=5.0.1 +pyodbc~=5.0.1 --no-binary pyodbc sqlparams>=3.0.0 thrift>=0.13.0 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/test.env.example b/test.env.example index e69f700b7..140da4042 100644 --- a/test.env.example +++ b/test.env.example @@ -1,13 +1,9 @@ -# Cluster ID -DBT_DATABRICKS_CLUSTER_NAME= -# SQL Endpoint -DBT_DATABRICKS_ENDPOINT= -# Server Hostname value -DBT_DATABRICKS_HOST_NAME= -# personal token -DBT_DATABRICKS_TOKEN= -# file path to local ODBC driver -ODBC_DRIVER= +# databricks credentials +DBT_DATABRICKS_HOST_NAME=<{this value}.cloud.databricks.com>.cloud.databricks.com +DBT_DATABRICKS_TOKEN= +DBT_DATABRICKS_CLUSTER_NAME= +ODBC_DRIVER= 
+DBT_DATABRICKS_ENDPOINT= # users for testing 'grants' functionality DBT_TEST_USER_1= diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index 654bbdb5f..e35a13a64 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -316,7 +316,7 @@ def expected_error_messages(self): "violate the new NOT NULL constraint", "(id > 0) violated by row with values:", # incremental mats "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES", # incremental mats - "NOT NULL constraint violated for column", + "NOT NULL constraint violated for col", ] def assert_expected_error_messages(self, error_message, expected_error_messages): @@ -358,17 +358,6 @@ def models(self): "constraints_schema.yml": constraints_yml, } - @pytest.mark.skip( - "Databricks now raises an exception, which gets raised prior to the `expected_pass` check." - "See https://github.com/dbt-labs/dbt-spark/issues/1009" - ) - def test__constraints_enforcement_rollback( - self, project, expected_color, expected_error_messages, null_model_sql - ): - super().test__constraints_enforcement_rollback( - project, expected_color, expected_error_messages, null_model_sql - ) - # TODO: Like the tests above, this does test that model-level constraints don't # result in errors, but it does not verify that they are actually present in From 94af50e683c7c4466708e40e29ce38a51ee102b8 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Mon, 13 May 2024 17:24:38 -0600 Subject: [PATCH 32/44] Cross-database `date` macro (#1030) * Cross-database `date` macro * Temporary dev requirements for testing * Update changelog entry * Revert "Temporary dev requirements for testing" This reverts commit 529461f475c5a639a6d545ff61ab32d91616fd2a. 
--- .changes/unreleased/Features-20240501-151904.yaml | 6 ++++++ dbt/include/spark/macros/utils/date.sql | 5 +++++ tests/functional/adapter/utils/test_utils.py | 5 +++++ 3 files changed, 16 insertions(+) create mode 100644 .changes/unreleased/Features-20240501-151904.yaml create mode 100644 dbt/include/spark/macros/utils/date.sql diff --git a/.changes/unreleased/Features-20240501-151904.yaml b/.changes/unreleased/Features-20240501-151904.yaml new file mode 100644 index 000000000..ed08f1251 --- /dev/null +++ b/.changes/unreleased/Features-20240501-151904.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Cross-database `date` macro +time: 2024-05-01T15:19:04.822157-06:00 +custom: + Author: dbeatty10 + Issue: 1031 diff --git a/dbt/include/spark/macros/utils/date.sql b/dbt/include/spark/macros/utils/date.sql new file mode 100644 index 000000000..a76e995b3 --- /dev/null +++ b/dbt/include/spark/macros/utils/date.sql @@ -0,0 +1,5 @@ +{% macro spark__date(year, month, day) -%} + {%- set dt = modules.datetime.date(year, month, day) -%} + {%- set iso_8601_formatted_date = dt.strftime('%Y-%m-%d') -%} + to_date('{{ iso_8601_formatted_date }}', 'yyyy-MM-dd') +{%- endmacro %} diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py index 0dc526564..e8ebb087f 100644 --- a/tests/functional/adapter/utils/test_utils.py +++ b/tests/functional/adapter/utils/test_utils.py @@ -8,6 +8,7 @@ from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText from dbt.tests.adapter.utils.test_concat import BaseConcat from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive +from dbt.tests.adapter.utils.test_date import BaseDate from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd from dbt.tests.adapter.utils.test_datediff import BaseDateDiff from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc @@ -80,6 +81,10 @@ class TestCurrentTimestamp(BaseCurrentTimestampNaive): pass +class 
TestDate(BaseDate): + pass + + class TestDateAdd(BaseDateAdd): pass From 629ea94098cea5f4c182b123048086cd2a48c767 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Tue, 14 May 2024 12:28:51 -0400 Subject: [PATCH 33/44] [Bug] Fix incremental python tables - dbt can't find temporary table transaction logs (#1042) * unskip test * align test pyodbc version with build deps; add pyspark from build deps * revert change to exclude database and schema from temp relations * retain existing behavior for unit tests by vendoring temp relation into the incremental materialization * changelog entry --- .changes/unreleased/Fixes-20240513-160121.yaml | 7 +++++++ .../macros/materializations/incremental/incremental.sql | 2 +- requirements.txt | 3 ++- tests/functional/adapter/test_python_model.py | 7 ------- 4 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240513-160121.yaml diff --git a/.changes/unreleased/Fixes-20240513-160121.yaml b/.changes/unreleased/Fixes-20240513-160121.yaml new file mode 100644 index 000000000..fd7072954 --- /dev/null +++ b/.changes/unreleased/Fixes-20240513-160121.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Fix incremental python models error where Databricks could not find the temp + table transaction logs +time: 2024-05-13T16:01:21.255833-04:00 +custom: + Author: mikealfare + Issue: "1033" diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql index 10d4f3ed8..9a66bab51 100644 --- a/dbt/include/spark/macros/materializations/incremental/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -16,7 +16,7 @@ {%- set incremental_predicates = config.get('predicates', none) or config.get('incremental_predicates', none) -%} {%- set target_relation = this -%} {%- set existing_relation = load_relation(this) -%} - {%- set tmp_relation = 
make_temp_relation(this) -%} + {% set tmp_relation = this.incorporate(path = {"identifier": this.identifier ~ '__dbt_tmp'}) -%} {#-- for SQL model we will create temp view that doesn't have database and schema --#} {%- if language == 'sql'-%} diff --git a/requirements.txt b/requirements.txt index 40335fb8a..b32884c43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ pyhive[hive_pure_sasl]~=0.7.0 requests>=2.28.1 -pyodbc~=5.0.1 --no-binary pyodbc +pyodbc~=4.0.39 --no-binary pyodbc sqlparams>=3.0.0 thrift>=0.13.0 +pyspark>=3.0.0,<4.0.0 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability types-PyYAML diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 05e25c5f4..1195cbd3e 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -24,13 +24,6 @@ class TestPythonIncrementalModelSpark(BasePythonIncrementalTests): def project_config_update(self): return {} - @pytest.mark.skip( - "Databricks can't find the transaction log" - "See https://github.com/dbt-labs/dbt-spark/issues/1033" - ) - def test_incremental(self, project): - super().test_incremental(project) - models__simple_python_model = """ import pandas From 9b3dd3f7eb6d0f5379239342cdf25052bfb7eb9a Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Tue, 14 May 2024 13:04:41 -0600 Subject: [PATCH 34/44] Import relevant pytest(s) for cross-database `cast` macro (#1029) Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> --- .changes/unreleased/Features-20240430-185723.yaml | 6 ++++++ tests/functional/adapter/utils/test_utils.py | 5 +++++ 2 files changed, 11 insertions(+) create mode 100644 .changes/unreleased/Features-20240430-185723.yaml diff --git a/.changes/unreleased/Features-20240430-185723.yaml b/.changes/unreleased/Features-20240430-185723.yaml new file mode 100644 index 
000000000..cb86e7966 --- /dev/null +++ b/.changes/unreleased/Features-20240430-185723.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add tests for cross-database `cast` macro +time: 2024-04-30T18:57:23.881246-06:00 +custom: + Author: dbeatty10 + Issue: "1028" diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py index e8ebb087f..0285f7c4e 100644 --- a/tests/functional/adapter/utils/test_utils.py +++ b/tests/functional/adapter/utils/test_utils.py @@ -5,6 +5,7 @@ from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct from dbt.tests.adapter.utils.test_any_value import BaseAnyValue from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr +from dbt.tests.adapter.utils.test_cast import BaseCast from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText from dbt.tests.adapter.utils.test_concat import BaseConcat from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive @@ -67,6 +68,10 @@ class TestBoolOr(BaseBoolOr): pass +class TestCast(BaseCast): + pass + + class TestCastBoolToText(BaseCastBoolToText): pass From 6f8ff606926b5bb3f986a0eb0b6007bff35476da Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 14 May 2024 23:53:46 +0200 Subject: [PATCH 35/44] Bump deps on common, adapters, core (#1045) Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index a7621d587..fd985eba4 100644 --- a/setup.py +++ b/setup.py @@ -65,10 +65,10 @@ def _get_plugin_version_dict(): include_package_data=True, install_requires=[ "sqlparams>=3.0.0", - "dbt-common>=0.1.0a1,<2.0", - "dbt-adapters>=0.1.0a1,<2.0", + "dbt-common>=1.0.4,<2.0", + "dbt-adapters>=1.1.1,<2.0", # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency - "dbt-core>=1.8.0a1", + "dbt-core>=1.8.0", ], extras_require={ "ODBC": 
odbc_extras, From 944dbea08de9a8069a39ad92f4beecb2cbec608c Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Tue, 21 May 2024 00:57:20 -0400 Subject: [PATCH 36/44] Add docker release to the full release process for final releases (#1004) * add docker release to release pipeline * update docker release to align with other adapters, add dev docker * remove defaulted input for docker package, override default for docker release image * fix docker release dependent steps * only release docker when not testing, allow to only release to docker * remove dev container * remove test script * rename the spark Dockerfile to make space for the release Dockerfile * move the release Dockerfile into ./docker --------- Co-authored-by: Emily Rockman --- .github/dependabot.yml | 5 +++ .github/workflows/release.yml | 64 ++++++++++++++++--------------- Makefile | 4 ++ docker-compose.yml | 4 +- docker/Dockerfile | 72 ++++++++++++++++++++--------------- docker/README.md | 70 ++++++++++++++++++++++++++++++++++ docker/spark.Dockerfile | 30 +++++++++++++++ 7 files changed, 187 insertions(+), 62 deletions(-) create mode 100644 docker/README.md create mode 100644 docker/spark.Dockerfile diff --git a/.github/dependabot.yml b/.github/dependabot.yml index ae2be43aa..fc44c9fe5 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -15,3 +15,8 @@ updates: schedule: interval: "weekly" rebase-strategy: "disabled" + - package-ecosystem: "docker" + directory: "/docker-dev" + schedule: + interval: "weekly" + rebase-strategy: "disabled" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9b2774f17..cdbdaa13f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,8 +13,8 @@ # This will only run manually. Run this workflow only after the # version bump workflow is completed and related changes are reviewed and merged. 
# - -name: Release to GitHub and PyPI +name: "Release to GitHub, PyPI, and Docker" +run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker" on: workflow_dispatch: @@ -56,6 +56,11 @@ on: type: boolean default: true required: false + only_docker: + description: "Only release Docker image, skip GitHub & PyPI" + type: boolean + default: false + required: false permissions: contents: write # this is the permission that allows creating a new release @@ -66,7 +71,7 @@ defaults: jobs: log-inputs: - name: Log Inputs + name: "Log Inputs" runs-on: ubuntu-latest steps: - name: "[DEBUG] Print Variables" @@ -79,6 +84,7 @@ jobs: echo AWS S3 bucket name: ${{ inputs.s3_bucket_name }} echo Package test command: ${{ inputs.package_test_command }} echo Test run: ${{ inputs.test_run }} + echo Only Docker: ${{ inputs.only_docker }} # The Spark repository uses CircleCI to run integration tests. # Because of this, the process of version bumps will be manual @@ -87,27 +93,21 @@ jobs: # We are passing `env_setup_script_path` as an empty string # so that the integration tests stage will be skipped. 
audit-version-and-changelog: - name: Bump package version, Generate changelog - + name: "Bump package version, Generate changelog" uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main - with: sha: ${{ inputs.sha }} version_number: ${{ inputs.version_number }} target_branch: ${{ inputs.target_branch }} env_setup_script_path: "" test_run: ${{ inputs.test_run }} - secrets: inherit log-outputs-audit-version-and-changelog: name: "[Log output] Bump package version, Generate changelog" - if: ${{ !failure() && !cancelled() }} - + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [audit-version-and-changelog] - runs-on: ubuntu-latest - steps: - name: Print variables run: | @@ -115,12 +115,10 @@ jobs: echo Changelog path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }} build-test-package: - name: Build, Test, Package - if: ${{ !failure() && !cancelled() }} + name: "Build, Test, Package" + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [audit-version-and-changelog] - uses: dbt-labs/dbt-release/.github/workflows/build.yml@main - with: sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }} version_number: ${{ inputs.version_number }} @@ -129,19 +127,15 @@ jobs: s3_bucket_name: ${{ inputs.s3_bucket_name }} package_test_command: ${{ inputs.package_test_command }} test_run: ${{ inputs.test_run }} - secrets: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} github-release: - name: GitHub Release - if: ${{ !failure() && !cancelled() }} - + name: "GitHub Release" + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [audit-version-and-changelog, build-test-package] - uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main - with: sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }} version_number: ${{ inputs.version_number }} @@ -149,35 +143,43 @@ jobs: test_run: ${{ inputs.test_run }} pypi-release: - name: 
PyPI Release - + name: "PyPI Release" + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [github-release] - uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main - with: version_number: ${{ inputs.version_number }} test_run: ${{ inputs.test_run }} - secrets: PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} + docker-release: + name: "Docker Release" + # We cannot release to docker on a test run because it uses the tag in GitHub as + # what we need to release but draft releases don't actually tag the commit so it + # finds nothing to release + if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }} + needs: [github-release] + permissions: + packages: write + uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main + with: + version_number: ${{ inputs.version_number }} + dockerfile: "docker/Dockerfile" + test_run: ${{ inputs.test_run }} + slack-notification: name: Slack Notification if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }} - needs: [ - audit-version-and-changelog, - build-test-package, github-release, pypi-release, + docker-release, ] - uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main with: status: "failure" - secrets: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }} diff --git a/Makefile b/Makefile index af3a51541..ff4c0fc1b 100644 --- a/Makefile +++ b/Makefile @@ -38,3 +38,7 @@ help: ## Show this help message. @echo @echo 'targets:' @grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: docker-prod +docker-prod: + docker build -f docker/Dockerfile -t dbt-spark . 
diff --git a/docker-compose.yml b/docker-compose.yml index ad083eaf4..cd3e1c776 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,9 @@ version: "3.7" services: dbt-spark3-thrift: - build: docker/ + build: + context: ./docker + dockerfile: spark.Dockerfile ports: - "10000:10000" - "4040:4040" diff --git a/docker/Dockerfile b/docker/Dockerfile index bb4d378ed..ef4574ddd 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,30 +1,42 @@ -ARG OPENJDK_VERSION=8 -FROM eclipse-temurin:${OPENJDK_VERSION}-jre - -ARG BUILD_DATE -ARG SPARK_VERSION=3.3.2 -ARG HADOOP_VERSION=3 - -LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ - org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.version=$SPARK_VERSION - -ENV SPARK_HOME /usr/spark -ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}" - -RUN apt-get update && \ - apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ - wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ - tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ - rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ - mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \ - ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \ - apt-get remove -y wget && \ - apt-get autoremove -y && \ - apt-get clean - -COPY entrypoint.sh /scripts/ -RUN chmod +x /scripts/entrypoint.sh - -ENTRYPOINT ["/scripts/entrypoint.sh"] -CMD ["--help"] +# this image gets published to GHCR for production use +ARG py_version=3.11.2 + +FROM python:$py_version-slim-bullseye as base + +RUN apt-get update \ + && apt-get dist-upgrade -y \ + && apt-get install -y --no-install-recommends \ + build-essential=12.9 \ + ca-certificates=20210119 \ + gcc=4:10.2.1-1 \ + git=1:2.30.2-1+deb11u2 \ + libpq-dev=13.14-0+deb11u1 \ + libsasl2-dev=2.1.27+dfsg-2.1+deb11u1 \ + make=4.3-4.1 \ + 
openssh-client=1:8.4p1-5+deb11u3 \ + python-dev-is-python2=2.7.18-9 \ + software-properties-common=0.96.20.2-2.1 \ + unixodbc-dev=2.3.6-0.1+b1 \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +ENV PYTHONIOENCODING=utf-8 +ENV LANG=C.UTF-8 + +RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir + + +FROM base as dbt-spark + +ARG commit_ref=main +ARG extras=all + +HEALTHCHECK CMD dbt --version || exit 1 + +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] + +RUN python -m pip install --no-cache-dir "dbt-spark[${extras}] @ git+https://github.com/dbt-labs/dbt-spark@${commit_ref}" diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..42ca5e227 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,70 @@ +# Docker for dbt +`Dockerfile` is suitable for building dbt Docker images locally or using with CI/CD to automate populating a container registry. + +## Building an image: +This Dockerfile can create images for the following target: `dbt-spark` + +In order to build a new image, run the following docker command. +```shell +docker build --tag <your_image_name> --target dbt-spark <path/to/dockerfile> +``` +--- +> **Note:** Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly! + +--- + +By default the image will be populated with the latest version of `dbt-spark` on `main`. +If you need to use a different version you can specify it by git ref using the `--build-arg` flag: +```shell +docker build --tag <your_image_name> \ + --target dbt-spark \ + --build-arg commit_ref=<commit_ref> \ + <path/to/dockerfile> +``` + +### Examples: +To build an image named "my-dbt" that supports Spark using the latest releases: +```shell +cd dbt-spark/docker +docker build --tag my-dbt --target dbt-spark . 
+``` + +To build an image named "my-other-dbt" that supports Spark using the adapter version 1.0.0b1: +```shell +cd dbt-spark/docker +docker build \ + --tag my-other-dbt \ + --target dbt-spark \ + --build-arg commit_ref=v1.0.0b1 \ + . +``` + +## Special cases +There are a few special cases worth noting: +* The `dbt-spark` database adapter comes in three different versions named `PyHive`, `ODBC`, and the default `all`. +If you wish to override this you can use the `--build-arg` flag with the value of `extras=<extras_name>`. +See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information. +```shell +docker build --tag my_dbt \ + --target dbt-spark \ + --build-arg commit_ref=v1.0.0b1 \ + --build-arg extras=PyHive \ + <path/to/dockerfile> +``` + +## Running an image in a container: +The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal: +```shell +docker run \ + --network=host \ + --mount type=bind,source=path/to/project,target=/usr/app \ + --mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \ + my-dbt \ + ls +``` +--- +**Notes:** +* Bind-mount sources _must_ be an absolute path +* You may need to make adjustments to the docker networking setting depending on the specifics of your data warehouse/database host. 
+ +--- diff --git a/docker/spark.Dockerfile b/docker/spark.Dockerfile new file mode 100644 index 000000000..bb4d378ed --- /dev/null +++ b/docker/spark.Dockerfile @@ -0,0 +1,30 @@ +ARG OPENJDK_VERSION=8 +FROM eclipse-temurin:${OPENJDK_VERSION}-jre + +ARG BUILD_DATE +ARG SPARK_VERSION=3.3.2 +ARG HADOOP_VERSION=3 + +LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ + org.label-schema.build-date=$BUILD_DATE \ + org.label-schema.version=$SPARK_VERSION + +ENV SPARK_HOME /usr/spark +ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}" + +RUN apt-get update && \ + apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ + wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ + tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ + rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ + mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \ + ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \ + apt-get remove -y wget && \ + apt-get autoremove -y && \ + apt-get clean + +COPY entrypoint.sh /scripts/ +RUN chmod +x /scripts/entrypoint.sh + +ENTRYPOINT ["/scripts/entrypoint.sh"] +CMD ["--help"] From 7850da3d542608128ad80c41e03972649943d449 Mon Sep 17 00:00:00 2001 From: Mila Page <67295367+VersusFacit@users.noreply.github.com> Date: Fri, 14 Jun 2024 05:00:32 +0000 Subject: [PATCH 37/44] Adap 1049/lazy load agate (#1050) * Add changelog * Lazy load agate. * More comments on types and lint. 
--------- Co-authored-by: Mila Page --- .../Under the Hood-20240612-195629.yaml | 6 +++ dbt/adapters/spark/impl.py | 39 ++++++++++++------- 2 files changed, 30 insertions(+), 15 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20240612-195629.yaml diff --git a/.changes/unreleased/Under the Hood-20240612-195629.yaml b/.changes/unreleased/Under the Hood-20240612-195629.yaml new file mode 100644 index 000000000..c90ebcdab --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240612-195629.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Lazy load agate to improve performance +time: 2024-06-12T19:56:29.943204-07:00 +custom: + Author: versusfacit + Issue: "1049" diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index 255ab7806..d33ebde20 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -14,6 +14,7 @@ Callable, Set, FrozenSet, + TYPE_CHECKING, ) from dbt.adapters.base.relation import InformationSchema @@ -24,7 +25,10 @@ from typing_extensions import TypeAlias -import agate +if TYPE_CHECKING: + # Indirectly imported via agate_helper, which is lazy loaded further downfile. + # Used by mypy for earlier type hints. 
+ import agate from dbt.adapters.base import AdapterConfig, PythonJobHelper from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport @@ -127,34 +131,36 @@ def date_function(cls) -> str: return "current_timestamp()" @classmethod - def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_text_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "string" @classmethod - def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_number_type(cls, agate_table: "agate.Table", col_idx: int) -> str: + import agate + decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) return "double" if decimals else "bigint" @classmethod - def convert_integer_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_integer_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "bigint" @classmethod - def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_date_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "date" @classmethod - def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_time_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "time" @classmethod - def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_datetime_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "timestamp" def quote(self, identifier: str) -> str: return "`{}`".format(identifier) - def _get_relation_information(self, row: agate.Row) -> RelationInfo: + def _get_relation_information(self, row: "agate.Row") -> RelationInfo: """relation info was fetched with SHOW TABLES EXTENDED""" try: _schema, name, _, information = row @@ -165,7 +171,7 @@ def _get_relation_information(self, row: agate.Row) -> RelationInfo: return _schema, name, information - def _get_relation_information_using_describe(self, row: agate.Row) -> RelationInfo: + def 
_get_relation_information_using_describe(self, row: "agate.Row") -> RelationInfo: """Relation info fetched using SHOW TABLES and an auxiliary DESCRIBE statement""" try: _schema, name, _ = row @@ -193,8 +199,8 @@ def _get_relation_information_using_describe(self, row: agate.Row) -> RelationIn def _build_spark_relation_list( self, - row_list: agate.Table, - relation_info_func: Callable[[agate.Row], RelationInfo], + row_list: "agate.Table", + relation_info_func: Callable[["agate.Row"], RelationInfo], ) -> List[BaseRelation]: """Aggregate relations with format metadata included.""" relations = [] @@ -370,7 +376,7 @@ def get_catalog( self, relation_configs: Iterable[RelationConfig], used_schemas: FrozenSet[Tuple[str, str]], - ) -> Tuple[agate.Table, List[Exception]]: + ) -> Tuple["agate.Table", List[Exception]]: schema_map = self._get_catalog_schemas(relation_configs) if len(schema_map) > 1: raise CompilationError( @@ -378,7 +384,7 @@ def get_catalog( ) with executor(self.config) as tpe: - futures: List[Future[agate.Table]] = [] + futures: List[Future["agate.Table"]] = [] for info, schemas in schema_map.items(): for schema in schemas: futures.append( @@ -399,7 +405,7 @@ def _get_one_catalog( information_schema: InformationSchema, schemas: Set[str], used_schemas: FrozenSet[Tuple[str, str]], - ) -> agate.Table: + ) -> "agate.Table": if len(schemas) != 1: raise CompilationError( f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}" @@ -412,6 +418,9 @@ def _get_one_catalog( for relation in self.list_relations(database, schema): logger.debug("Getting table schema for relation {}", str(relation)) columns.extend(self._get_columns_for_catalog(relation)) + + import agate + return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER) def check_schema_exists(self, database: str, schema: str) -> bool: @@ -486,7 +495,7 @@ def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]: "all_purpose_cluster": AllPurposeClusterPythonJobHelper, 
} - def standardize_grants_dict(self, grants_table: agate.Table) -> dict: + def standardize_grants_dict(self, grants_table: "agate.Table") -> dict: grants_dict: Dict[str, List[str]] = {} for row in grants_table: grantee = row["Principal"] From ba5b3f05bb699c9be59b39661cb8186ce7175587 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:53:38 -0400 Subject: [PATCH 38/44] skip broken tests, link to the issue for resolution (#1056) --- tests/functional/adapter/test_python_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 1195cbd3e..957361cb7 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -15,7 +15,9 @@ class TestPythonModelSpark(BasePythonModelTests): @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint") class TestPySpark(BasePySparkTests): - pass + @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054") + def test_different_dataframes(self, project): + return super().test_different_dataframes(project) @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint") @@ -69,6 +71,7 @@ class TestChangingSchemaSpark: def models(self): return {"simple_python_model.py": models__simple_python_model} + @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054") def test_changing_schema_with_log_validation(self, project, logs_dir): run_dbt(["run"]) write_file( From cd6efba4f006f7bc3de761a02717ff9261b736a1 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:50:48 -0500 Subject: [PATCH 39/44] update user docs-issue workflow (#1051) * update user docs-issue workflow * pre-commit fix * update workflow based onf feedback * whitespace * update to match bigquery * pin numpy to below 
2.0 new release * remove numpy pin for its own pr --- .github/workflows/docs-issues.yml | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml index 00a098df8..f49cf517c 100644 --- a/.github/workflows/docs-issues.yml +++ b/.github/workflows/docs-issues.yml @@ -1,19 +1,18 @@ # **what?** -# Open an issue in docs.getdbt.com when a PR is labeled `user docs` +# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed # **why?** # To reduce barriers for keeping docs up to date # **when?** -# When a PR is labeled `user docs` and is merged. Runs on pull_request_target to run off the workflow already merged, -# not the workflow that existed on the PR branch. This allows old PRs to get comments. +# When an issue is labeled `user docs` and is closed as completed. Can be labeled before or after the issue is closed. -name: Open issues in docs.getdbt.com repo when a PR is labeled -run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}" +name: Open issues in docs.getdbt.com repo when an issue is labeled +run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}" on: - pull_request_target: + issues: types: [labeled, closed] defaults: @@ -21,23 +20,22 @@ defaults: shell: bash permissions: - issues: write # opens new issues - pull-requests: write # comments on PRs - + issues: write # comments on issues jobs: open_issues: - # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`. Otherwise it runs the + # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned. 
+ # If this logic does not exist in this workflow, it runs the # risk of duplicaton of issues being created due to merge and label both triggering this workflow to run and neither having # generating the comment before the other runs. This lives here instead of the shared workflow because this is where we # decide if it should run or not. if: | - (github.event.pull_request.merged == true) && - ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) || + (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && ( + (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) || (github.event.action == 'labeled' && github.event.label.name == 'user docs')) uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main with: issue_repository: "dbt-labs/docs.getdbt.com" - issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}" + issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}" issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated." 
secrets: inherit From 50634b9c6038016f888ea84be542d9a84e52a141 Mon Sep 17 00:00:00 2001 From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:09:25 -0700 Subject: [PATCH 40/44] update spark internal-release workflow (#1052) Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> --- .github/workflows/release-internal.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml index eb892415c..d4e7a3c93 100644 --- a/.github/workflows/release-internal.yml +++ b/.github/workflows/release-internal.yml @@ -10,15 +10,12 @@ # # Manual trigger. -name: "Release internal patch" +name: "Release to Cloud" +run-name: "Release to Cloud off of ${{ inputs.ref }}" on: workflow_dispatch: inputs: - version_number: - description: "The release version number (i.e. 1.0.0b1)" - type: string - required: true ref: description: "The ref (sha or branch name) to use" type: string @@ -29,6 +26,11 @@ on: type: string default: "python -c \"import dbt.adapters.spark\"" required: true + skip_tests: + description: "Should the tests be skipped? 
(default to false)" + type: boolean + required: true + default: false defaults: run: @@ -129,15 +131,14 @@ jobs: run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }} invoke-reusable-workflow: - name: "Build and Release Internally" + name: "Create cloud release" needs: [run-integration-tests] - uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main" with: - version_number: "${{ inputs.version_number }}" package_test_command: "${{ inputs.package_test_command }}" dbms_name: "spark" ref: "${{ inputs.ref }}" + skip_tests: "${{ inputs.skip_tests }}" secrets: "inherit" From 824ca0f2249d145234f21d7e4066e033a273e2e2 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Wed, 26 Jun 2024 12:06:32 -0400 Subject: [PATCH 41/44] Update the spark version to the current version (#1055) * update the spark version to the current version * update pin for pydantic to resolve https://github.com/explosion/spaCy/issues/12659 * exclude koalas dataframes from test --- tests/functional/adapter/test_python_model.py | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 957361cb7..cd798d1da 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -15,9 +15,22 @@ class TestPythonModelSpark(BasePythonModelTests): @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint") class TestPySpark(BasePySparkTests): - @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054") def test_different_dataframes(self, project): - return super().test_different_dataframes(project) + """ + Test that python models are supported using dataframes from: + - pandas + - pyspark + - pyspark.pandas (formerly dataspark.koalas) + + Note: + The CI environment is on Apache Spark >3.1, which includes koalas as 
pyspark.pandas. + The only Databricks runtime that supports Apache Spark <=3.1 is 9.1 LTS, which is EOL 2024-09-23. + For more information, see: + - https://github.com/databricks/koalas + - https://docs.databricks.com/en/release-notes/runtime/index.html + """ + results = run_dbt(["run", "--exclude", "koalas_df"]) + assert len(results) == 3 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint") @@ -37,7 +50,7 @@ def model(dbt, spark): materialized='table', submission_method='job_cluster', job_cluster_config={ - "spark_version": "7.3.x-scala2.12", + "spark_version": "12.2.x-scala2.12", "node_type_id": "i3.xlarge", "num_workers": 0, "spark_conf": { @@ -48,7 +61,7 @@ def model(dbt, spark): "ResourceClass": "SingleNode" } }, - packages=['spacy', 'torch', 'pydantic<1.10.3'] + packages=['spacy', 'torch', 'pydantic>=1.10.8'] ) data = [[1,2]] * 10 return spark.createDataFrame(data, schema=['test', 'test2']) @@ -67,11 +80,23 @@ def model(dbt, spark): @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint") class TestChangingSchemaSpark: + """ + Confirm that we can setup a spot instance and parse required packages into the Databricks job. + + Notes: + - This test generates a spot instance on demand using the settings from `job_cluster_config` + in `models__simple_python_model` above. It takes several minutes to run due to creating the cluster. + The job can be monitored via "Data Engineering > Job Runs" or "Workflows > Job Runs" + in the Databricks UI (instead of via the normal cluster). + - The `spark_version` argument will need to periodically be updated. It will eventually become + unsupported and start experiencing issues. 
+ - See https://github.com/explosion/spaCy/issues/12659 for why we're pinning pydantic + """ + @pytest.fixture(scope="class") def models(self): return {"simple_python_model.py": models__simple_python_model} - @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054") def test_changing_schema_with_log_validation(self, project, logs_dir): run_dbt(["run"]) write_file( From 034cb6118e808c1c9ad81d3553a136ac94b77781 Mon Sep 17 00:00:00 2001 From: Mila Page <67295367+VersusFacit@users.noreply.github.com> Date: Mon, 8 Jul 2024 12:11:09 -0400 Subject: [PATCH 42/44] Base 207/add test (#1057) * Add test for upstream change. * Skip session since it's not liking the test. * Import pytest to fix skip error. * Dial in tests to reflect error messages from spark. --------- Co-authored-by: Mila Page Co-authored-by: Mike Alfare --- .../adapter/dbt_show/test_dbt_show.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/functional/adapter/dbt_show/test_dbt_show.py diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/tests/functional/adapter/dbt_show/test_dbt_show.py new file mode 100644 index 000000000..bc56fd908 --- /dev/null +++ b/tests/functional/adapter/dbt_show/test_dbt_show.py @@ -0,0 +1,22 @@ +import pytest + +from dbt.tests.adapter.dbt_show.test_dbt_show import ( + BaseShowSqlHeader, + BaseShowLimit, + BaseShowDoesNotHandleDoubleLimit, +) + + +class TestSparkShowLimit(BaseShowLimit): + pass + + +class TestSparkShowSqlHeader(BaseShowSqlHeader): + pass + + +@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_http_cluster") +class TestSparkShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit): + """The syntax message is quite variable across clusters, but this hits two at once.""" + + DATABASE_ERROR_MESSAGE = "limit" From 81a7cf25e9880fe1d8114e7e27cd6eacac01093e Mon Sep 17 00:00:00 2001 From: Colin Date: Wed, 10 Jul 2024 16:54:38 -0700 Subject: [PATCH 43/44] delete duplicate fixture --- 
tests/functional/adapter/test_python_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 1195cbd3e..1e40344ec 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -28,7 +28,6 @@ def project_config_update(self): models__simple_python_model = """ import pandas import torch -import spacy def model(dbt, spark): dbt.config( From 2a31d3f28cc5e2fe6542a62e35b8c654c312f1ab Mon Sep 17 00:00:00 2001 From: Colin Date: Wed, 10 Jul 2024 16:56:47 -0700 Subject: [PATCH 44/44] Revert "delete duplicate fixture" This reverts commit 81a7cf25e9880fe1d8114e7e27cd6eacac01093e. --- tests/functional/adapter/test_python_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 99c9fc955..cd798d1da 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -43,6 +43,7 @@ def project_config_update(self): models__simple_python_model = """ import pandas import torch +import spacy def model(dbt, spark): dbt.config(