diff --git a/.bumpversion.cfg b/.bumpversion.cfg index f0c900615..3b3101552 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.6.0b3 +current_version = 1.7.0a1 parse = (?P[\d]+) # major version number \.(?P[\d]+) # minor version number \.(?P[\d]+) # patch version number diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md index 87fa376a8..0bec014d9 100644 --- a/.changes/0.0.0.md +++ b/.changes/0.0.0.md @@ -1,5 +1,6 @@ ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md) - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md) - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md) diff --git a/.changes/1.6.0-a1.md b/.changes/1.6.0-a1.md deleted file mode 100644 index a748db5a3..000000000 --- a/.changes/1.6.0-a1.md +++ /dev/null @@ -1 +0,0 @@ -## dbt-spark 1.6.0-a1 - April 17, 2023 diff --git a/.changes/1.6.0-b1.md b/.changes/1.6.0-b1.md deleted file mode 100644 index 5c385a7f2..000000000 --- a/.changes/1.6.0-b1.md +++ /dev/null @@ -1,31 +0,0 @@ -## dbt-spark 1.6.0-b1 - May 12, 2023 - -### Features - -- Support insert_overwrite strategy with delta ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013)) - -### Fixes - -- Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks ([#725](https://github.com/dbt-labs/dbt-spark/issues/725)) -- Remove dead code 💀 ([#758](https://github.com/dbt-labs/dbt-spark/issues/758)) - -### Under the Hood - -- Remove unneeded type ignore ([#753](https://github.com/dbt-labs/dbt-spark/issues/753)) - -### Dependencies - -- Update black requirement from ~=23.1 to ~=23.3 ([#701](https://github.com/dbt-labs/dbt-spark/pull/701)) -- Bump mypy from 1.1.1 to 1.2.0 
([#708](https://github.com/dbt-labs/dbt-spark/pull/708)) -- Update pytest requirement from ~=7.2 to ~=7.3 ([#726](https://github.com/dbt-labs/dbt-spark/pull/726)) -- Update types-pytz requirement from ~=2023.2 to ~=2023.3 ([#704](https://github.com/dbt-labs/dbt-spark/pull/704)) -- Update pip-tools requirement from ~=6.12 to ~=6.13 ([#737](https://github.com/dbt-labs/dbt-spark/pull/737)) -- Update pytz requirement from ~=2023.2 to ~=2023.3 ([#702](https://github.com/dbt-labs/dbt-spark/pull/702)) - -### Dependency - -- Bump pyodbc from 4.0.34 to 4.0.35 ([#417](https://github.com/dbt-labs/dbt-spark/issues/417)) - -### Contributors -- [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753)) -- [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013)) diff --git a/.changes/1.6.0-b2.md b/.changes/1.6.0-b2.md deleted file mode 100644 index ea758062d..000000000 --- a/.changes/1.6.0-b2.md +++ /dev/null @@ -1,5 +0,0 @@ -## dbt-spark 1.6.0-b2 - May 25, 2023 - -### Features - -- All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657)) diff --git a/.changes/1.6.0-b3.md b/.changes/1.6.0-b3.md deleted file mode 100644 index 3379fe507..000000000 --- a/.changes/1.6.0-b3.md +++ /dev/null @@ -1,13 +0,0 @@ -## dbt-spark 1.6.0-b3 - June 09, 2023 - -### Features - -- Standardize the _connection_keys and debug_query for `dbt debug`. 
([#PR754](https://github.com/dbt-labs/dbt-spark/issues/PR754)) - -### Fixes - -- wrap expression for check constraints in parentheses ([#7480](https://github.com/dbt-labs/dbt-spark/issues/7480)) - -### Under the Hood - -- drop support for python 3.7 ([#792](https://github.com/dbt-labs/dbt-spark/issues/792)) diff --git a/.changes/1.6.0/Dependencies-20230329-102021.yaml b/.changes/1.6.0/Dependencies-20230329-102021.yaml deleted file mode 100644 index c60c4e695..000000000 --- a/.changes/1.6.0/Dependencies-20230329-102021.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.1 to ~=23.3" -time: 2023-03-29T10:20:21.00000Z -custom: - Author: dependabot[bot] - PR: 701 diff --git a/.changes/1.6.0/Dependencies-20230406-230203.yaml b/.changes/1.6.0/Dependencies-20230406-230203.yaml deleted file mode 100644 index 969d21024..000000000 --- a/.changes/1.6.0/Dependencies-20230406-230203.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.1.1 to 1.2.0" -time: 2023-04-06T23:02:03.00000Z -custom: - Author: dependabot[bot] - PR: 708 diff --git a/.changes/1.6.0/Dependencies-20230421-041623.yaml b/.changes/1.6.0/Dependencies-20230421-041623.yaml deleted file mode 100644 index bd05eb906..000000000 --- a/.changes/1.6.0/Dependencies-20230421-041623.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest requirement from ~=7.2 to ~=7.3" -time: 2023-04-21T04:16:23.00000Z -custom: - Author: mikealfare - PR: 726 diff --git a/.changes/1.6.0/Dependencies-20230421-180054.yaml b/.changes/1.6.0/Dependencies-20230421-180054.yaml deleted file mode 100644 index c5522ef63..000000000 --- a/.changes/1.6.0/Dependencies-20230421-180054.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update types-pytz requirement from ~=2023.2 to ~=2023.3" -time: 2023-04-21T18:00:54.00000Z -custom: - Author: dependabot[bot] - PR: 704 diff --git a/.changes/1.6.0/Dependencies-20230421-230051.yaml 
b/.changes/1.6.0/Dependencies-20230421-230051.yaml deleted file mode 100644 index 2bcd41258..000000000 --- a/.changes/1.6.0/Dependencies-20230421-230051.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pip-tools requirement from ~=6.12 to ~=6.13" -time: 2023-04-21T23:00:51.00000Z -custom: - Author: dependabot[bot] - PR: 737 diff --git a/.changes/1.6.0/Dependencies-20230423-215745.yaml b/.changes/1.6.0/Dependencies-20230423-215745.yaml deleted file mode 100644 index a6f733f7c..000000000 --- a/.changes/1.6.0/Dependencies-20230423-215745.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytz requirement from ~=2023.2 to ~=2023.3" -time: 2023-04-23T21:57:45.00000Z -custom: - Author: dependabot[bot] - PR: 702 diff --git a/.changes/1.6.0/Dependency-20221116-221906.yaml b/.changes/1.6.0/Dependency-20221116-221906.yaml deleted file mode 100644 index 5e126d078..000000000 --- a/.changes/1.6.0/Dependency-20221116-221906.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: "Dependency" -body: "Bump pyodbc from 4.0.34 to 4.0.35" -time: 2022-11-16T22:19:06.00000Z -custom: - Author: dependabot[bot] - Issue: 417 - PR: 519 diff --git a/.changes/1.6.0/Features-20220812-091652.yaml b/.changes/1.6.0/Features-20220812-091652.yaml deleted file mode 100644 index 58c60aabe..000000000 --- a/.changes/1.6.0/Features-20220812-091652.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: Support insert_overwrite strategy with delta -time: 2022-08-12T09:16:52.7995122+02:00 -custom: - Author: flvndh - Issue: "1013" - PR: "430" diff --git a/.changes/1.6.0/Features-20230427-123135.yaml b/.changes/1.6.0/Features-20230427-123135.yaml deleted file mode 100644 index a1cf88be5..000000000 --- a/.changes/1.6.0/Features-20230427-123135.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: All constraint types are supported, but not enforced. 
-time: 2023-04-27T12:31:35.011284-04:00 -custom: - Author: peterallenwebb - Issue: 656 657 diff --git a/.changes/1.6.0/Features-20230604-043421.yaml b/.changes/1.6.0/Features-20230604-043421.yaml deleted file mode 100644 index 153b43e5a..000000000 --- a/.changes/1.6.0/Features-20230604-043421.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Standardize the _connection_keys and debug_query for `dbt debug`. -time: 2023-06-04T04:34:21.968669-07:00 -custom: - Author: versusfacit - Issue: PR754 diff --git a/.changes/1.6.0/Fixes-20230420-214433.yaml b/.changes/1.6.0/Fixes-20230420-214433.yaml deleted file mode 100644 index 57a3fe3a3..000000000 --- a/.changes/1.6.0/Fixes-20230420-214433.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks -time: 2023-04-20T21:44:33.343598-04:00 -custom: - Author: mikealfare - Issue: "725" diff --git a/.changes/1.6.0/Fixes-20230510-154735.yaml b/.changes/1.6.0/Fixes-20230510-154735.yaml deleted file mode 100644 index d3078564e..000000000 --- a/.changes/1.6.0/Fixes-20230510-154735.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: "Remove dead code \U0001F480" -time: 2023-05-10T15:47:35.848176+02:00 -custom: - Author: Fokko - Issue: "758" diff --git a/.changes/1.6.0/Fixes-20230512-151453.yaml b/.changes/1.6.0/Fixes-20230512-151453.yaml deleted file mode 100644 index cb8c2b767..000000000 --- a/.changes/1.6.0/Fixes-20230512-151453.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: wrap expression for check constraints in parentheses -time: 2023-05-12T15:14:53.151149-04:00 -custom: - Author: michelleark - Issue: "7480" diff --git a/.changes/1.6.0/Under the Hood-20230508-222118.yaml b/.changes/1.6.0/Under the Hood-20230508-222118.yaml deleted file mode 100644 index daf66ad15..000000000 --- a/.changes/1.6.0/Under the Hood-20230508-222118.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Remove unneeded 
type ignore -time: 2023-05-08T22:21:18.093232+02:00 -custom: - Author: Fokko - Issue: "753" diff --git a/.changes/1.6.0/Under the Hood-20230530-162533.yaml b/.changes/1.6.0/Under the Hood-20230530-162533.yaml deleted file mode 100644 index 857db085f..000000000 --- a/.changes/1.6.0/Under the Hood-20230530-162533.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: drop support for python 3.7 -time: 2023-05-30T16:25:33.109326-05:00 -custom: - Author: McKnight-42 - Issue: "792" diff --git a/.changes/unreleased/Features-20230627-155913.yaml b/.changes/unreleased/Features-20230627-155913.yaml deleted file mode 100644 index af12abfdb..000000000 --- a/.changes/unreleased/Features-20230627-155913.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: add dbt-spark portion of dbt_clone functionality -time: 2023-06-27T15:59:13.605278-05:00 -custom: - Author: McKnight-42 aranke - Issue: "815" diff --git a/.changes/unreleased/Features-20230707-104150.yaml b/.changes/unreleased/Features-20230707-104150.yaml new file mode 100644 index 000000000..183a37b45 --- /dev/null +++ b/.changes/unreleased/Features-20230707-104150.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support server_side_parameters for Spark session connection method +time: 2023-07-07T10:41:50.01541+02:00 +custom: + Author: alarocca-apixio + Issue: "690" diff --git a/.changes/unreleased/Features-20230707-113337.yaml b/.changes/unreleased/Features-20230707-113337.yaml new file mode 100644 index 000000000..de0a50fe8 --- /dev/null +++ b/.changes/unreleased/Features-20230707-113337.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add server_side_parameters to HTTP connection method +time: 2023-07-07T11:33:37.794112+02:00 +custom: + Author: Fokko,JCZuurmond + Issue: "824" diff --git a/.changes/unreleased/Features-20230707-114650.yaml b/.changes/unreleased/Features-20230707-114650.yaml new file mode 100644 index 000000000..6f1b3d38a --- /dev/null +++ b/.changes/unreleased/Features-20230707-114650.yaml @@ -0,0 +1,6 
@@ +kind: Features +body: Enforce server side parameters keys and values to be strings +time: 2023-07-07T11:46:50.390918+02:00 +custom: + Author: Fokko,JCZuurmond + Issue: "826" diff --git a/.changes/unreleased/Fixes-20230319-155618.yaml b/.changes/unreleased/Fixes-20230319-155618.yaml deleted file mode 100644 index 58c88727e..000000000 --- a/.changes/unreleased/Fixes-20230319-155618.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: allow negative args for spark split part -time: 2023-03-19T15:56:18.630146-05:00 -custom: - Author: dave-connors-3 - Issue: "688" diff --git a/.changes/unreleased/Fixes-20230510-163110.yaml b/.changes/unreleased/Fixes-20230510-163110.yaml deleted file mode 100644 index 06672ac91..000000000 --- a/.changes/unreleased/Fixes-20230510-163110.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Disallow untyped `def`'s -time: 2023-05-10T16:31:10.593358+02:00 -custom: - Author: Fokko - Issue: "760" diff --git a/.changes/unreleased/Fixes-20230623-112100.yaml b/.changes/unreleased/Fixes-20230623-112100.yaml deleted file mode 100644 index d6129c908..000000000 --- a/.changes/unreleased/Fixes-20230623-112100.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Allow hostname to be provided with or without trailing slash -time: 2023-06-23T11:21:00.901430172Z -custom: - Author: tim-steinkuhler - Issue: "302" diff --git a/.changes/unreleased/Fixes-20230628-162413.yaml b/.changes/unreleased/Fixes-20230628-162413.yaml deleted file mode 100644 index ffa86652d..000000000 --- a/.changes/unreleased/Fixes-20230628-162413.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Remove dependent_projects argument from PartialProject call in unit tests -time: 2023-06-28T16:24:13.288246-05:00 -custom: - Author: McKnight-42 - Issue: "7955" diff --git a/.changes/unreleased/Under the Hood-20230724-165508.yaml b/.changes/unreleased/Under the Hood-20230724-165508.yaml new file mode 100644 index 000000000..889484644 --- /dev/null +++ b/.changes/unreleased/Under 
the Hood-20230724-165508.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Update stale workflow to use centralized version +time: 2023-07-24T16:55:08.096947-04:00 +custom: + Author: mikealfare + Issue: "842" diff --git a/.circleci/config.yml b/.circleci/config.yml index 5fea78c3f..71ca356cf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,23 +10,24 @@ jobs: - checkout - run: tox -e flake8,unit - integration-spark-session: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: godatadriven/pyspark:3.1 - steps: - - checkout - - run: apt-get update - - run: python3 -m pip install --upgrade pip - - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev - - run: python3 -m pip install tox - - run: - name: Run integration tests - command: tox -e integration-spark-session - no_output_timeout: 1h - - store_artifacts: - path: ./logs +# Turning off for now due to flaky test runs; will turn back on at a later date. + # integration-spark-session: + # environment: + # DBT_INVOCATION_ENV: circle + # docker: + # - image: godatadriven/pyspark:3.1 + # steps: + # - checkout + # - run: apt-get update + # - run: python3 -m pip install --upgrade pip + # - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev + # - run: python3 -m pip install tox + # - run: + # name: Run integration tests + # command: tox -e integration-spark-session + # no_output_timeout: 1h + # - store_artifacts: + # path: ./logs integration-spark-thrift: environment: @@ -115,9 +116,9 @@ workflows: test-everything: jobs: - unit - - integration-spark-session: - requires: - - unit + # - integration-spark-session: + # requires: + # - unit - integration-spark-thrift: requires: - unit diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml index 89972070e..52f50b1e3 100644 --- a/.github/workflows/bot-changelog.yml +++ b/.github/workflows/bot-changelog.yml @@ -57,4 +57,4 @@ jobs: commit_message: "Add automated changelog yaml from template for bot
PR" changie_kind: ${{ matrix.changie_kind }} label: ${{ matrix.label }} - custom_changelog_string: "custom:\n Author: ${{ github.event.pull_request.user.login }}\n PR: ${{ github.event.pull_request.number }}" + custom_changelog_string: "custom:\n Author: ${{ github.event.pull_request.user.login }}\n PR: ${{ github.event.pull_request.number }}\n" diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index a56455d55..d902340a9 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,15 +1,12 @@ -name: 'Close stale issues and PRs' +name: "Close stale issues and PRs" on: schedule: - cron: "30 1 * * *" + +permissions: + issues: write + pull-requests: write + jobs: stale: - runs-on: ubuntu-latest - steps: - # pinned at v4 (https://github.com/actions/stale/releases/tag/v4.0.0) - - uses: actions/stale@cdf15f641adb27a71842045a94023bef6945e3aa - with: - stale-issue-message: "This issue has been marked as Stale because it has been open for 180 days with no activity. If you would like the issue to remain open, please remove the stale label or comment on the issue, or it will be closed in 7 days." - stale-pr-message: "This PR has been marked as Stale because it has been open for 180 days with no activity. If you would like the PR to remain open, please remove the stale label or comment on the PR, or it will be closed in 7 days." - # mark issues/PRs stale when they haven't seen activity in 180 days - days-before-stale: 180 + uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f525b3bb..902db37fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,64 +5,9 @@ - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). 
For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry) -## dbt-spark 1.6.0-b3 - June 09, 2023 - -### Features - -- Standardize the _connection_keys and debug_query for `dbt debug`. ([#PR754](https://github.com/dbt-labs/dbt-spark/issues/PR754)) - -### Fixes - -- wrap expression for check constraints in parentheses ([#7480](https://github.com/dbt-labs/dbt-spark/issues/7480)) - -### Under the Hood - -- drop support for python 3.7 ([#792](https://github.com/dbt-labs/dbt-spark/issues/792)) - - - -## dbt-spark 1.6.0-b2 - May 25, 2023 - -### Features - -- All constraint types are supported, but not enforced. ([#656](https://github.com/dbt-labs/dbt-spark/issues/656), [#657](https://github.com/dbt-labs/dbt-spark/issues/657)) - -## dbt-spark 1.6.0-b1 - May 12, 2023 - -### Features - -- Support insert_overwrite strategy with delta ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013)) - -### Fixes - -- Fixed issue where table materialization was not always properly refreshing for non-admin users on Databricks ([#725](https://github.com/dbt-labs/dbt-spark/issues/725)) -- Remove dead code 💀 ([#758](https://github.com/dbt-labs/dbt-spark/issues/758)) - -### Under the Hood - -- Remove unneeded type ignore ([#753](https://github.com/dbt-labs/dbt-spark/issues/753)) - -### Dependencies - -- Update black requirement from ~=23.1 to ~=23.3 ([#701](https://github.com/dbt-labs/dbt-spark/pull/701)) -- Bump mypy from 1.1.1 to 1.2.0 ([#708](https://github.com/dbt-labs/dbt-spark/pull/708)) -- Update pytest requirement from ~=7.2 to ~=7.3 ([#726](https://github.com/dbt-labs/dbt-spark/pull/726)) -- Update types-pytz requirement from ~=2023.2 to ~=2023.3 ([#704](https://github.com/dbt-labs/dbt-spark/pull/704)) -- Update pip-tools requirement from ~=6.12 to ~=6.13 ([#737](https://github.com/dbt-labs/dbt-spark/pull/737)) -- Update pytz requirement from ~=2023.2 to ~=2023.3 
([#702](https://github.com/dbt-labs/dbt-spark/pull/702)) - -### Dependency - -- Bump pyodbc from 4.0.34 to 4.0.35 ([#417](https://github.com/dbt-labs/dbt-spark/issues/417)) - -### Contributors -- [@Fokko](https://github.com/Fokko) ([#758](https://github.com/dbt-labs/dbt-spark/issues/758), [#753](https://github.com/dbt-labs/dbt-spark/issues/753)) -- [@flvndh](https://github.com/flvndh) ([#1013](https://github.com/dbt-labs/dbt-spark/issues/1013)) - -## dbt-spark 1.6.0-a1 - April 17, 2023 - ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) - [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md) - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md) - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 0c2870f87..874bd74c8 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "1.6.0b3" +version = "1.7.0a1" diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 2a7f8188d..6c7899ad9 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -73,7 +73,7 @@ class SparkCredentials(Credentials): connect_retries: int = 0 connect_timeout: int = 10 use_ssl: bool = False - server_side_parameters: Dict[str, Any] = field(default_factory=dict) + server_side_parameters: Dict[str, str] = field(default_factory=dict) retry_all: bool = False @classmethod @@ -142,6 +142,10 @@ def __post_init__(self) -> None: if self.method != SparkConnectionMethod.SESSION: self.host = self.host.rstrip("/") + self.server_side_parameters = { + str(key): str(value) for key, value in self.server_side_parameters.items() + } + @property def type(self) -> str: return "spark" @@ -350,6 +354,7 @@ def open(cls, connection: 
Connection) -> Connection: creds = connection.credentials exc = None + handle: Any for i in range(1 + creds.connect_retries): try: @@ -376,7 +381,10 @@ def open(cls, connection: Connection) -> Connection: token = base64.standard_b64encode(raw_token).decode() transport.setCustomHeaders({"Authorization": "Basic {}".format(token)}) - conn = hive.connect(thrift_transport=transport) + conn = hive.connect( + thrift_transport=transport, + configuration=creds.server_side_parameters, + ) handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.THRIFT: cls.validate_creds(creds, ["host", "port", "user", "schema"]) @@ -460,7 +468,9 @@ def open(cls, connection: Connection) -> Connection: SessionConnectionWrapper, ) - handle = SessionConnectionWrapper(Connection()) # type: ignore + handle = SessionConnectionWrapper( + Connection(server_side_parameters=creds.server_side_parameters) + ) else: raise dbt.exceptions.DbtProfileError( f"invalid credential method: {creds.method}" diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py index 5e4bcc492..0e3717172 100644 --- a/dbt/adapters/spark/session.py +++ b/dbt/adapters/spark/session.py @@ -4,7 +4,7 @@ import datetime as dt from types import TracebackType -from typing import Any, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union from dbt.events import AdapterLogger from dbt.utils import DECIMALS @@ -24,9 +24,10 @@ class Cursor: https://github.com/mkleehammer/pyodbc/wiki/Cursor """ - def __init__(self) -> None: + def __init__(self, *, server_side_parameters: Optional[Dict[str, Any]] = None) -> None: self._df: Optional[DataFrame] = None self._rows: Optional[List[Row]] = None + self.server_side_parameters = server_side_parameters or {} def __enter__(self) -> Cursor: return self @@ -106,7 +107,12 @@ def execute(self, sql: str, *parameters: Any) -> None: """ if len(parameters) > 0: sql = sql % parameters - spark_session = 
SparkSession.builder.enableHiveSupport().getOrCreate() + builder = SparkSession.builder.enableHiveSupport() + + for parameter, value in self.server_side_parameters.items(): + builder = builder.config(parameter, value) + + spark_session = builder.getOrCreate() self._df = spark_session.sql(sql) def fetchall(self) -> Optional[List[Row]]: @@ -159,6 +165,9 @@ class Connection: https://github.com/mkleehammer/pyodbc/wiki/Connection """ + def __init__(self, *, server_side_parameters: Optional[Dict[Any, str]] = None) -> None: + self.server_side_parameters = server_side_parameters or {} + def cursor(self) -> Cursor: """ Get a cursor. @@ -168,7 +177,7 @@ def cursor(self) -> Cursor: out : Cursor The cursor. """ - return Cursor() + return Cursor(server_side_parameters=self.server_side_parameters) class SessionConnectionWrapper(object): diff --git a/setup.py b/setup.py index 2cd78c3ac..c6713e895 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def _get_dbt_core_version(): package_name = "dbt-spark" -package_version = "1.6.0b3" +package_version = "1.7.0a1" dbt_core_version = _get_dbt_core_version() description = """The Apache Spark adapter plugin for dbt""" diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py index 3c7fccd35..1eb818241 100644 --- a/tests/unit/test_adapter.py +++ b/tests/unit/test_adapter.py @@ -39,6 +39,7 @@ def _get_target_http(self, project): "token": "abc123", "organization": "0123456789", "cluster": "01234-23423-coffeetime", + "server_side_parameters": {"spark.driver.memory": "4g"}, } }, "target": "test", @@ -147,13 +148,14 @@ def test_http_connection(self): config = self._get_target_http(self.project_cfg) adapter = SparkAdapter(config) - def hive_http_connect(thrift_transport): + def hive_http_connect(thrift_transport, configuration): self.assertEqual(thrift_transport.scheme, "https") self.assertEqual(thrift_transport.port, 443) self.assertEqual(thrift_transport.host, "myorg.sparkhost.com") self.assertEqual( thrift_transport.path, 
"/sql/protocolv1/o/0123456789/01234-23423-coffeetime" ) + self.assertEqual(configuration["spark.driver.memory"], "4g") # with mock.patch.object(hive, 'connect', new=hive_http_connect): with mock.patch("dbt.adapters.spark.connections.hive.connect", new=hive_http_connect): diff --git a/tests/unit/test_credentials.py b/tests/unit/test_credentials.py new file mode 100644 index 000000000..7a81fdbb1 --- /dev/null +++ b/tests/unit/test_credentials.py @@ -0,0 +1,12 @@ +from dbt.adapters.spark.connections import SparkConnectionMethod, SparkCredentials + + +def test_credentials_server_side_parameters_keys_and_values_are_strings() -> None: + credentials = SparkCredentials( + host="localhost", + method=SparkConnectionMethod.THRIFT, + database="tests", + schema="tests", + server_side_parameters={"spark.configuration": 10}, + ) + assert credentials.server_side_parameters["spark.configuration"] == "10" diff --git a/tox.ini b/tox.ini index 1bba38b6c..97017a926 100644 --- a/tox.ini +++ b/tox.ini @@ -71,7 +71,7 @@ deps = [testenv:integration-spark-session] allowlist_externals = /bin/bash -basepython = python3 +basepython = python3.10 commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*' passenv = DBT_*