From 1c8d7d012d61c1558719bbffa94b2b5c43769707 Mon Sep 17 00:00:00 2001 From: Manrique Vargas Date: Fri, 13 Oct 2023 11:31:30 -0600 Subject: [PATCH 1/7] insert overwrite instead of insert into for new seed runs changie commit Mv1742 load csv table seed (#1) * changie commit * Update Fixes-20231013-120628.yaml --- .changes/unreleased/Fixes-20231013-120032.yaml | 7 +++++++ .changes/unreleased/Fixes-20231013-120628.yaml | 7 +++++++ dbt/include/spark/macros/materializations/seed.sql | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Fixes-20231013-120032.yaml create mode 100644 .changes/unreleased/Fixes-20231013-120628.yaml diff --git a/.changes/unreleased/Fixes-20231013-120032.yaml b/.changes/unreleased/Fixes-20231013-120032.yaml new file mode 100644 index 000000000..f1afb0ca1 --- /dev/null +++ b/.changes/unreleased/Fixes-20231013-120032.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: '- dbt seed now removes all rows from the existing seed tables and replace values. + As explained in [issue 112](https://github.com/fishtown-analytics/dbt-spar' +time: 2023-10-13T12:00:32.866957-06:00 +custom: + Author: mv1742 + Issue: "112" diff --git a/.changes/unreleased/Fixes-20231013-120628.yaml b/.changes/unreleased/Fixes-20231013-120628.yaml new file mode 100644 index 000000000..a448944ad --- /dev/null +++ b/.changes/unreleased/Fixes-20231013-120628.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Overwrite existing rows on existing seed tables. For unmanaged databases (no location specified), the current seed command in + dbt-spark appends to existing seeded tables instead overwriting. 
+time: 2023-10-13T12:06:28.078483-06:00 +custom: + Author: mv1742 + Issue: "112" diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index 196479cb0..d239295be 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -27,7 +27,7 @@ {% endfor %} {% set sql %} - insert into {{ this.render() }} values + insert {% if loop.index0 == 0 -%} overwrite {% else -%} into {% endif -%} {{ this.render() }} values {% for row in chunk -%} ({%- for col_name in agate_table.column_names -%} {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%} From be5729b4ea7e49548e5430df8fb261fe88d597ee Mon Sep 17 00:00:00 2001 From: mv1742 Date: Fri, 13 Oct 2023 19:25:34 -0600 Subject: [PATCH 2/7] merge remote --- dbt/include/spark/macros/materializations/seed.sql | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index d239295be..dd37ff8f6 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -1,11 +1,8 @@ -{% macro spark__get_binding_char() %} - {{ return('?' 
if target.method == 'odbc' else '%s') }} -{% endmacro %} - - {% macro spark__reset_csv_table(model, full_refresh, old_relation, agate_table) %} {% if old_relation %} - {{ adapter.drop_relation(old_relation) }} + {{ adapter.truncate_relation(old_relation) }} + {% set sql = "truncate table " ~ old_relation %} + {{ return(sql) }} {% endif %} {% set sql = create_csv_table(model, agate_table) %} {{ return(sql) }} From 8fd6cb6cdb9a0b169d8ebfdef3de30b36baa8b38 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Fri, 13 Oct 2023 15:48:35 -0500 Subject: [PATCH 3/7] add docs-issue workflow to dbt-spark (#913) --- .github/workflows/docs-issues.yml | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .github/workflows/docs-issues.yml diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml new file mode 100644 index 000000000..00a098df8 --- /dev/null +++ b/.github/workflows/docs-issues.yml @@ -0,0 +1,43 @@ +# **what?** +# Open an issue in docs.getdbt.com when a PR is labeled `user docs` + +# **why?** +# To reduce barriers for keeping docs up to date + +# **when?** +# When a PR is labeled `user docs` and is merged. Runs on pull_request_target to run off the workflow already merged, +# not the workflow that existed on the PR branch. This allows old PRs to get comments. + + +name: Open issues in docs.getdbt.com repo when a PR is labeled +run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}" + +on: + pull_request_target: + types: [labeled, closed] + +defaults: + run: + shell: bash + +permissions: + issues: write # opens new issues + pull-requests: write # comments on PRs + + +jobs: + open_issues: + # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`. 
Otherwise it runs the + # risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having + # generated the comment before the other runs. This lives here instead of the shared workflow because this is where we + # decide if it should run or not. + if: | + (github.event.pull_request.merged == true) && + ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) || + (github.event.action == 'labeled' && github.event.label.name == 'user docs')) + uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main + with: + issue_repository: "dbt-labs/docs.getdbt.com" + issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}" + issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated." + secrets: inherit From ad17f787481da17962d50fc3c589c066bbdc8040 Mon Sep 17 00:00:00 2001 From: Manrique Vargas Date: Fri, 13 Oct 2023 18:51:55 -0600 Subject: [PATCH 4/7] Update seed.sql --- dbt/include/spark/macros/materializations/seed.sql | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index dd37ff8f6..f6bed1313 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -1,3 +1,11 @@ +<<<<<<< HEAD +======= +{% macro spark__get_binding_char() %} + {{ return('?' 
if target.method == 'odbc' else '%s') }} +{% endmacro %} + + +>>>>>>> ab7c116 (Update seed.sql) {% macro spark__reset_csv_table(model, full_refresh, old_relation, agate_table) %} {% if old_relation %} {{ adapter.truncate_relation(old_relation) }} @@ -7,7 +15,7 @@ {% set sql = create_csv_table(model, agate_table) %} {{ return(sql) }} {% endmacro %} - + {% macro spark__load_csv_rows(model, agate_table) %} @@ -74,3 +82,4 @@ {{ return(sql) }} {% endmacro %} + From 73dff0a0e47a9d67014a96073be4bd3f9886a2cd Mon Sep 17 00:00:00 2001 From: Manrique Vargas Date: Fri, 13 Oct 2023 19:00:27 -0600 Subject: [PATCH 5/7] Update seed.sql --- dbt/include/spark/macros/materializations/seed.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql index f6bed1313..7daf6f6f7 100644 --- a/dbt/include/spark/macros/materializations/seed.sql +++ b/dbt/include/spark/macros/materializations/seed.sql @@ -9,7 +9,8 @@ {% macro spark__reset_csv_table(model, full_refresh, old_relation, agate_table) %} {% if old_relation %} {{ adapter.truncate_relation(old_relation) }} - {% set sql = "truncate table " ~ old_relation %} + {{ adapter.drop_relation(old_relation) }} + {{ return(sql) }} {% endif %} {% set sql = create_csv_table(model, agate_table) %} From 06680dac77ca4a86b4b555439c2ee7abd61159f4 Mon Sep 17 00:00:00 2001 From: Manrique Vargas Date: Fri, 13 Oct 2023 19:18:02 -0600 Subject: [PATCH 6/7] add truncate table function --- dbt/include/spark/macros/adapters.sql | 7 +++++++ .../macros/materializations/incremental/incremental.sql | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index bfc1f198d..461e8f14f 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -342,6 +342,13 @@ {%- endcall %} {% endmacro %} + +{% macro spark__truncate_relation(relation) -%} + 
{% call statement('truncate_relation', auto_begin=False) -%} + truncate {{ relation.type }} if exists {{ relation }} + {%- endcall %} +{% endmacro %} + {% macro spark__drop_relation(relation) -%} {% call statement('drop_relation', auto_begin=False) -%} drop {{ relation.type }} if exists {{ relation }} diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql index 10d4f3ed8..1e0531c58 100644 --- a/dbt/include/spark/macros/materializations/incremental/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -66,6 +66,10 @@ re: python models and temporary views. Also, why do neither drop_relation or adapter.drop_relation work here?! + For 'unmanaged' tables in Spark, the database must be deleted manually; + otherwise the drop statement does not delete the underlying data. + TODO: add a warning that this feature does not work for unmanaged tables. + Managed tables are fine. --#} {% call statement('drop_relation') -%} drop table if exists {{ tmp_relation }} From 29af058a064faea5533c36542c76bc83667ca55d Mon Sep 17 00:00:00 2001 From: Manrique Vargas Date: Fri, 13 Oct 2023 19:38:31 -0600 Subject: [PATCH 7/7] rm changelog --- .changes/unreleased/Fixes-20231013-120032.yaml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .changes/unreleased/Fixes-20231013-120032.yaml diff --git a/.changes/unreleased/Fixes-20231013-120032.yaml b/.changes/unreleased/Fixes-20231013-120032.yaml deleted file mode 100644 index f1afb0ca1..000000000 --- a/.changes/unreleased/Fixes-20231013-120032.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: '- dbt seed now removes all rows from the existing seed tables and replace values. - As explained in [issue 112](https://github.com/fishtown-analytics/dbt-spar' -time: 2023-10-13T12:00:32.866957-06:00 -custom: - Author: mv1742 - Issue: "112"