From 5cb5dea0acc47252aac20aed9ce064650e847faa Mon Sep 17 00:00:00 2001 From: "V. Ganesh" Date: Wed, 28 Sep 2022 13:30:00 +0530 Subject: [PATCH] Describe your changes Issue Synopsis: For dbt projects on the internal EDH cluster, two classes of errors were observed - 1. A temporary table was dropped, but the drop was not reflected in the metastore 2. A regular view / table was dropped before recreating it or issuing an alter table rename, but the drop was not reflected in the metastore Solution: Issue an invalidate metadata statement where relevant. Since, in the dbt flow, the object for which the invalidate metadata statement is issued may not exist, the resulting error is caught and ignored. Internal Jira ticket number or external issue link https://jira.cloudera.com/browse/DBT-350 Testing procedure/screenshots(if appropriate): For a valid EDH profile, issue debug, run --full-refresh and run. Sample runs are as follows: (dev-dbt-impala) ganesh.venkateshwara@ganesh dbtdemo % dbt debug 07:57:58 Running with dbt=1.1.2 dbt version: 1.1.2 python version: 3.9.12 python path: /Users/ganesh.venkateshwara/code/venv/dev/dev-dbt-impala/bin/python os info: macOS-12.6-arm64-arm-64bit Using profiles.yml file at /Users/ganesh.venkateshwara/.dbt/profiles.yml Using dbt_project.yml file at /Users/ganesh.venkateshwara/code/dbt-examples/dbtdemo/dbt_project.yml Configuration: profiles.yml file [OK found and valid] dbt_project.yml file [OK found and valid] Required dependencies: - git [OK found] Connection: host: westeros.edh.cloudera.com port: 21050 schema: p_strategy username: None Connection test: [OK connection ok] All checks passed! 
(dev-dbt-impala) ganesh.venkateshwara@ganesh dbtdemo % dbt run --full-refresh 07:08:42 Running with dbt=1.1.2 07:08:42 Found 3 models, 4 tests, 0 snapshots, 0 analyses, 187 macros, 0 operations, 1 seed file, 0 sources, 0 exposures, 0 metrics 07:08:42 07:09:27 Concurrency: 1 threads (target='dev_impala_kerberos') 07:09:27 07:09:27 1 of 3 START table model p_strategy.my_first_dbt_model ......................... [RUN] 07:10:04 1 of 3 OK created table model p_strategy.my_first_dbt_model .................... [OK in 36.10s] 07:10:04 2 of 3 START incremental model p_strategy.my_incremental_model ................. [RUN] 07:11:22 2 of 3 OK created incremental model p_strategy.my_incremental_model ............ [OK in 78.03s] 07:11:22 3 of 3 START table model p_strategy.my_second_dbt_model ........................ [RUN] 07:11:59 3 of 3 OK created table model p_strategy.my_second_dbt_model ................... [OK in 37.87s] 07:12:02 07:12:02 Finished running 2 table models, 1 incremental model in 199.11s. 07:12:02 07:12:02 Completed successfully 07:12:02 07:12:02 Done. PASS=3 WARN=0 ERROR=0 SKIP=0 TOTAL=3 (dev-dbt-impala) ganesh.venkateshwara@ganesh dbtdemo % dbt run 07:12:40 Running with dbt=1.1.2 07:12:40 Found 3 models, 4 tests, 0 snapshots, 0 analyses, 187 macros, 0 operations, 1 seed file, 0 sources, 0 exposures, 0 metrics 07:12:40 07:13:27 Concurrency: 1 threads (target='dev_impala_kerberos') 07:13:27 07:13:27 1 of 3 START table model p_strategy.my_first_dbt_model ......................... [RUN] 07:14:02 1 of 3 OK created table model p_strategy.my_first_dbt_model .................... [OK in 34.96s] 07:14:02 2 of 3 START incremental model p_strategy.my_incremental_model ................. [RUN] 07:14:31 2 of 3 OK created incremental model p_strategy.my_incremental_model ............ [OK in 29.24s] 07:14:33 3 of 3 START table model p_strategy.my_second_dbt_model ........................ 
[RUN] 07:15:13 3 of 3 OK created table model p_strategy.my_second_dbt_model ................... [OK in 39.58s] 07:15:15 07:15:15 Finished running 2 table models, 1 incremental model in 154.85s. 07:15:15 07:15:15 Completed successfully 07:15:15 07:15:15 Done. PASS=3 WARN=0 ERROR=0 SKIP=0 TOTAL=3 Checklist before requesting a review - [X] I have performed a self-review of my code - [X] I have formatted my added/modified code to follow pep-8 standards - [X] I have checked suggestions from python linter to make sure code is of good quality. --- dbt/adapters/impala/connections.py | 6 +++++- dbt/include/impala/macros/adapters.sql | 13 ++++++++++++- dbt/include/impala/macros/incremental.sql | 7 ++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/impala/connections.py b/dbt/adapters/impala/connections.py index 4c301e5..104582e 100644 --- a/dbt/adapters/impala/connections.py +++ b/dbt/adapters/impala/connections.py @@ -308,7 +308,10 @@ def add_query( # https://github.com/cloudera/impyla/pull/486 configuration = {"paramstyle": "format"} query_exception = None + f = open("dbt.sql", "a+") try: + f.write(sql + ";\n") + f.close() cursor.execute(sql, bindings, configuration) query_status = str(self.get_response(cursor)) except Exception as ex: @@ -329,7 +332,8 @@ def add_query( # re-raise query exception so that it propogates to dbt if (query_exception): - raise query_exception + if (sql.find("invalidate metadata") < 0): # ignore errors for invalidate metadata + raise query_exception fire_event( SQLQueryStatus( diff --git a/dbt/include/impala/macros/adapters.sql b/dbt/include/impala/macros/adapters.sql index 501ec31..1258a90 100644 --- a/dbt/include/impala/macros/adapters.sql +++ b/dbt/include/impala/macros/adapters.sql @@ -206,6 +206,7 @@ {% call statement('drop_relation_if_exists_view') %} drop view if exists {{ relation }} {% endcall %} + {{ invalidate_metadata(relation) }} {% endmacro %} {% macro is_relation_present(relation) -%} @@ -248,6 
+249,12 @@ {% do return(rel_type) %} {% endmacro %} +{% macro invalidate_metadata(relation) %} + {% call statement('invalidate_metadata') %} + invalidate metadata {{ relation.include(schema=True) }} + {% endcall %} +{% endmacro %} + {% macro impala__rename_relation(from_relation, to_relation) -%} {% set from_rel_type = get_relation_type(from_relation) %} @@ -255,8 +262,12 @@ drop table if exists {{ to_relation }} {% endcall %} {% call statement('drop_relation_if_exists_view') %} - drop view if exists {{ to_relation }}; + drop view if exists {{ to_relation }} {% endcall %} + + {{ invalidate_metadata(to_relation) }} + {{ invalidate_metadata(from_relation) }} + {% call statement('rename_relation') -%} {% if not from_rel_type %} {% do exceptions.raise_database_error("Cannot rename a relation with a blank type: " ~ from_relation.identifier) %} diff --git a/dbt/include/impala/macros/incremental.sql b/dbt/include/impala/macros/incremental.sql index 436432f..a09d893 100644 --- a/dbt/include/impala/macros/incremental.sql +++ b/dbt/include/impala/macros/incremental.sql @@ -111,6 +111,7 @@ {% do to_drop.append(backup_relation) %} {% do to_drop.append(intermediate_relation) %} {% else %} + {{ drop_relation_if_exists(tmp_relation) }} {% do run_query(create_table_as(True, tmp_relation, sql)) %} {% do adapter.expand_target_column_types( from_relation=tmp_relation, @@ -125,6 +126,8 @@ {#-- set build_sql = get_delete_insert_merge_sql(target_relation, tmp_relation, unique_key, dest_columns) --#} {% set build_sql = get_insert_overwrite_sql(target_relation, tmp_relation, dest_columns) %} + + {% do to_drop.append(tmp_relation) %} {% endif %} @@ -133,7 +136,9 @@ {% endcall %} {% if need_swap %} + {{ drop_relation_if_exists(backup_relation) }} {% do adapter.rename_relation(target_relation, backup_relation) %} + {{ drop_relation_if_exists(target_relation) }} {% do adapter.rename_relation(intermediate_relation, target_relation) %} {% endif %} @@ -149,7 +154,7 @@ {% do adapter.commit() %} 
{% for rel in to_drop %} - {% do adapter.drop_relation(rel) %} + {{ drop_relation_if_exists(rel) }} {% endfor %} {{ run_hooks(post_hooks, inside_transaction=False) }}