From 92a6876378322fccb9df84f6768b0e5a1c4befdd Mon Sep 17 00:00:00 2001
From: Brendan
Date: Fri, 24 Nov 2023 15:28:37 +1100
Subject: [PATCH] add redshift support

---
 CONTRIBUTING.md                          |  6 ++++++
 integration_test_project/example-env.sh  |  4 ++++
 integration_test_project/profiles.yml    | 10 ++++++++++
 macros/_macros.yml                       |  4 ++++
 .../type_helpers.sql                     | 20 +++++++++++++++++++
 tox.ini                                  | 18 ++++++++++++++++-
 6 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 35653061..0a54d90d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,12 +78,18 @@ Tox will take care of installing the dependencies for each environment, so you d
     tox -e integration_snowflake # For the Snowflake tests
     tox -e integration_databricks # For the Databricks tests
     tox -e integration_bigquery # For the BigQuery tests
+    tox -e integration_redshift # For the Redshift tests
 ```

 The Spark tests require installing the [ODBC driver](https://www.databricks.com/spark/odbc-drivers-download). On a Mac, DBT_ENV_SPARK_DRIVER_PATH should be set to `/Library/simba/spark/lib/libsparkodbc_sbu.dylib`. Spark tests have not yet been added to the integration tests.

+The Redshift tests require your AWS credentials to be configured in the current environment (either as environment variables or in your credentials
+file - see [Configure the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html)). They currently use IAM
+authentication, so your principal needs the redshift:GetClusterCredentials permission to retrieve a temporary password for the specified Redshift
+database user.
+
 If you don't have access to a particular database type, this isn't a problem. Test on the one you do have, and let us know in the PR.

 #### SQLFluff
diff --git a/integration_test_project/example-env.sh b/integration_test_project/example-env.sh
index 47cb0d67..859cb0fc 100755
--- a/integration_test_project/example-env.sh
+++ b/integration_test_project/example-env.sh
@@ -16,6 +16,10 @@ export DBT_ENV_SECRET_DATABRICKS_TOKEN=
 export DBT_ENV_SECRET_GCP_PROJECT=
 export DBT_ENV_SPARK_DRIVER_PATH= # /Library/simba/spark/lib/libsparkodbc_sbu.dylib on a Mac
 export DBT_ENV_SPARK_ENDPOINT= # The endpoint ID from the Databricks HTTP path
+export DBT_ENV_SECRET_REDSHIFT_HOST=
+export DBT_ENV_SECRET_REDSHIFT_CLUSTER_ID=
+export DBT_ENV_SECRET_REDSHIFT_DB=
+export DBT_ENV_SECRET_REDSHIFT_USER=

 # dbt environment variables, change these
 export DBT_VERSION="1_5_0"
diff --git a/integration_test_project/profiles.yml b/integration_test_project/profiles.yml
index b24ad80d..d9f0a5d1 100644
--- a/integration_test_project/profiles.yml
+++ b/integration_test_project/profiles.yml
@@ -52,3 +52,13 @@ dbt_artifacts:
       dbname: postgres
       schema: public
       threads: 8
+    redshift:
+      type: redshift
+      method: iam
+      threads: 8
+      host: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_HOST') }}"
+      port: 5439
+      dbname: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_DB') }}"
+      user: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_USER') }}"
+      schema: dbt_artifacts_test_commit_{{ env_var('DBT_VERSION', '') }}_{{ env_var('GITHUB_SHA_OVERRIDE', '') if env_var('GITHUB_SHA_OVERRIDE', '') else env_var('GITHUB_SHA') }}
+      cluster_id: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_CLUSTER_ID') }}"
diff --git a/macros/_macros.yml b/macros/_macros.yml
index 7b798447..baecfbbd 100644
--- a/macros/_macros.yml
+++ b/macros/_macros.yml
@@ -57,6 +57,10 @@ macros:
     description: |
       Dependent on the adapter type, returns the native type for storing JSON.

+  - name: type_string
+    description: |
+      Dependent on the adapter type, returns the native type for storing a string.
+
   ## MIGRATION ##
   - name: migrate_from_v0_to_v1
     description: |
diff --git a/macros/database_specific_helpers/type_helpers.sql b/macros/database_specific_helpers/type_helpers.sql
index 4064ad46..93959de7 100644
--- a/macros/database_specific_helpers/type_helpers.sql
+++ b/macros/database_specific_helpers/type_helpers.sql
@@ -26,6 +26,10 @@
     json
 {% endmacro %}

+{% macro redshift__type_json() %}
+    varchar(max)
+{% endmacro %}
+
 {#- ARRAY -#}

 {% macro type_array() %}
@@ -43,3 +47,19 @@
 {% macro bigquery__type_array() %}
     array
 {% endmacro %}
+
+{% macro redshift__type_array() %}
+    varchar(max)
+{% endmacro %}
+
+{% macro type_string() %}
+    {{ return(adapter.dispatch('type_string', 'dbt_artifacts')()) }}
+{% endmacro %}
+
+{% macro default__type_string() %}
+    {{ return(api.Column.translate_type("string")) }}
+{% endmacro %}
+
+{% macro redshift__type_string() %}
+    varchar(max)
+{% endmacro %}
diff --git a/tox.ini b/tox.ini
index 542d6e21..8cb4d600 100644
--- a/tox.ini
+++ b/tox.ini
@@ -35,7 +35,7 @@ rules = LT01,LT02,LT03,CP01,AL01,AL02,CP02,ST08,LT06,LT07,AM01,LT08,AL05,RF02,RF
 # ST08: [structure.distinct] 'DISTINCT' used with parentheses.

 deps =
-    sqlfluff-templater-dbt~=2.0.2
+    sqlfluff-templater-dbt~=2.3.5
     dbt-snowflake~=1.7.0

 [sqlfluff:indentation]
@@ -69,6 +69,9 @@ profiles_dir = integration_test_project

 [testenv]
 passenv =
+    AWS_ACCESS_KEY_ID
+    AWS_SECRET_ACCESS_KEY
+    AWS_SESSION_TOKEN
     DBT_PROFILES_DIR
     GITHUB_SHA_OVERRIDE
     GITHUB_SHA
@@ -85,6 +88,10 @@
     DBT_ENV_SECRET_GCP_PROJECT
     DBT_ENV_SPARK_DRIVER_PATH
     DBT_ENV_SPARK_ENDPOINT
+    DBT_ENV_SECRET_REDSHIFT_HOST
+    DBT_ENV_SECRET_REDSHIFT_CLUSTER_ID
+    DBT_ENV_SECRET_REDSHIFT_DB
+    DBT_ENV_SECRET_REDSHIFT_USER
     GOOGLE_APPLICATION_CREDENTIALS
     DBT_CLOUD_PROJECT_ID
     DBT_CLOUD_JOB_ID
@@ -265,6 +272,15 @@
     dbt deps
     dbt build --target bigquery --vars '"my_var": "my value"'

+# Redshift integration test
+[testenv:integration_redshift]
+changedir = integration_test_project
+deps = dbt-redshift~=1.7.0
+commands =
+    dbt clean
+    dbt deps
+    dbt build --target redshift --vars '"my_var": "my value"'
+
 # Spark integration test (disabled)
 [testenv:integration_spark]
 changedir = integration_test_project
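
A minimal usage sketch of the new `type_string()` dispatch macro, for context (not part of this patch; the model and column names below are hypothetical):

```sql
-- Hypothetical model: cast a column with the cross-database type_string()
-- macro. adapter.dispatch resolves to redshift__type_string() on Redshift
-- (rendering varchar(max)); every other adapter falls through to
-- default__type_string(), which delegates to api.Column.translate_type("string").
select
    cast(message as {{ dbt_artifacts.type_string() }}) as message
from {{ ref('my_source_model') }}
```

Note that all three Redshift overrides (`redshift__type_json`, `redshift__type_array`, `redshift__type_string`) render `varchar(max)`, so JSON and array payloads are stored as plain strings on Redshift rather than in a native semi-structured type.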