From 551e39166fd477a76b5b4524d4119f2a4318554f Mon Sep 17 00:00:00 2001 From: Michael Whitaker Date: Wed, 17 Jan 2024 09:45:18 -0800 Subject: [PATCH] update dbt-core to v1.7.3 (#60) * update dbt-core to v1.7.3 * add dbt-databricks * update databricks to v1.7.3 and add http_path as a user input update ghcr instructions * updates from Leo (#61) * fix optional parameter INPUT_HTTP_PATH * fix databricks token profiles.yml file * fix INPUT_HTTP_PATH escape slash in sed * check if profiles.yml exist --------- Co-authored-by: Leo Schick --------- Co-authored-by: Leo Schick --- Docker_build/Dockerfile | 20 +++++++++++----- Docker_build/README.md | 7 ++++-- Dockerfile | 2 +- README.md | 13 ++++------- action.yml | 3 +++ entrypoint.sh | 51 ++++++++++++++++++++++++++--------------- 6 files changed, 60 insertions(+), 36 deletions(-) diff --git a/Docker_build/Dockerfile b/Docker_build/Dockerfile index d1f8878..188c37b 100644 --- a/Docker_build/Dockerfile +++ b/Docker_build/Dockerfile @@ -16,12 +16,13 @@ FROM --platform=$build_for python:3.10.7-slim-bullseye as base # N.B. The refs updated automagically every release via bumpversion # N.B. 
dbt-postgres is currently found in the core codebase so a value of dbt-core@ is correct -ARG dbt_core_ref=dbt-core@v1.6.3 -ARG dbt_postgres_ref=dbt-core@v1.6.3 -ARG dbt_redshift_ref=dbt-redshift@v1.6.1 -ARG dbt_bigquery_ref=dbt-bigquery@v1.6.5 -ARG dbt_snowflake_ref=dbt-snowflake@v1.6.2 -ARG dbt_spark_ref=dbt-spark@v1.6.0 +ARG dbt_core_ref=dbt-core@v1.7.3 +ARG dbt_postgres_ref=dbt-core@v1.7.3 +ARG dbt_redshift_ref=dbt-redshift@v1.7.1 +ARG dbt_bigquery_ref=dbt-bigquery@v1.7.2 +ARG dbt_snowflake_ref=dbt-snowflake@v1.7.1 +ARG dbt_spark_ref=dbt-spark@v1.7.1 +ARG dbt_databricks_ref=dbt-databricks@v1.7.3 # special case args ARG dbt_spark_version=all ARG dbt_third_party @@ -106,6 +107,12 @@ RUN apt-get update \ RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" +## +# dbt-databricks +## +FROM base as dbt-databricks +RUN python -m pip install --no-cache-dir "git+https://github.com/databricks/${dbt_databricks_ref}#egg=dbt-databricks" + ## # dbt-third-party ## @@ -131,5 +138,6 @@ RUN apt-get update \ RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" + RUN python -m pip install --no-cache "git+https://github.com/databricks/${dbt_databricks_ref}#egg=dbt-databricks" RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" \ No newline at end of file diff --git a/Docker_build/README.md b/Docker_build/README.md index 58603bf..5aea614 100644 --- a/Docker_build/README.md +++ b/Docker_build/README.md @@ -4,6 +4,9 @@ update the 
references in the Dockerfile to current ones. using the dbt [Dockerfile](https://github.com/dbt-labs/dbt-core/blob/main/docker/Dockerfile) as a template. -`docker build --tag mwhitaker/dbt_all:v1.6.3 --target dbt-all .` +`docker build --tag ghcr.io/mwhitaker/dbt_all:v1.7.3 --target dbt-all .` -`docker push mwhitaker/dbt_all:v1.6.3` \ No newline at end of file +export CR_PAT=ghp_xxxx +echo $CR_PAT | docker login ghcr.io -u mwhitaker --password-stdin + +`docker push ghcr.io/mwhitaker/dbt_all:v1.7.3` diff --git a/Dockerfile b/Dockerfile index 9e29173..c9f135e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG DBT_VERSION=v1.6.3 +ARG DBT_VERSION=v1.7.3 FROM ghcr.io/mwhitaker/dbt_all:${DBT_VERSION} COPY entrypoint.sh /entrypoint.sh diff --git a/README.md b/README.md index 1af5ecc..4e7ede1 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A GitHub Action to run [dbt](https://www.getdbt.com) commands in a Docker container. It uses the official images provided by [Fishtown Analytics](https://hub.docker.com/r/fishtownanalytics/dbt/tags). You can use [dbt commands](https://docs.getdbt.com/reference/dbt-commands) such as `run`, `test` and `debug`. This action captures the dbt console output for use in subsequent steps. ### dbt version -The current version of dbt is **1.6.3**. Please note that from dbt v1.0.0. you may have to change your dbt project structure compared to v0.x.x. See the [migration](https://docs.getdbt.com/docs/guides/migration-guide/upgrading-to-1-0-0) docs. +The current version of dbt is **1.7.3**. Please note that from dbt v1.0.0. you may have to change your dbt project structure compared to v0.x.x. See the [migration](https://docs.getdbt.com/docs/guides/migration-guide/upgrading-to-1-0-0) docs. dbt updates their [docker images](https://hub.docker.com/r/fishtownanalytics/dbt/tags?page=1&ordering=last_updated) on a frequent basis and the main branch of this Github Action should be close to the last stable tag. 
If you need to use an earlier version of dbt, you can call this action with a specific [release](https://github.com/mwhitaker/dbt-action/releases), eg `mwhitaker/dbt-action@v0.21.0` or `mwhitaker/dbt-action@v1.5.0`. @@ -113,16 +113,12 @@ default: target: dev outputs: dev: - type: spark - method: http + type: databricks schema: dev_user host: abc-12345-3cc5.cloud.databricks.com - port: 443 + catalog: abc token: _token_ # this will be substituted during build time - cluster: 1234-56789-abc233 - connect_timeout: 30 - connect_retries: 15 - threads: 5 + http_path: _http_path_ # this will be substituted during build time ``` Create a secret for `DBT_TOKEN` and reference it in your workflow. ```yml @@ -130,6 +126,7 @@ Create a secret for `DBT_TOKEN` and reference it in your workflow. uses: mwhitaker/dbt-action@master with: dbt_command: "dbt run --profiles-dir ." + http_path: "sql/protocol/" env: DBT_TOKEN: ${{ secrets.DBT_TOKEN }} ``` diff --git a/action.yml b/action.yml index d727379..32260ed 100644 --- a/action.yml +++ b/action.yml @@ -11,6 +11,9 @@ inputs: description: "dbt project folder. Defaults to ." default: "." required: false + http_path: + description: "http_path for databricks" + required: false outputs: result: description: "Success or failure of the dbt command" diff --git a/entrypoint.sh b/entrypoint.sh index 27cef65..39ac275 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -5,31 +5,44 @@ set -o pipefail echo "dbt project folder set as: \"${INPUT_DBT_PROJECT_FOLDER}\"" cd ${INPUT_DBT_PROJECT_FOLDER} -if [ -n "${DBT_BIGQUERY_TOKEN}" ] +export PROFILES_FILE="${DBT_PROFILES_DIR:-.}/profiles.yml" +if [ -e "${PROFILES_FILE}" ] # check if file exists then - echo trying to parse bigquery token - $(echo ${DBT_BIGQUERY_TOKEN} | base64 -d > ./creds.json 2>/dev/null) - if [ $? 
-eq 0 ] + if [ -n "${DBT_BIGQUERY_TOKEN}" ] then - echo success parsing base64 encoded token - elif $(echo ${DBT_BIGQUERY_TOKEN} > ./creds.json) + echo trying to parse bigquery token + $(echo ${DBT_BIGQUERY_TOKEN} | base64 -d > ./creds.json 2>/dev/null) + if [ $? -eq 0 ] + then + echo success parsing base64 encoded token + elif $(echo ${DBT_BIGQUERY_TOKEN} > ./creds.json) + then + echo success parsing plain token + else + echo cannot parse bigquery token + exit 1 + fi + elif [ -n "${DBT_USER}" ] && [ -n "$DBT_PASSWORD" ] then - echo success parsing plain token + echo trying to use user/password + sed -i "s/_user_/${DBT_USER}/g" $PROFILES_FILE + sed -i "s/_password_/${DBT_PASSWORD}/g" $PROFILES_FILE + elif [ -n "${DBT_TOKEN}" ] + then + echo trying to use DBT_TOKEN/databricks + sed -i "s/_token_/${DBT_TOKEN}/g" $PROFILES_FILE else - echo cannot parse bigquery token - exit 1 + echo no tokens or credentials supplied + fi + + if [ -n "${INPUT_HTTP_PATH}" ] + then + echo trying to use http_path for databricks + sed -i "s/_http_path_/$(echo $INPUT_HTTP_PATH | sed 's/\//\\\//g')/g" $PROFILES_FILE fi -elif [ -n "${DBT_USER}" ] && [ -n "$DBT_PASSWORD" ] -then - echo trying to use user/password - sed -i "s/_user_/${DBT_USER}/g" ./profiles.yml - sed -i "s/_password_/${DBT_PASSWORD}/g" ./profiles.yml -elif [ -n "${DBT_TOKEN}" ] -then - echo trying to use DBT_TOKEN/databricks - sed -i "s/_token_/${DBT_TOKEN}/g" ./datab.yml else - echo no tokens or credentials supplied + echo "profiles.yml not found" + exit 1 fi DBT_ACTION_LOG_FILE=${DBT_ACTION_LOG_FILE:="dbt_console_output.txt"}