forked from dbt-labs/dbt-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New Dockerfile supporting individual db adapters and architectures
- Loading branch information
Showing
8 changed files
with
381 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,134 @@ | ||
ARG BASE_IMAGE=python:3.8-slim-bullseye | ||
## | ||
# Generic dockerfile for dbt image building. | ||
# See README for operational details | ||
## | ||
|
||
FROM $BASE_IMAGE | ||
ARG BASE_REQUIREMENTS_SRC_PATH | ||
ARG WHEEL_REQUIREMENTS_SRC_PATH | ||
ARG DIST_PATH | ||
# Top level build args | ||
ARG build_for=linux/amd64 | ||
|
||
## | ||
# base image (abstract) | ||
## | ||
FROM --platform=$build_for python:3.9.9-slim-bullseye as base | ||
|
||
# N.B. The refs are updated automagically every release via bumpversion | ||
# N.B. dbt-postgres is currently found in the core codebase so a value of dbt-core@<some_version> is correct | ||
|
||
ARG [email protected] | ||
ARG [email protected] | ||
ARG [email protected] | ||
ARG [email protected] | ||
ARG [email protected] | ||
ARG [email protected] | ||
# special case args | ||
ARG dbt_spark_version=all | ||
ARG dbt_third_party | ||
|
||
# System setup | ||
RUN apt-get update \ | ||
&& apt-get dist-upgrade -y \ | ||
&& apt-get install -y --no-install-recommends \ | ||
git \ | ||
ssh-client \ | ||
software-properties-common \ | ||
make \ | ||
build-essential \ | ||
ca-certificates \ | ||
libpq-dev \ | ||
git \ | ||
ssh-client \ | ||
software-properties-common \ | ||
make \ | ||
build-essential \ | ||
ca-certificates \ | ||
libpq-dev \ | ||
&& apt-get clean \ | ||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||
|
||
RUN echo BASE_REQUIREMENTS_SRC_PATH=$BASE_REQUIREMENTS_SRC_PATH | ||
RUN echo WHEEL_REQUIREMENTS_SRC_PATH=$WHEEL_REQUIREMENTS_SRC_PATH | ||
RUN echo DIST_PATH=$DIST_PATH | ||
COPY $BASE_REQUIREMENTS_SRC_PATH ./requirements.txt | ||
COPY $WHEEL_REQUIREMENTS_SRC_PATH ./wheel_requirements.txt | ||
COPY $DIST_PATH ./dist | ||
RUN pip install --upgrade pip setuptools | ||
RUN pip install --requirement ./requirements.txt | ||
RUN pip install --requirement ./wheel_requirements.txt | ||
&& rm -rf \ | ||
/var/lib/apt/lists/* \ | ||
/tmp/* \ | ||
/var/tmp/* | ||
|
||
# Env vars | ||
ENV PYTHONIOENCODING=utf-8 | ||
ENV LANG C.UTF-8 | ||
WORKDIR /usr/app | ||
ENV LANG=C.UTF-8 | ||
|
||
# Update python | ||
RUN python -m pip install --upgrade pip setuptools wheel --no-cache-dir | ||
|
||
# Set docker basics | ||
WORKDIR /usr/app/dbt/ | ||
VOLUME /usr/app | ||
ENTRYPOINT ["dbt"] | ||
|
||
## | ||
# dbt-core | ||
## | ||
FROM base as dbt-core | ||
RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_core_ref}#egg=dbt-core&subdirectory=core" | ||
|
||
## | ||
# dbt-postgres | ||
## | ||
FROM base as dbt-postgres | ||
RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" | ||
|
||
|
||
## | ||
# dbt-redshift | ||
## | ||
FROM base as dbt-redshift | ||
RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" | ||
|
||
|
||
## | ||
# dbt-bigquery | ||
## | ||
FROM base as dbt-bigquery | ||
RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" | ||
|
||
|
||
## | ||
# dbt-snowflake | ||
## | ||
FROM base as dbt-snowflake | ||
RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" | ||
|
||
## | ||
# dbt-spark | ||
## | ||
FROM base as dbt-spark | ||
RUN apt-get update \ | ||
&& apt-get dist-upgrade -y \ | ||
&& apt-get install -y --no-install-recommends \ | ||
python-dev \ | ||
libsasl2-dev \ | ||
gcc \ | ||
unixodbc-dev \ | ||
&& apt-get clean \ | ||
&& rm -rf \ | ||
/var/lib/apt/lists/* \ | ||
/tmp/* \ | ||
/var/tmp/* | ||
RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" | ||
|
||
|
||
## | ||
# dbt-third-party | ||
## | ||
FROM dbt-core as dbt-third-party | ||
RUN python -m pip install --no-cache-dir "${dbt_third_party}" | ||
|
||
## | ||
# dbt-all | ||
## | ||
FROM base as dbt-all | ||
RUN apt-get update \ | ||
&& apt-get dist-upgrade -y \ | ||
&& apt-get install -y --no-install-recommends \ | ||
python-dev \ | ||
libsasl2-dev \ | ||
gcc \ | ||
unixodbc-dev \ | ||
&& apt-get clean \ | ||
&& rm -rf \ | ||
/var/lib/apt/lists/* \ | ||
/tmp/* \ | ||
/var/tmp/* | ||
RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" | ||
RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" | ||
RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" | ||
RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" | ||
RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# Docker for dbt | ||
This Dockerfile is suitable for building dbt Docker images locally or for use with CI/CD to automate populating a container registry. | ||
|
||
|
||
## Building an image: | ||
This Dockerfile can create images for the following targets, each named after the database they support: | ||
* `dbt-core` _(no db-adapter support)_ | ||
* `dbt-postgres` | ||
* `dbt-redshift` | ||
* `dbt-bigquery` | ||
* `dbt-snowflake` | ||
* `dbt-spark` | ||
* `dbt-third-party` _(requires additional build-arg)_ | ||
* `dbt-all` _(installs all of the above adapters, except third-party ones, in a single image)_ | ||
|
||
In order to build a new image, run the following docker command. | ||
``` | ||
docker build --tag <your_image_name> --target <target_name> <path/to/dockerfile> | ||
``` | ||
By default the images will be populated with the most recent release of `dbt-core` and whatever database adapter you select. If you need to use a different version you can specify it by git ref using the `--build-arg` flag: | ||
``` | ||
docker build --tag <your_image_name> \ | ||
--target <target_name> \ | ||
--build-arg <arg_name>=<git_ref> \ | ||
<path/to/dockerfile> | ||
``` | ||
Valid arg names for versioning are: | ||
* `dbt_core_ref` | ||
* `dbt_postgres_ref` | ||
* `dbt_redshift_ref` | ||
* `dbt_bigquery_ref` | ||
* `dbt_snowflake_ref` | ||
* `dbt_spark_ref` | ||
|
||
> Note: Only override a _single_ build arg for each build. Using multiple overrides may lead to a non-functioning image. | ||
If you wish to build an image with a third-party adapter you can use the `dbt-third-party` target. This target requires you to provide a path to the adapter that can be processed by `pip`, passed via the `dbt_third_party` build arg: | ||
``` | ||
docker build --tag <your_image_name> \ | ||
--target dbt-third-party \ | ||
--build-arg dbt_third_party=<pip_parsable_install_string> \ | ||
<path/to/dockerfile> | ||
``` | ||
|
||
### Examples: | ||
To build an image named "my-dbt" that supports redshift using the latest releases: | ||
``` | ||
cd dbt-core/docker | ||
docker build --tag my-dbt --target dbt-redshift . | ||
``` | ||
|
||
To build an image named "my-other-dbt" that supports bigquery using `dbt-core` version 0.21.latest and the bigquery adapter version 1.0.0b1: | ||
``` | ||
cd dbt-core/docker | ||
docker build \ | ||
--tag my-other-dbt \ | ||
--target dbt-bigquery \ | ||
--build-arg dbt_bigquery_ref=dbt-bigquery@v1.0.0b1 \ | ||
--build-arg dbt_core_ref=dbt-core@0.21.latest \ | ||
. | ||
``` | ||
|
||
To build an image named "my-third-party-dbt" that uses the [Materialize third-party adapter](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize) and the latest release of `dbt-core`: | ||
``` | ||
cd dbt-core/docker | ||
docker build --tag my-third-party-dbt \ | ||
--target dbt-third-party \ | ||
--build-arg dbt_third_party=dbt-materialize \ | ||
. | ||
``` | ||
|
||
|
||
## Special cases | ||
There are a few special cases worth noting: | ||
* The `dbt-spark` database adapter comes in three different versions named `PyHive`, `ODBC`, and the default `all`. If you wish to override this you can use the `--build-arg` flag with the value of `dbt_spark_version=<version_name>`. See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information. | ||
|
||
* The `dbt-postgres` database adapter is released as part of the `dbt-core` codebase. If you wish to override the version used, make sure you use the git ref for `dbt-core`: | ||
``` | ||
docker build --tag my_dbt \ | ||
--target dbt-postgres \ | ||
--build-arg dbt_postgres_ref=dbt-core@<git_ref> \ | ||
<path/to/dockerfile> | ||
``` | ||
|
||
* If you need to build against another architecture (linux/arm64 in this example) you can override the `build_for` build arg: | ||
``` | ||
docker build --tag my_dbt \ | ||
--target dbt-postgres \ | ||
--build-arg build_for=linux/arm64 \ | ||
<path/to/dockerfile> | ||
``` | ||
Supported architectures are listed on the official Python image's [Docker Hub page](https://hub.docker.com/_/python). | ||
|
||
## Running an image in a container: | ||
The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal: | ||
``` | ||
docker run \ | ||
--network=host \ | ||
--mount type=bind,source=path/to/project,target=/usr/app \ | ||
--mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \ | ||
my-dbt \ | ||
ls | ||
``` | ||
> Notes: | ||
> * Bind-mount sources _must_ be an absolute path | ||
> * You may need to make adjustments to the docker networking setting depending on the specifics of your data warehouse/database host. |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.