Commit 944dbea
Add docker release to the full release process for final releases (#1004)

* add docker release to release pipeline
* update docker release to align with other adapters, add dev docker
* remove defaulted input for docker package, override default for docker release image
* fix docker release dependent steps
* only release docker when not testing, allow to only release to docker
* remove dev container
* remove test script
* rename the spark Dockerfile to make space for the release Dockerfile
* move the release Dockerfile into ./docker

---------

Co-authored-by: Emily Rockman <[email protected]>
mikealfare and emmyoop authored May 21, 2024
1 parent 6f8ff60 commit 944dbea
Showing 7 changed files with 187 additions and 62 deletions.
5 changes: 5 additions & 0 deletions .github/dependabot.yml
@@ -15,3 +15,8 @@ updates:
     schedule:
       interval: "weekly"
     rebase-strategy: "disabled"
+  - package-ecosystem: "docker"
+    directory: "/docker-dev"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"
64 changes: 33 additions & 31 deletions .github/workflows/release.yml
@@ -13,8 +13,8 @@
# This will only run manually. Run this workflow only after the
# version bump workflow is completed and related changes are reviewed and merged.
#

-name: Release to GitHub and PyPI
+name: "Release to GitHub, PyPI, and Docker"
+run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker"

on:
  workflow_dispatch:
@@ -56,6 +56,11 @@ on:
        type: boolean
        default: true
        required: false
+      only_docker:
+        description: "Only release Docker image, skip GitHub & PyPI"
+        type: boolean
+        default: false
+        required: false

permissions:
  contents: write # this is the permission that allows creating a new release
@@ -66,7 +71,7 @@ defaults:

jobs:
  log-inputs:
-    name: Log Inputs
+    name: "Log Inputs"
    runs-on: ubuntu-latest
    steps:
      - name: "[DEBUG] Print Variables"
@@ -79,6 +84,7 @@ jobs:
          echo AWS S3 bucket name: ${{ inputs.s3_bucket_name }}
          echo Package test command: ${{ inputs.package_test_command }}
          echo Test run: ${{ inputs.test_run }}
+          echo Only Docker: ${{ inputs.only_docker }}

  # The Spark repository uses CircleCI to run integration tests.
  # Because of this, the process of version bumps will be manual
@@ -87,40 +93,32 @@
  # We are passing `env_setup_script_path` as an empty string
  # so that the integration tests stage will be skipped.
  audit-version-and-changelog:
-    name: Bump package version, Generate changelog
-
+    name: "Bump package version, Generate changelog"
    uses: dbt-labs/dbt-spark/.github/workflows/release-prep.yml@main
-
    with:
      sha: ${{ inputs.sha }}
      version_number: ${{ inputs.version_number }}
      target_branch: ${{ inputs.target_branch }}
      env_setup_script_path: ""
      test_run: ${{ inputs.test_run }}
-
    secrets: inherit

  log-outputs-audit-version-and-changelog:
    name: "[Log output] Bump package version, Generate changelog"
-    if: ${{ !failure() && !cancelled() }}
-
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
    needs: [audit-version-and-changelog]
-
    runs-on: ubuntu-latest
-
    steps:
      - name: Print variables
        run: |
          echo Final SHA : ${{ needs.audit-version-and-changelog.outputs.final_sha }}
          echo Changelog path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}

  build-test-package:
-    name: Build, Test, Package
-    if: ${{ !failure() && !cancelled() }}
+    name: "Build, Test, Package"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
    needs: [audit-version-and-changelog]
-
    uses: dbt-labs/dbt-release/.github/workflows/build.yml@main
-
    with:
      sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
      version_number: ${{ inputs.version_number }}
@@ -129,55 +127,59 @@ jobs:
      s3_bucket_name: ${{ inputs.s3_bucket_name }}
      package_test_command: ${{ inputs.package_test_command }}
      test_run: ${{ inputs.test_run }}
-
    secrets:
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

  github-release:
-    name: GitHub Release
-    if: ${{ !failure() && !cancelled() }}
-
+    name: "GitHub Release"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
    needs: [audit-version-and-changelog, build-test-package]
-
    uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main
-
    with:
      sha: ${{ needs.audit-version-and-changelog.outputs.final_sha }}
      version_number: ${{ inputs.version_number }}
      changelog_path: ${{ needs.audit-version-and-changelog.outputs.changelog_path }}
      test_run: ${{ inputs.test_run }}

  pypi-release:
-    name: PyPI Release
-
+    name: "PyPI Release"
+    if: ${{ !failure() && !cancelled() && !inputs.only_docker }}
    needs: [github-release]
-
    uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main
-
    with:
      version_number: ${{ inputs.version_number }}
      test_run: ${{ inputs.test_run }}
-
    secrets:
      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
      TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}

+  docker-release:
+    name: "Docker Release"
+    # We cannot release to docker on a test run because it uses the tag in GitHub as
+    # what we need to release, but draft releases don't actually tag the commit, so it
+    # finds nothing to release.
+    if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }}
+    needs: [github-release]
+    permissions:
+      packages: write
+    uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main
+    with:
+      version_number: ${{ inputs.version_number }}
+      dockerfile: "docker/Dockerfile"
+      test_run: ${{ inputs.test_run }}

  slack-notification:
    name: Slack Notification
    if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }}
-
    needs:
      [
        audit-version-and-changelog,
        build-test-package,
        github-release,
        pypi-release,
+        docker-release,
      ]
-
    uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
    with:
      status: "failure"
-
    secrets:
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}
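With these changes in place, a maintainer can kick off a Docker-only release from the command line. The sketch below is illustrative, not part of this commit: it assumes an authenticated GitHub CLI and that the workflow file is `.github/workflows/release.yml`; the accepted inputs are the ones in the `workflow_dispatch` block above, and the values shown are examples.

```shell
# Hypothetical invocation of the updated release workflow via the GitHub CLI.
gh workflow run release.yml \
  --repo dbt-labs/dbt-spark \
  -f version_number=1.8.0 \
  -f only_docker=true
```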
4 changes: 4 additions & 0 deletions Makefile
@@ -38,3 +38,7 @@ help: ## Show this help message.
	@echo
	@echo 'targets:'
	@grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: docker-prod
+docker-prod:
+	docker build -f docker/Dockerfile -t dbt-spark .
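For local verification, the new target can be exercised directly. A minimal sketch, assuming Docker is running and the repository root is the working directory:

```shell
# Build the production image via the new Makefile target, then smoke-test it.
make docker-prod
# The image's ENTRYPOINT is `dbt`, so arguments pass straight to the CLI.
docker run --rm dbt-spark --version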
4 changes: 3 additions & 1 deletion docker-compose.yml
@@ -2,7 +2,9 @@ version: "3.7"
services:

  dbt-spark3-thrift:
-    build: docker/
+    build:
+      context: ./docker
+      dockerfile: spark.Dockerfile
    ports:
      - "10000:10000"
      - "4040:4040"
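The renamed `spark.Dockerfile` keeps the local integration-test setup working. Assuming the Compose v2 CLI, something like the following would rebuild and start the Thrift server:

```shell
# Rebuild the test image from docker/spark.Dockerfile and start the service.
docker compose build dbt-spark3-thrift
docker compose up -d dbt-spark3-thrift   # Thrift on :10000, Spark UI on :4040
```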
72 changes: 42 additions & 30 deletions docker/Dockerfile
@@ -1,30 +1,42 @@
-ARG OPENJDK_VERSION=8
-FROM eclipse-temurin:${OPENJDK_VERSION}-jre
-
-ARG BUILD_DATE
-ARG SPARK_VERSION=3.3.2
-ARG HADOOP_VERSION=3
-
-LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
-      org.label-schema.build-date=$BUILD_DATE \
-      org.label-schema.version=$SPARK_VERSION
-
-ENV SPARK_HOME /usr/spark
-ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"
-
-RUN apt-get update && \
-    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
-    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
-    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
-    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
-    apt-get remove -y wget && \
-    apt-get autoremove -y && \
-    apt-get clean
-
-COPY entrypoint.sh /scripts/
-RUN chmod +x /scripts/entrypoint.sh
-
-ENTRYPOINT ["/scripts/entrypoint.sh"]
-CMD ["--help"]
+# this image gets published to GHCR for production use
+ARG py_version=3.11.2
+
+FROM python:$py_version-slim-bullseye as base
+
+RUN apt-get update \
+  && apt-get dist-upgrade -y \
+  && apt-get install -y --no-install-recommends \
+    build-essential=12.9 \
+    ca-certificates=20210119 \
+    gcc=4:10.2.1-1 \
+    git=1:2.30.2-1+deb11u2 \
+    libpq-dev=13.14-0+deb11u1 \
+    libsasl2-dev=2.1.27+dfsg-2.1+deb11u1 \
+    make=4.3-4.1 \
+    openssh-client=1:8.4p1-5+deb11u3 \
+    python-dev-is-python2=2.7.18-9 \
+    software-properties-common=0.96.20.2-2.1 \
+    unixodbc-dev=2.3.6-0.1+b1 \
+  && apt-get clean \
+  && rm -rf \
+    /var/lib/apt/lists/* \
+    /tmp/* \
+    /var/tmp/*
+
+ENV PYTHONIOENCODING=utf-8
+ENV LANG=C.UTF-8
+
+RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir
+
+
+FROM base as dbt-spark
+
+ARG commit_ref=main
+ARG extras=all
+
+HEALTHCHECK CMD dbt --version || exit 1
+
+WORKDIR /usr/app/dbt/
+ENTRYPOINT ["dbt"]
+
+RUN python -m pip install --no-cache-dir "dbt-spark[${extras}] @ git+https://github.com/dbt-labs/dbt-spark@${commit_ref}"
70 changes: 70 additions & 0 deletions docker/README.md
@@ -0,0 +1,70 @@
# Docker for dbt
`Dockerfile` is suitable for building dbt Docker images locally, or for use with CI/CD to automate populating a container registry.

## Building an image:
This Dockerfile can create images for the following target: `dbt-spark`

In order to build a new image, run the following docker command.
```shell
docker build --tag <your_image_name> --target dbt-spark <path/to/dockerfile>
```
---
> **Note:** Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly!
---
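On Docker engines where BuildKit is not already the default builder (recent releases enable it out of the box), it can be turned on per invocation with an environment variable, for example:

```shell
# Force BuildKit for this build only; newer Docker versions use it by default.
DOCKER_BUILDKIT=1 docker build --tag my-dbt --target dbt-spark .
```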

By default the image will be populated with the latest version of `dbt-spark` on `main`.
If you need to use a different version you can specify it by git ref using the `--build-arg` flag:
```shell
docker build --tag <your_image_name> \
--target dbt-spark \
--build-arg commit_ref=<commit_ref> \
<path/to/dockerfile>
```

### Examples:
To build an image named "my-dbt" that supports Spark using the latest code on `main`:
```shell
cd dbt-spark/docker
docker build --tag my-dbt --target dbt-spark .
```

To build an image named "my-other-dbt" that supports Spark using adapter version 1.0.0b1:
```shell
cd dbt-spark/docker
docker build \
--tag my-other-dbt \
--target dbt-spark \
--build-arg commit_ref=v1.0.0b1 \
.
```

## Special cases
There are a few special cases worth noting:
* The `dbt-spark` database adapter comes in three variants: `PyHive`, `ODBC`, and the default `all`.
  To override the default, use the `--build-arg` flag with the value `extras=<extras_name>`.
  See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information.
```shell
docker build --tag my_dbt \
--target dbt-spark \
--build-arg commit_ref=v1.0.0b1 \
--build-arg extras=PyHive \
<path/to/dockerfile>
```

## Running an image in a container:
The `ENTRYPOINT` for this Dockerfile is the command `dbt`, so you can bind-mount your project to `/usr/app` and use dbt as normal:
```shell
docker run \
--network=host \
--mount type=bind,source=path/to/project,target=/usr/app \
--mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \
my-dbt \
ls
```
---
**Notes:**
* Bind-mount sources _must_ be absolute paths.
* You may need to adjust Docker's networking settings depending on the specifics of your data warehouse/database host.

---
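As an illustration only (the field values below are assumptions, not part of this commit), a minimal `profiles.yml` for pointing a containerized dbt at the local Spark Thrift server from `docker-compose.yml` could be created like so:

```shell
# Hypothetical profile for the local Spark Thrift server; adjust to your setup.
cat > path/to/profiles.yml <<'EOF'
spark:
  target: local
  outputs:
    local:
      type: spark
      method: thrift
      host: localhost
      port: 10000
      schema: dev
EOF
```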
30 changes: 30 additions & 0 deletions docker/spark.Dockerfile
@@ -0,0 +1,30 @@
ARG OPENJDK_VERSION=8
FROM eclipse-temurin:${OPENJDK_VERSION}-jre

ARG BUILD_DATE
ARG SPARK_VERSION=3.3.2
ARG HADOOP_VERSION=3

LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
      org.label-schema.build-date=$BUILD_DATE \
      org.label-schema.version=$SPARK_VERSION

ENV SPARK_HOME /usr/spark
ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"

RUN apt-get update && \
    apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
    wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /usr/spark && \
    ln -s /usr/share/java/postgresql-jdbc4.jar /usr/spark/jars/postgresql-jdbc4.jar && \
    apt-get remove -y wget && \
    apt-get autoremove -y && \
    apt-get clean

COPY entrypoint.sh /scripts/
RUN chmod +x /scripts/entrypoint.sh

ENTRYPOINT ["/scripts/entrypoint.sh"]
CMD ["--help"]
