# Prepare Ecommerce codebase for Spark (Iceberg) Support #128
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Pull-request workflow: runs the dbt integration tests on every pull request.
name: pr_tests

on:
  pull_request:

# All runs share one concurrency group, so only one run of this workflow
# executes at a time.
concurrency: dbt_integration_tests
# Workflow-level environment: the dbt profiles directory plus warehouse
# connection settings, all sourced from repository secrets.
env:
  # Set profiles.yml directory
  DBT_PROFILES_DIR: ./ci

  # Redshift Connection
  REDSHIFT_TEST_HOST: ${{ secrets.REDSHIFT_TEST_HOST }}
  REDSHIFT_TEST_USER: ${{ secrets.REDSHIFT_TEST_USER }}
  REDSHIFT_TEST_PASS: ${{ secrets.REDSHIFT_TEST_PASS }}
  REDSHIFT_TEST_DBNAME: ${{ secrets.REDSHIFT_TEST_DBNAME }}
  REDSHIFT_TEST_PORT: ${{ secrets.REDSHIFT_TEST_PORT }}

  # BigQuery Connection
  BIGQUERY_TEST_DATABASE: ${{ secrets.BIGQUERY_TEST_DATABASE }}
  BIGQUERY_LOCATION: ${{ secrets.BIGQUERY_LOCATION }}
  BIGQUERY_SERVICE_TYPE: ${{ secrets.BIGQUERY_SERVICE_TYPE }}
  BIGQUERY_SERVICE_PROJECT_ID: ${{ secrets.BIGQUERY_SERVICE_PROJECT_ID }}
  BIGQUERY_SERVICE_PRIVATE_KEY_ID: ${{ secrets.BIGQUERY_SERVICE_PRIVATE_KEY_ID }}
  BIGQUERY_SERVICE_PRIVATE_KEY: ${{ secrets.BIGQUERY_SERVICE_PRIVATE_KEY }}
  BIGQUERY_SERVICE_CLIENT_EMAIL: ${{ secrets.BIGQUERY_SERVICE_CLIENT_EMAIL }}
  BIGQUERY_SERVICE_CLIENT_ID: ${{ secrets.BIGQUERY_SERVICE_CLIENT_ID }}
  BIGQUERY_SERVICE_AUTH_URI: ${{ secrets.BIGQUERY_SERVICE_AUTH_URI }}
  BIGQUERY_SERVICE_TOKEN_URI: ${{ secrets.BIGQUERY_SERVICE_TOKEN_URI }}
  BIGQUERY_SERVICE_AUTH_PROVIDER_X509_CERT_URL: ${{ secrets.BIGQUERY_SERVICE_AUTH_PROVIDER_X509_CERT_URL }}
  BIGQUERY_SERVICE_CLIENT_X509_CERT_URL: ${{ secrets.BIGQUERY_SERVICE_CLIENT_X509_CERT_URL }}

  # Snowflake Connection
  SNOWFLAKE_TEST_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT }}
  SNOWFLAKE_TEST_USER: ${{ secrets.SNOWFLAKE_TEST_USER }}
  SNOWFLAKE_TEST_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD }}
  SNOWFLAKE_TEST_ROLE: ${{ secrets.SNOWFLAKE_TEST_ROLE }}
  SNOWFLAKE_TEST_DATABASE: ${{ secrets.SNOWFLAKE_TEST_DATABASE }}
  SNOWFLAKE_TEST_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_WAREHOUSE }}

  # Postgres Connection
  POSTGRES_TEST_USER: ${{ secrets.POSTGRES_TEST_USER }}
  POSTGRES_TEST_PASS: ${{ secrets.POSTGRES_TEST_PASS }}
  POSTGRES_TEST_DBNAME: ${{ secrets.POSTGRES_TEST_DBNAME }}
  POSTGRES_TEST_HOST: ${{ secrets.POSTGRES_TEST_HOST }}
  POSTGRES_TEST_PORT: ${{ secrets.POSTGRES_TEST_PORT }}

  # Databricks Connection
  DATABRICKS_TEST_HOST: ${{ secrets.DATABRICKS_TEST_HOST }}
  DATABRICKS_TEST_HTTP_PATH: ${{ secrets.DATABRICKS_TEST_HTTP_PATH }}
  DATABRICKS_TEST_TOKEN: ${{ secrets.DATABRICKS_TEST_TOKEN }}
  DATABRICKS_TEST_ENDPOINT: ${{ secrets.DATABRICKS_TEST_ENDPOINT }}
# One matrix job: each entry pairs a warehouse target with the dbt version to
# install, then runs the integration test script against that target.
jobs:
  pr_tests:
    name: pr_tests
    runs-on: ubuntu-latest

    defaults:
      run:
        # Run tests from integration_tests sub dir
        working-directory: ./integration_tests

    strategy:
      # Let the remaining warehouses finish even if one of them fails.
      fail-fast: false
      matrix:
        # dbt_version: ["1.*"]
        # warehouse: ["postgres", "bigquery", "snowflake", "databricks", "spark_iceberg"] # TODO: Add RS self-hosted runner
        include:
          - dbt_version: "1.8.2"  # Explicit version for Postgres
            warehouse: "postgres"
          - dbt_version: "1.8.*"  # Wildcard version for BigQuery
            warehouse: "bigquery"
          - dbt_version: "1.8.*"  # Wildcard version for Snowflake
            warehouse: "snowflake"
          - dbt_version: "1.8.*"  # Wildcard version for Databricks
            warehouse: "databricks"
          - dbt_version: "1.8.6"  # Explicit version for Spark
            warehouse: "spark_iceberg"

    services:
      postgres:
        image: postgres:latest
        env:
          POSTGRES_DB: ${{ secrets.POSTGRES_TEST_DBNAME }}
          POSTGRES_USER: ${{ secrets.POSTGRES_TEST_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_TEST_PASS }}
        # Set health checks to wait until postgres has started
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Maps tcp port 5432 on service container to the host
          - "5432:5432"

    steps:
      - name: Check out
        uses: actions/checkout@v4

      # Strip the last dot-separated component of DBT_VERSION (the patch number
      # or '*') and replace '.' with '_', e.g. "1.8.*" -> "1_8".
      # SCHEMA_SUFFIX allows us to run multiple versions of dbt in parallel without overwriting the output tables
      - name: Set SCHEMA_SUFFIX env
        run: |
          echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
        env:
          DBT_VERSION: ${{ matrix.dbt_version }}

      # NOTE(review): docker/login-action@v2 and
      # aws-actions/configure-aws-credentials@v1 are old major versions;
      # consider upgrading after confirming input compatibility.
      - name: Configure Docker credentials
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_USERNAME }}
          password: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_PASSWORD }}

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-1

      # Split the matrix warehouse on '_': the first field is the platform
      # (e.g. "spark"), the second the variant (e.g. "iceberg"). `cut -s`
      # yields an empty WAREHOUSE_SPECIFIC when the name has no '_'.
      - name: Set warehouse variables
        id: set_warehouse
        run: |
          WAREHOUSE_PLATFORM=$(echo ${{ matrix.warehouse }} | cut -d'_' -f1)
          WAREHOUSE_SPECIFIC=$(echo ${{ matrix.warehouse }} | cut -s -d'_' -f2)
          echo "WAREHOUSE_PLATFORM=${WAREHOUSE_PLATFORM}" >> $GITHUB_ENV
          echo "WAREHOUSE_SPECIFIC=${WAREHOUSE_SPECIFIC}" >> $GITHUB_ENV
          echo "warehouse_platform=${WAREHOUSE_PLATFORM}" >> $GITHUB_OUTPUT
          echo "warehouse_specific=${WAREHOUSE_SPECIFIC}" >> $GITHUB_OUTPUT

      - name: Set DEFAULT_TARGET env
        run: |
          echo "DEFAULT_TARGET=${{matrix.warehouse}}" >> $GITHUB_ENV

      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"

      # - name: Pip cache
      #   uses: actions/cache@v3
      #   with:
      #     path: ~/.cache/pip
      #     key: ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{env.WAREHOUSE_PLATFORM}}
      #     restore-keys: |
      #       ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{env.WAREHOUSE_PLATFORM}}

      # Install latest patch version. Upgrade if cache contains old patch version.
      - name: Install dependencies
        if: ${{env.WAREHOUSE_PLATFORM != 'spark'}}
        run: |
          pip install wheel setuptools
          pip install -Iv dbt-${{env.WAREHOUSE_PLATFORM}}==${{ matrix.dbt_version }} --upgrade
          dbt deps

      # Spark installs dbt-core at the matrix version and pins the adapter
      # (with the PyHive extra) separately at 1.8.0.
      - name: Install spark dependencies
        if: ${{env.WAREHOUSE_PLATFORM == 'spark'}}
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-core"==${{ matrix.dbt_version }}
          pip install -Iv "dbt-${{ env.WAREHOUSE_PLATFORM }}[PyHive]"==1.8.0
          echo "Installing Spark dependencies..."
          echo version: ${{ matrix.dbt_version }}
          dbt deps

      # Only the Spark cluster step below invokes docker-compose, so skip the
      # download for every other warehouse.
      - name: Install Docker Compose
        if: ${{env.WAREHOUSE_PLATFORM == 'spark'}}
        run: |
          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose

      - name: Build and start Spark cluster
        if: ${{env.WAREHOUSE_PLATFORM == 'spark'}}
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 90

      - name: "Pre-test: Drop ci schemas"
        if: ${{env.WAREHOUSE_PLATFORM != 'spark'}}
        run: |
          dbt run-operation post_ci_cleanup --target ${{matrix.warehouse}}

      - name: Run tests
        run: ./.scripts/integration_test.sh -d ${{matrix.warehouse}}

      - name: "Post-test: Drop ci schemas"
        if: ${{env.WAREHOUSE_PLATFORM != 'spark'}}
        run: |
          dbt run-operation post_ci_cleanup --target ${{matrix.warehouse}}