Skip to content

Prepare Ecommerce codebase for Spark ( Iceberg ) Support #128

Prepare Ecommerce codebase for Spark ( Iceberg ) Support

Prepare Ecommerce codebase for Spark ( Iceberg ) Support #128

Workflow file for this run

name: pr_tests
on:
pull_request:
concurrency: dbt_integration_tests
env:
# Set profiles.yml directory
DBT_PROFILES_DIR: ./ci
# Redshift Connection
REDSHIFT_TEST_HOST: ${{ secrets.REDSHIFT_TEST_HOST }}
REDSHIFT_TEST_USER: ${{ secrets.REDSHIFT_TEST_USER }}
REDSHIFT_TEST_PASS: ${{ secrets.REDSHIFT_TEST_PASS }}
REDSHIFT_TEST_DBNAME: ${{ secrets.REDSHIFT_TEST_DBNAME }}
REDSHIFT_TEST_PORT: ${{ secrets.REDSHIFT_TEST_PORT }}
# BigQuery Connection
BIGQUERY_TEST_DATABASE: ${{ secrets.BIGQUERY_TEST_DATABASE }}
BIGQUERY_LOCATION: ${{ secrets.BIGQUERY_LOCATION }}
BIGQUERY_SERVICE_TYPE: ${{ secrets.BIGQUERY_SERVICE_TYPE }}
BIGQUERY_SERVICE_PROJECT_ID: ${{ secrets.BIGQUERY_SERVICE_PROJECT_ID }}
BIGQUERY_SERVICE_PRIVATE_KEY_ID: ${{ secrets.BIGQUERY_SERVICE_PRIVATE_KEY_ID }}
BIGQUERY_SERVICE_PRIVATE_KEY: ${{ secrets.BIGQUERY_SERVICE_PRIVATE_KEY }}
BIGQUERY_SERVICE_CLIENT_EMAIL: ${{ secrets.BIGQUERY_SERVICE_CLIENT_EMAIL }}
BIGQUERY_SERVICE_CLIENT_ID: ${{ secrets.BIGQUERY_SERVICE_CLIENT_ID }}
BIGQUERY_SERVICE_AUTH_URI: ${{ secrets.BIGQUERY_SERVICE_AUTH_URI }}
BIGQUERY_SERVICE_TOKEN_URI: ${{ secrets.BIGQUERY_SERVICE_TOKEN_URI }}
BIGQUERY_SERVICE_AUTH_PROVIDER_X509_CERT_URL: ${{ secrets.BIGQUERY_SERVICE_AUTH_PROVIDER_X509_CERT_URL }}
BIGQUERY_SERVICE_CLIENT_X509_CERT_URL: ${{ secrets.BIGQUERY_SERVICE_CLIENT_X509_CERT_URL }}
# Snowflake Connection
SNOWFLAKE_TEST_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT }}
SNOWFLAKE_TEST_USER: ${{ secrets.SNOWFLAKE_TEST_USER }}
SNOWFLAKE_TEST_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD }}
SNOWFLAKE_TEST_ROLE: ${{ secrets.SNOWFLAKE_TEST_ROLE }}
SNOWFLAKE_TEST_DATABASE: ${{ secrets.SNOWFLAKE_TEST_DATABASE }}
SNOWFLAKE_TEST_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_WAREHOUSE }}
# Postgres Connection
POSTGRES_TEST_USER: ${{ secrets.POSTGRES_TEST_USER }}
POSTGRES_TEST_PASS: ${{ secrets.POSTGRES_TEST_PASS }}
POSTGRES_TEST_DBNAME: ${{ secrets.POSTGRES_TEST_DBNAME }}
POSTGRES_TEST_HOST: ${{ secrets.POSTGRES_TEST_HOST }}
POSTGRES_TEST_PORT: ${{ secrets.POSTGRES_TEST_PORT }}
DATABRICKS_TEST_HOST: ${{ secrets.DATABRICKS_TEST_HOST }}
DATABRICKS_TEST_HTTP_PATH: ${{ secrets.DATABRICKS_TEST_HTTP_PATH }}
DATABRICKS_TEST_TOKEN: ${{ secrets.DATABRICKS_TEST_TOKEN }}
DATABRICKS_TEST_ENDPOINT: ${{ secrets.DATABRICKS_TEST_ENDPOINT }}
jobs:
pr_tests:
name: pr_tests
runs-on: ubuntu-latest
defaults:
run:
# Run tests from integration_tests sub dir
working-directory: ./integration_tests
strategy:
fail-fast: false
matrix:
# dbt_version: ["1.*"]
# warehouse: ["postgres", "bigquery", "snowflake", "databricks", "spark_iceberg"] # TODO: Add RS self-hosted runner
include:
- dbt_version: "1.8.2" # Explicit version for Postgres
warehouse: "postgres"
- dbt_version: "1.8.*" # Wildcard version for BigQuery
warehouse: "bigquery"
- dbt_version: "1.8.*" # Wildcard version for Snowflake
warehouse: "snowflake"
- dbt_version: "1.8.*" # Wildcard version for Databricks
warehouse: "databricks"
- dbt_version: "1.8.6" # Wildcard version for Spark
warehouse: "spark_iceberg"
services:
postgres:
image: postgres:latest
env:
POSTGRES_DB: ${{ secrets.POSTGRES_TEST_DBNAME }}
POSTGRES_USER: ${{ secrets.POSTGRES_TEST_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_TEST_PASS }}
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
# Maps tcp port 5432 on service container to the host
- 5432:5432
steps:
- name: Check out
uses: actions/checkout@v4
# Remove '*' and replace '.' with '_' in DBT_VERSION & set as SCHEMA_SUFFIX.
# SCHEMA_SUFFIX allows us to run multiple versions of dbt in parallel without overwriting the output tables
- name: Set SCHEMA_SUFFIX env
run: echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
env:
DBT_VERSION: ${{ matrix.dbt_version }}
- name: Configure Docker credentials
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_USERNAME }}
password: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_PASSWORD }}
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-1
- name: Set warehouse variables
id: set_warehouse
run: |
WAREHOUSE_PLATFORM=$(echo ${{ matrix.warehouse }} | cut -d'_' -f1)
WAREHOUSE_SPECIFIC=$(echo ${{ matrix.warehouse }} | cut -s -d'_' -f2)
echo "WAREHOUSE_PLATFORM=${WAREHOUSE_PLATFORM}" >> $GITHUB_ENV
echo "WAREHOUSE_SPECIFIC=${WAREHOUSE_SPECIFIC}" >> $GITHUB_ENV
echo "warehouse_platform=${WAREHOUSE_PLATFORM}" >> $GITHUB_OUTPUT
echo "warehouse_specific=${WAREHOUSE_SPECIFIC}" >> $GITHUB_OUTPUT
# Remove '*' and replace '.' with '_' in DBT_VERSION & set as SCHEMA_SUFFIX.
# SCHEMA_SUFFIX allows us to run multiple versions of dbt in parallel without overwriting the output tables
- name: Set SCHEMA_SUFFIX env
run: echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
env:
DBT_VERSION: ${{ matrix.dbt_version }}
- name: Set DEFAULT_TARGET env
run: |
echo "DEFAULT_TARGET=${{matrix.warehouse}}" >> $GITHUB_ENV
- name: Python setup
uses: actions/setup-python@v4
with:
python-version: "3.8.x"
# - name: Pip cache
# uses: actions/cache@v3
# with:
# path: ~/.cache/pip
# key: ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{env.WAREHOUSE_PLATFORM}}
# restore-keys: |
# ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{env.WAREHOUSE_PLATFORM}}
# Install latest patch version. Upgrade if cache contains old patch version.
- name: Install dependencies
run: |
pip install wheel setuptools
pip install -Iv dbt-${{env.WAREHOUSE_PLATFORM}}==${{ matrix.dbt_version }} --upgrade
dbt deps
if: ${{env.WAREHOUSE_PLATFORM != 'spark'}}
- name: Install spark dependencies
run: |
pip install --upgrade pip wheel setuptools
pip install -Iv "dbt-core"==${{ matrix.dbt_version }}
pip install -Iv "dbt-${{ env.WAREHOUSE_PLATFORM }}[PyHive]"==1.8.0
echo "Installing Spark dependencies..."
echo version: ${{ matrix.dbt_version }}
dbt deps
if: ${{env.WAREHOUSE_PLATFORM == 'spark'}}
- name: Install Docker Compose
run: |
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
- name: Build and start Spark cluster
working-directory: .github/workflows/spark_deployment
run: |
docker-compose up -d
echo "Waiting for Spark services to start..."
sleep 90
if: ${{env.WAREHOUSE_PLATFORM == 'spark'}}
- name: "Pre-test: Drop ci schemas"
run: |
dbt run-operation post_ci_cleanup --target ${{matrix.warehouse}}
if: ${{env.WAREHOUSE_PLATFORM != 'spark'}}
- name: Run tests
run: ./.scripts/integration_test.sh -d ${{matrix.warehouse}}
- name: "Post-test: Drop ci schemas"
run: |
dbt run-operation post_ci_cleanup --target ${{matrix.warehouse}}
if: ${{env.WAREHOUSE_PLATFORM != 'spark'}}