-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from datacoves/feature/setup
Feature/setup
- Loading branch information
Showing
41 changed files
with
1,342 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
---
# Tool configuration (schema version 1) that names `transform` as the
# directory holding the dbt project.
version: 1
dbt-project-dir: transform
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
---
# CI for pull requests that touch the dbt project under transform/.
# The single job builds into a database named after the pull request number,
# first runs dbt with --empty models so the governance (pre-commit) checks can
# fail fast, then performs the real dbt build and grants access to the result.
name: Test and Check on Pull Request

on:  # yamllint disable-line rule:truthy
  pull_request:
    paths:
      - transform/*
      - transform/**/*

  # Allows you to run this workflow manually from the Actions tab
  # NOTE(review): expressions based on the pull request payload
  # (github.event.number, github.event.pull_request.*) are empty on manual runs.
  workflow_dispatch:

# This cancels a run if another change is pushed to the same branch
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  dbt:
    name: Pull Request dbt Tests
    runs-on: ubuntu-latest

    # Set environment variables in
    # https://github.com/<your org>/<your repo>/settings/variables/actions
    #
    # Alternatively, you can define multiple ENV for different workflows.
    # https://github.com/<org>/<repo>/settings/environments
    # environment: PR_ENV

    # most people should use this one
    container: datacoves/ci-basic-dbt-snowflake:3.2

    # Every `run` step executes from the dbt project directory inside the
    # checked-out repository.
    defaults:
      run:
        working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

    env:
      # Locations inside the checked-out repository.
      DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
      DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
      DATACOVES__YAML_DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/schedule

      DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}

      # The PR database is the configured database name suffixed with the
      # pull request number, so each PR builds into its own database.
      DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}_PR_${{ github.event.number }}
      DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}

      DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
      DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}

      DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
      DATACOVES__MAIN__PASSWORD: ${{ secrets.DATACOVES__MAIN__PASSWORD }}

      # This is used by datacoves to drop the test database if permissions
      # cannot be applied when using the Datacoves permifrost security model.
      DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}

    steps:
      - name: Checkout branch
        # NOTE(review): the version pin was destroyed by e-mail obfuscation in
        # the captured source ("actions/[email protected]"); v3.5.0 is a
        # best-effort restoration - confirm against the repository.
        uses: actions/checkout@v3.5.0
        with:
          # Fetch full history; the diff and pre-commit steps below compare
          # against origin/<base branch>.
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set Secure Directory
        run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}

      - name: List of files changed
        run: "git diff origin/${{ github.event.pull_request.base.ref }} HEAD --name-status"

      - name: Install dbt packages
        run: "dbt deps"

      - name: Create PR database
        run: "dbt --no-write-json run-operation create_database"

      # The `manifest_found` output of this step selects between the slim
      # (deferred) and full variants of the build steps below.
      # NOTE(review): presumably the script places the production manifest in
      # `logs`, which later steps pass as --state - confirm in get_artifacts.sh.
      - name: Get prod manifest
        id: prod_manifest
        run: "../automate/dbt/get_artifacts.sh"

      ##### Governance Checks
      # this first runs dbt but creates empty tables, this is enough to then run the hooks and fail fast
      # Slim variant: a prod manifest was found and the PR is not labelled
      # `full-refresh`.
      - name: Governance run of dbt with EMPTY models using slim mode
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
        run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty"

      # Full variant: no prod manifest, or the PR is labelled `full-refresh`.
      - name: Governance run of dbt with EMPTY models using full run
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
        run: "dbt build --fail-fast --empty"

      - name: Generate Docs Combining Prod and branch catalog.json
        run: "dbt-coves generate docs --merge-deferred --state logs"

      # Runs the pre-commit hooks over the changes between the base branch
      # and HEAD.
      - name: Run governance checks
        run: "pre-commit run --from-ref origin/${{ github.event.pull_request.base.ref }} --to-ref HEAD"

      ##### Real dbt run given that we passed governance checks
      - name: Run dbt build slim mode
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
        run: "dbt build --fail-fast --defer --state logs --select state:modified+"

      - name: Run dbt build full run
        if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
        run: "dbt build --fail-fast"

      - name: Grant access to PR database
        id: grant-access-to-database
        run: "dbt --no-write-json run-operation grant_access_to_pr_database"

      # We drop the database when there is a failure to grant access to the db because
      # most likely the schema was not set properly in dbt_project.yml so models built to default schema
      # `always()` lets this step be evaluated even though an earlier step
      # failed; it only runs when the grant step itself failed and dropping
      # is enabled through DATACOVES__DROP_DB_ON_FAIL.
      - name: Drop PR database on Failure to grant security access
        if: always() && (env.DATACOVES__DROP_DB_ON_FAIL == 'true') && (steps.grant-access-to-database.outcome == 'failure')
        run: "dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{env.DATACOVES__MAIN__DATABASE}}, recreate: False}'"  # yamllint disable-line rule:line-length
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
---
# Runs when a pull request against main is closed.
# - `build` deploys to production, but only when the PR was actually merged.
# - `drop-pr-db-on-close` removes the temporary PR database whether or not the
#   PR was merged.
name: Perform deployment to production

on:  # yamllint disable-line rule:truthy
  pull_request:
    types:
      - closed
    branches:
      - main
    paths:
      - .github/workflows/*
      - automate/*
      - transform/*
      - transform/**/*

  # Allows you to run this workflow manually from the Actions tab
  # NOTE(review): both jobs are conditioned on the pull request payload, so a
  # manual run currently executes neither of them.
  workflow_dispatch:

jobs:

  # Perform the deployment to Prod
  build:
    # Need to make sure that when the PR was closed, it was actually merged.
    if: github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main'

    name: Deployment Script
    runs-on: ubuntu-latest

    # Set environment variables in
    # https://github.com/<your org>/<your repo>/settings/variables/actions
    #
    # Alternatively, you can define multiple ENV for different workflows.
    # https://github.com/<org>/<repo>/settings/environments
    # environment: PR_ENV
    container: datacoves/ci-basic-dbt-snowflake:3.2

    # Every `run` step executes from the dbt project directory inside the
    # checked-out repository.
    defaults:
      run:
        working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

    env:
      DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
      DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

      DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}

      # Unlike the PR job below, this targets the configured database
      # directly (no _PR_<number> suffix).
      DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}
      DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}

      DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
      DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}

      DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
      DATACOVES__MAIN__PASSWORD: ${{ secrets.DATACOVES__MAIN__PASSWORD }}

      # This is used by datacoves to drop the staging database for blue/green
      # deployments, most likely you don't want to set this, we use it for demos
      DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}

    steps:
      - name: Checkout branch
        # Same checkout pin as the other jobs (was actions/checkout@v2).
        # NOTE(review): the pin used by the other jobs was mangled in the
        # captured source; v3.5.0 is a best-effort restoration - confirm.
        uses: actions/checkout@v3.5.0
        with:
          # Deploy exactly the commit created by merging the pull request.
          # The previous expression, github.event.push.head.sha, is not part
          # of the pull_request payload and always evaluated to empty.
          ref: ${{ github.event.pull_request.merge_commit_sha }}
          fetch-depth: 0

      - name: Set Secure Directory
        run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}

      - name: Install dbt packages
        run: "dbt deps"

      - name: Get prod manifest
        id: prod-manifest
        run: "../automate/dbt/get_artifacts.sh"

      # dry_run is false, so the operation is asked to actually drop the
      # relations rather than only report them.
      - name: Drop orphaned relations in db that are no longer in dbt
        run: "dbt run-operation drop_orphaned_relations --args '{\"dry_run\": false}'"

      - name: Generate dbt documentation
        run: "dbt docs generate"

      # Publishes the contents of transform/target to the dbt-docs branch.
      - name: Deploy docs 🚀
        # NOTE(review): the original version pin was mangled by e-mail
        # obfuscation in the captured source. The lowercase `branch`/`folder`
        # inputs are the v4 interface, so the v4 major tag is used - confirm
        # the exact pin against the repository.
        uses: JamesIves/github-pages-deploy-action@v4
        with:
          branch: dbt-docs
          folder: transform/target

      - name: Upload dbt artifacts
        run: "dbt run-operation upload_artifacts"

  # Drops the temporary PR database
  drop-pr-db-on-close:
    name: Drop PR Database on Close
    # Runs for merged and unmerged closes alike. The event-name guard skips
    # manual (workflow_dispatch) runs, where there is no pull request number
    # and the database name below would end in a bare "_PR_".
    if: ${{ always() && github.event_name == 'pull_request' }}

    runs-on: ubuntu-latest

    # Set environment variables in
    # https://github.com/<your org>/<your repo>/settings/variables/actions
    #
    # Alternatively, you can define multiple ENV for different workflows.
    # https://github.com/<org>/<repo>/settings/environments
    # environment: PR_ENV
    container: datacoves/ci-basic-dbt-snowflake:3.2

    defaults:
      run:
        working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform

    env:
      DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
      DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
      DATACOVES__YAML_DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/schedule

      DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}

      # Name of the database that belongs to the pull request being closed.
      DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}_PR_${{ github.event.number }}
      DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}

      DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
      DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}

      DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
      DATACOVES__MAIN__PASSWORD: ${{ secrets.DATACOVES__MAIN__PASSWORD }}

    steps:
      - name: Checkout branch
        # NOTE(review): the version pin was destroyed by e-mail obfuscation in
        # the captured source ("actions/[email protected]"); v3.5.0 is a
        # best-effort restoration - confirm against the repository.
        uses: actions/checkout@v3.5.0
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set Secure Directory
        run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}

      - name: Install dbt packages
        run: "dbt deps"

      # recreate is False: the PR database is dropped and not created again.
      - name: Drop PR database
        run: "dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{env.DATACOVES__MAIN__DATABASE}}, recreate: False}'"  # yamllint disable-line rule:line-length
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# manifest found file
temp_MANIFEST_FOUND.txt

# dbt
target/
dbt_modules/
dbt_packages/
.config/.user.yml
/profiles.yml
/.user.yml

nohup.out

# Virtual envs
venv/
.env
.venv
env/

scripts/

# logs
*.log
logs/

# Python cache
__pycache__

# Secrets: the .gitsecret random seed and everything under load/secrets.
# The negation must stay last so that *.secret files remain tracked.
.gitsecret/keys/random_seed
load/secrets/**/*.*
!*.secret
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
---
# pre-commit hooks for the dbt project: dbt-checkpoint model/source checks,
# sqlfluff SQL linting and yamllint.
# Unless a hook overrides it, hooks only see files under transform/models/.
files: ^transform/models/

repos:

  - repo: https://github.com/dbt-checkpoint/dbt-checkpoint
    rev: v2.0.1

    hooks:
      - id: check-source-table-has-description
      - id: check-script-semicolon
      - id: check-script-has-no-table-name
      - id: check-script-ref-and-source
      - id: check-model-has-description
      - id: check-model-has-properties-file
      - id: check-model-has-all-columns

  - repo: https://github.com/sqlfluff/sqlfluff
    # this is the version of sqlfluff, needs to be updated when using a new sqlfluff version (pip show sqlfluff)
    rev: "2.3.2"
    hooks:
      - id: sqlfluff-lint
        language: python
        # Need these dependencies.
        # sqlfluff-templater-dbt should match the version of sqlfluff above in rev (pip show sqlfluff-templater-dbt)
        # dbt-snowflake needs to match the version in transform tab of Datacoves (pip show dbt-snowflake)
        #
        # NOTE(review): this list previously installed "dbt-redshift==1.6.7",
        # while the comments here and the CI workflows (snowflake CI image,
        # account/warehouse/role variables) target Snowflake. If the project
        # really runs on Redshift, restore the dbt-redshift pin instead.
        additional_dependencies:
          - "sqlfluff-templater-dbt==2.3.2"
          - "dbt-snowflake==1.6.8"
          - "dbt-core==1.6.9"
        args: [--config, "transform/.sqlfluff"]

  - repo: https://github.com/adrienverge/yamllint.git
    rev: v1.17.0
    hooks:
      - id: yamllint
        args: [-c=.yamllint]
        exclude: ^transform/.dbt_coves/templates
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,46 @@ | ||
# datacoves_training | ||
# Starter Project | ||
|
||
Welcome to your Starter Project! This repository is designed to help users kickstart their journey with dbt (data build tool) and Airflow. Whether you're new to data modeling or an experienced data engineer, this repo will assist you in setting up dbt for your environment and tailoring it to your specific data needs. | ||
|
||
## Brought by Datacoves | ||
|
||
Datacoves is an [enterprise dbt DataOps platform](https://datacoves.com/product) which helps organizations overcome their data delivery challenges quickly using dbt and Airflow, implementing best practices from the start without the need for multiple vendors or costly consultants. | ||
|
||
## Getting Started | ||
|
||
To make use of this repo on your dbt journey, follow these steps: | ||
|
||
1. **Configure your CICD**: | ||
|
||
- Edit your .gitlab-ci.yml or .github/workflows files based on your Data Warehouse and dbt project location. This will involve commenting and uncommenting lines of code in the files. | ||
- For Gitlab users: Generate your Personal Access token. | ||
- Head to user > preferences > Access Tokens | ||
- Name the Token `GITLAB_PUSH_TOKEN` | ||
- Select the expiration date | ||
- Select api, read_repository, write_repository | ||
- Copy the token since it will not be viewable once you navigate from that screen | ||
- Configure the `GITLAB_PUSH_TOKEN` variable in your workflow environment. Settings > CICD > Variables. Be sure to select Masked for sensitive values. | ||
- Configure your workflow environment variables in Gitlab or Github. See .gitlab-ci.yml or .github/workflows. Be sure to select Masked for sensitive values in Gitlab or set secrets in Github. | ||
|
||
2. **Configure dbt**: | ||
|
||
- Configure dbt for your environment by editing the `profiles.yml` file in the `automate/dbt/` directory. Ensure you provide accurate connection details for your Data Warehouse. | ||
|
||
3. **Customize Your Project**: | ||
|
||
- Define your data models in the `models` directory using SQL files. Organize your models according to your data warehouse schema and naming conventions. | ||
|
||
4. **Run dbt**: | ||
|
||
- Verify your connection and project configuration using the `dbt debug` command within the repository directory. | ||
- Execute your dbt transformations using the `dbt run` command within the repository directory. This will compile your SQL models and execute them against your warehouse. | ||
|
||
## Resources | ||
|
||
- [Datacoves](https://datacoves.com) | ||
- [dbt docs](https://docs.getdbt.com) | ||
- [Airflow docs](https://airflow.apache.org/docs/) | ||
- [dbt-checkpoint](https://github.com/dbt-checkpoint/dbt-checkpoint) | ||
- [sqlfluff](https://github.com/sqlfluff/sqlfluff) | ||
|
||
Happy modeling! 🚀 |
Oops, something went wrong.