Skip to content

Commit

Permalink
Merge pull request #2 from datacoves/feature/mayrapena1324
Browse files Browse the repository at this point in the history
Feature/mayrapena1324
  • Loading branch information
mayrapena1324 authored Nov 11, 2024
2 parents 5afc1e3 + 5e8c900 commit bdcfd96
Show file tree
Hide file tree
Showing 16 changed files with 230 additions and 36 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pull_request_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ jobs:
run: "dbt build --fail-fast --empty"

- name: Generate Docs Combining Prod and branch catalog.json
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
run: "dbt-coves generate docs --merge-deferred --state logs"

- name: Run governance checks
Expand Down
23 changes: 10 additions & 13 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
files: ^transform/models/

repos:

- repo: https://github.com/dbt-checkpoint/dbt-checkpoint
rev: v2.0.1
rev: v2.0.5

hooks:
- id: check-source-table-has-description
Expand All @@ -12,32 +11,30 @@ repos:
- id: check-script-ref-and-source
- id: check-model-has-description
- id: check-model-has-properties-file
- id: check-model-has-all-columns

# - id: check-model-has-all-columns
# - id: check-database-casing-consistency
always_run: true

- repo: https://github.com/sqlfluff/sqlfluff
# this is the version of sqlfluff, needs to be updated when using a new sqlfluff version (pip show sqlfluff)
rev: 2.3.2
rev: 3.1.1
hooks:
- id: sqlfluff-lint
language: python
# Need these two dependencies.
# sqlfluff-templater-dbt should match the version of sqlfluff above in rev (pip show sqlfluff-templater-dbt)
# dbt-snowflake needs to match the version in transform tab of Datacoves (pip show dbt-snowflake)
additional_dependencies:
# ["sqlfluff-templater-dbt==2.3.2", "dbt-snowflake==1.6.8", dbt-core==1.6.9]
[
"sqlfluff-templater-dbt==2.3.2",
"dbt-redshift==1.6.7",
dbt-core==1.6.9,
"sqlfluff-templater-dbt==3.1.1",
"dbt-core==1.8.7",
"dbt-snowflake==1.8.4",
]
args: [--config, "transform/.sqlfluff"]

args: [--config, transform/.sqlfluff]

- repo: https://github.com/adrienverge/yamllint.git
rev: v1.17.0
rev: v1.35.1
hooks:
- id: yamllint
args: [-c=.yamllint]
exclude: ^transform/.dbt_coves/templates

File renamed without changes.
7 changes: 6 additions & 1 deletion automate/dbt/get_artifacts.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#! /bin/bash

# Cause script to exit on error
Expand All @@ -9,7 +8,13 @@ cd $DATACOVES__DBT_HOME
mkdir -p logs

dbt run-operation get_last_artifacts

# Check if manifest.json exists; count its lines if it does, otherwise set the count to 0
if [ -e "logs/manifest.json" ]; then
LINES_IN_MANIFEST="$(grep -c '^' logs/manifest.json)"
else
LINES_IN_MANIFEST="0"
fi

if [ $LINES_IN_MANIFEST -eq 0 ]
then
Expand Down
3 changes: 1 addition & 2 deletions automate/dbt/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ default:
type: snowflake
threads: 16
client_session_keep_alive: true

account: "{{ env_var('DATACOVES__MAIN__ACCOUNT') }}"
database: "{{ env_var('DATACOVES__MAIN__DATABASE') }}"
schema: "{{ env_var('DATACOVES__MAIN__SCHEMA') }}"
user: "{{ env_var('DATACOVES__MAIN__USER') }}"
password: "{{ env_var('DATACOVES__MAIN__PASSWORD') }}"
role: "{{ env_var('DATACOVES__MAIN__ROLE') }}"
warehouse: "{{ env_var('DATACOVES__MAIN__WAREHOUSE') }}"

Empty file modified automate/dbt/remove_test_databases.sh
100644 → 100755
Empty file.
4 changes: 4 additions & 0 deletions transform/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ models:
datacoves_starter_project:
L1_staging:
+materialized: view
loans:
+materialized: view
country_data:
+materialized: view
L2_core:
+materialized: view
L3_marts:
Expand Down
40 changes: 29 additions & 11 deletions transform/macros/cicd/get_last_artifacts.sql
Original file line number Diff line number Diff line change
@@ -1,30 +1,48 @@
{# Macro for returning dbt manifest from a snowflake stage. #}
{#
dbt run-operation get_last_artifacts
#}
#}
{# Once this is completed, deferral and state modifiers are available using --state logs #}

{% macro get_last_artifacts(stage = 'RAW.DBT_ARTIFACTS.ARTIFACTS') %}
{# we will put the manifest.json in the log directory and use the with the --state param in dbt #}
{% macro get_last_artifacts() %}
{# Fallback variable used to run/debug macro in vscode #}
{% set stage_name = 'RAW.DBT_ARTIFACTS.ARTIFACTS' %}

{# We will put the manifest.json in the log directory and use it with the --state param in dbt #}
{% set logs_dir = env_var('DATACOVES__DBT_HOME') ~ "/logs/" %}

{# List only the .json files in the root folder (excludes archive dir) #}
{% set list_stage_query %}
LIST @{{ stage }} PATTERN = '^((?!(archive/)).)*.json$';
LIST @{{ stage_name }} PATTERN = '^((?!(archive/)).)*.json$';
{% endset %}

{{ print("\nCurrent items in stage " ~ stage) }}
{{ print("\nCurrent items in stage " ~ stage_name) }}
{% set results = run_query(list_stage_query) %}
{{ results.exclude('md5').print_table(max_column_width=40) }}
{{ print("\n" ~ "="*85) }}

{% set artifacts_destination = "file://" + logs_dir %}
{% if results and results.rows %}

{% set get_query %}
get @{{ stage }}/manifest.json {{ artifacts_destination }};
get @{{ stage }}/catalog.json {{ artifacts_destination }};
{% endset %}
{% set artifacts_destination = "file://" + logs_dir %}

{# Download manifest.json from the stage into artifacts_destination, then print the GET result #}
{% set get_manifest_query %}
    get @{{ stage_name }}/manifest.json {{ artifacts_destination }};
{% endset %}
{# Nothing may follow the closing tag below: literal text outside a tag is emitted into the rendered macro output #}
{% set download_manifest_results = run_query(get_manifest_query) %}
{{ print("Manifest Downloaded") }}
{{ download_manifest_results.print_table(max_column_width=40) }}

{# Download and print catalog.json #}
{% set get_catalog_query %}
get @{{ stage_name }}/catalog.json {{ artifacts_destination }};
{% endset %}
{% set download_catalog_results = run_query(get_catalog_query) %}
{{ print("Catalog Downloaded") }}
{{ download_catalog_results.print_table(max_column_width=40) }}

{% set results = run_query(get_query) %}
{% else %}
{{ print("No artifacts found in stage " ~ stage_name ~ ". Skipping file download.") }}
{% endif %}

{% endmacro %}
11 changes: 6 additions & 5 deletions transform/macros/cicd/grant_access_to_pr_database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,29 @@
#}

{%- macro grant_access_to_pr_database() -%}
{% set db_role_name = 'z_db_balboa_tst' %}
{% set db_role_name = 'analyst' %}
{% set db_name = target.database %}

{% set apply_db_grants_sql %}
grant usage on database {{ db_name }} to role {{db_role_name}};
{% endset %}

{% do run_query(apply_db_grants_sql) %}

{% set schemas_list %}

select schema_name
from {{ db_name }}.information_schema.schemata
where schema_name not in ('INFORMATION_SCHEMA','PUBLIC','DBT_TEST__AUDIT')
{{print(schema_list)}}
{% endset %}

{% set schemas = run_query(schemas_list) %}
{% for schema in schemas %}

{% set apply_schema_grants_sql %}
grant usage on schema {{db_name}}.{{ schema[0] }} to z_schema_{{schema[0]}};
grant select on all tables in schema {{db_name}}.{{ schema[0] }} to role z_tables_views_general;
grant select on all views in schema {{db_name}}.{{ schema[0] }} to role z_tables_views_general;
grant usage on schema {{db_name}}.{{ schema[0] }} to {{db_role_name}};
grant select on all tables in schema {{db_name}}.{{ schema[0] }} to role {{db_role_name}};
grant select on all views in schema {{db_name}}.{{ schema[0] }} to role {{db_role_name}};
{% endset %}

{% do run_query(apply_schema_grants_sql) %}
Expand Down
2 changes: 1 addition & 1 deletion transform/macros/create_database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
identifier="tables") -%}
{% if not database_exists %}
{% set create_db_sql %}
use role transformer_dbt;
use role analyst;
create database {{ target.database }};
grant ownership on database {{ target.database }} to role {{ target.role }};
use role {{ target.role }};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
version: 2

models:
- name: COUNTRY_POPULATIONS
- name: stg_country_populations
description: 'Raw population information from Github Datasets repository'
columns:
- name: year
description: The year for which the population value is recorded
data_tests:
- not_null
- name: country_name
description: The name of the country
data_tests:
- not_null
- name: value
description: The population value for a particular year and country
- name: country_code
Expand Down
8 changes: 8 additions & 0 deletions transform/models/L1_staging/loans/_loans.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Declares the RAW database source whose PERSONAL_LOANS table is read by the
# loans staging model.
version: 2

sources:
  - name: MAYRAPENA1324
    database: RAW
    tables:
      - name: PERSONAL_LOANS
        description: 'A personal loans source table'
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{#
    Staging model for the MAYRAPENA1324.PERSONAL_LOANS source.
    Casts every source column to an explicit type and exposes it under a
    lower-case name; no rows are filtered and no columns are dropped.
#}
with source_rows as (

    select *
    from {{ source('MAYRAPENA1324', 'PERSONAL_LOANS') }}

),

renamed as (

    select
        cast("_AIRBYTE_RAW_ID" as varchar) as airbyte_raw_id,
        cast("_AIRBYTE_EXTRACTED_AT" as timestamp_tz) as airbyte_extracted_at,
        cast("_AIRBYTE_META" as variant) as airbyte_meta,
        cast("TOTAL_ACC" as float) as total_acc,
        cast("ANNUAL_INC" as float) as annual_inc,
        cast("EMP_LENGTH" as varchar) as emp_length,
        cast("DESC" as varchar) as desc,
        cast("TOTAL_PYMNT" as float) as total_pymnt,
        cast("LAST_PYMNT_D" as varchar) as last_pymnt_d,
        cast("ADDR_STATE" as varchar) as addr_state,
        cast("NEXT_PYMNT_D" as varchar) as next_pymnt_d,
        cast("EMP_TITLE" as varchar) as emp_title,
        cast("COLLECTION_RECOVERY_FEE" as float) as collection_recovery_fee,
        cast("MTHS_SINCE_LAST_MAJOR_DEROG" as float) as mths_since_last_major_derog,
        cast("INQ_LAST_6MTHS" as float) as inq_last_6mths,
        cast("SUB_GRADE" as varchar) as sub_grade,
        cast("FUNDED_AMNT_INV" as float) as funded_amnt_inv,
        cast("DELINQ_2YRS" as float) as delinq_2yrs,
        cast("LOAN_ID" as varchar) as loan_id,
        cast("FUNDED_AMNT" as float) as funded_amnt,
        cast("VERIFICATION_STATUS" as varchar) as verification_status,
        cast("DTI" as float) as dti,
        cast("TOTAL_REC_PRNCP" as float) as total_rec_prncp,
        cast("GRADE" as varchar) as grade,
        cast("HOME_OWNERSHIP" as varchar) as home_ownership,
        cast("ISSUE_D" as varchar) as issue_d,
        cast("MTHS_SINCE_LAST_DELINQ" as float) as mths_since_last_delinq,
        cast("OUT_PRNCP" as float) as out_prncp,
        cast("PUB_REC" as float) as pub_rec,
        cast("INT_RATE" as float) as int_rate,
        cast("ZIP_CODE" as varchar) as zip_code,
        cast("OPEN_ACC" as float) as open_acc,
        cast("TERM" as varchar) as term,
        cast("PYMNT_PLAN" as varchar) as pymnt_plan,
        cast("URL" as varchar) as url,
        cast("REVOL_BAL" as float) as revol_bal,
        cast("RECOVERIES" as float) as recoveries,
        cast("LAST_PYMNT_AMNT" as float) as last_pymnt_amnt,
        cast("LOAN_AMNT" as float) as loan_amnt,
        cast("PURPOSE" as varchar) as purpose,
        cast("INITIAL_LIST_STATUS" as varchar) as initial_list_status,
        cast("TOTAL_REC_INT" as float) as total_rec_int,
        cast("TOTAL_PYMNT_INV" as float) as total_pymnt_inv,
        cast("MTHS_SINCE_LAST_RECORD" as float) as mths_since_last_record,
        cast("LAST_CREDIT_PULL_D" as varchar) as last_credit_pull_d,
        cast("TOTAL_REC_LATE_FEE" as float) as total_rec_late_fee,
        cast("MEMBER_ID" as float) as member_id,
        cast("POLICY_CODE" as float) as policy_code,
        cast("TITLE" as varchar) as title,
        cast("LOAN_STATUS" as varchar) as loan_status,
        cast("INSTALLMENT" as float) as installment,
        cast("EARLIEST_CR_LINE" as varchar) as earliest_cr_line,
        cast("REVOL_UTIL" as varchar) as revol_util,
        cast("OUT_PRNCP_INV" as float) as out_prncp_inv,
        cast("COLLECTIONS_12_MTHS_EX_MED" as float) as collections_12_mths_ex_med

    from source_rows

)

select * from renamed
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Properties for the personal-loans staging model. Every column the model
# selects is listed by name, in the same order as the model's select list.
version: 2

models:
  - name: stg_mayrapena1324_personal_loans
    description: 'A staging model for personal loans'
    columns:
      - name: airbyte_raw_id
      - name: airbyte_extracted_at
      - name: airbyte_meta
      - name: total_acc
      - name: annual_inc
      - name: emp_length
      - name: desc
      - name: total_pymnt
      - name: last_pymnt_d
      - name: addr_state
      - name: next_pymnt_d
      - name: emp_title
      - name: collection_recovery_fee
      - name: mths_since_last_major_derog
      - name: inq_last_6mths
      - name: sub_grade
      - name: funded_amnt_inv
      - name: delinq_2yrs
      - name: loan_id
      - name: funded_amnt
      - name: verification_status
      - name: dti
      - name: total_rec_prncp
      - name: grade
      - name: home_ownership
      - name: issue_d
      - name: mths_since_last_delinq
      - name: out_prncp
      - name: pub_rec
      - name: int_rate
      - name: zip_code
      - name: open_acc
      - name: term
      - name: pymnt_plan
      - name: url
      - name: revol_bal
      - name: recoveries
      - name: last_pymnt_amnt
      - name: loan_amnt
      - name: purpose
      - name: initial_list_status
      - name: total_rec_int
      - name: total_pymnt_inv
      - name: mths_since_last_record
      - name: last_credit_pull_d
      - name: total_rec_late_fee
      - name: member_id
      - name: policy_code
      - name: title
      - name: loan_status
      - name: installment
      - name: earliest_cr_line
      - name: revol_util
      - name: out_prncp_inv
      - name: collections_12_mths_ex_med
20 changes: 20 additions & 0 deletions transform/models/L2_core/mayrapena1324_avg_by_grade.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{#
    Average loan amount and number of loans per grade, restricted to loans
    whose loan_status is 'Fully Paid'. Reads from the
    stg_mayrapena1324_personal_loans staging model.
#}
with raw_source as (

    select * from {{ ref('stg_mayrapena1324_personal_loans') }}

),

final as (

    select
        grade,
        avg(loan_amnt) as avg_loan_amount,
        count(*) as total_loans
    from raw_source
    where loan_status = 'Fully Paid'
    group by grade

)

{# Sort in the outermost query: an ORDER BY inside a CTE does not guarantee the order of the final result #}
select * from final
order by grade
Loading

0 comments on commit bdcfd96

Please sign in to comment.