diff --git a/analysis/test_file.sql b/analysis/test_file.sql new file mode 100644 index 00000000..e69de29b diff --git a/dbt_project.yml b/dbt_project.yml index 0801a25d..45fab5f0 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -31,8 +31,8 @@ vars: incremental_lookback_period: 'hour' future_proof_date: '9999-12-31' -on-run-end: - - "{% if target.name == 'prod' %}{{ dbt_artifacts.upload_results(results) }}{% endif %}" +# on-run-end: +# - "{% if target.name == 'prod' %}{{ dbt_artifacts.upload_results(results) }}{% endif %}" # Configuring models @@ -42,12 +42,23 @@ models: marts: +materialized: table +tags: ['tag'] + dims: + +tags: ["myorders"] + facts: + +tags: ["myorders"] finance: +group: finance marketing: +group: marketing operations: +group: operations + accounting: + +group: accounting + +access: public + cfo: + +group: cfo + +access: public + intermediate: +materialized: view staging: @@ -65,7 +76,4 @@ models: +materialized: ephemeral tests: - rapid_onboarding_exemplar: - _samples: - staging: - +store_failures: "{{ true if target.name == 'prod' else false }}" + +store_failures: false diff --git a/macros/test_macro.sql b/macros/test_macro.sql new file mode 100644 index 00000000..0db90691 --- /dev/null +++ b/macros/test_macro.sql @@ -0,0 +1,5 @@ +{% macro test_macro() %} + + {{ log("This is a message by log macro", info=True) }} + +{% endmacro %} \ No newline at end of file diff --git a/models/_groups.yml b/models/_groups.yml new file mode 100644 index 00000000..80736aed --- /dev/null +++ b/models/_groups.yml @@ -0,0 +1,11 @@ +groups: + - name: cfo + owner: + # 'name' or 'email' is required; additional properties allowed + email: cfo@mycompany.com + + - name: accounting + owner: + email: finance@mycompany.com + + diff --git a/models/_samples/incremental/example_incremental_model.sql b/models/_samples/incremental/example_incremental_model.sql index cf8cb7bc..032a1b4d 100644 --- a/models/_samples/incremental/example_incremental_model.sql +++ 
b/models/_samples/incremental/example_incremental_model.sql @@ -7,6 +7,7 @@ with source as ( select * from {{ ref('example_source_for_incremental') }} + {% if is_incremental() %} -- this filter will only be applied on an incremental run where _etl_loaded_at > (select max(_etl_loaded_at) from {{ this }}) diff --git a/models/_samples/snapshot/example_join_snapshots.sql b/models/_samples/snapshot/example_join_snapshots.md similarity index 98% rename from models/_samples/snapshot/example_join_snapshots.sql rename to models/_samples/snapshot/example_join_snapshots.md index 201d1c73..9ea20d45 100644 --- a/models/_samples/snapshot/example_join_snapshots.sql +++ b/models/_samples/snapshot/example_join_snapshots.md @@ -1,4 +1,4 @@ -with order_snapshot as ( +{# with order_snapshot as ( select * exclude dbt_valid_to, coalesce(dbt_valid_to, cast('{{ var("future_proof_date") }}' as timestamp)) as dbt_valid_to @@ -47,3 +47,4 @@ final as ( ) select * from final +#} \ No newline at end of file diff --git a/models/_samples/staging/jaffle_shop/_jaffle_shop__models.yml b/models/_samples/staging/jaffle_shop/_jaffle_shop__models.yml deleted file mode 100644 index 838684c8..00000000 --- a/models/_samples/staging/jaffle_shop/_jaffle_shop__models.yml +++ /dev/null @@ -1,72 +0,0 @@ -version: 2 - -models: - - - name: stg_jaffle_shop__orders - description: > - multi line description - of my staging orders model from - jaffle shop - data_tests: - - dbt_utils.recency: - datepart: day - field: _etl_loaded_at - interval: 7 - columns: - - name: order_id - data_tests: - - not_null: - config: - where: _etl_loaded_at >= dateadd('day', -3, current_timestamp()) - severity: error - error_if: ">20" - warn_if: ">10" - limit: 25 - store_failures: true - - filtered_unique: - records_to_check: _etl_loaded_at >= dateadd('day', -3, current_timestamp()) - - name: order_date - description: | - multi line description - of my order date column from my - jaffle shop orders model - data_tests: - - 
dbt_expectations.expect_column_values_to_be_of_type: - column_type: date - - - name: stg_jaffle_shop__customers - description: | - multi line description - of my staging - jaffle shop customers model - data_tests: - - not_empty - - dbt_utils.equal_rowcount: - compare_model: source('jaffle_shop', 'customers') - - dbt_utils.equality: - compare_model: source('jaffle_shop', 'customers') - compare_columns: - - first_name - - last_name - - dbt_utils.expression_is_true: - expression: "len(first_name) > 1" - - dbt_expectations.expect_table_row_count_to_be_between: - min_value: 1 - - columns: - - name: customer_id - description: > - multi line description - of my customer id column from my - jaffle shop customers model - data_tests: - - unique - - not_null - - unique_and_not_null - - name: first_name - data_tests: - - dbt_utils.at_least_one - - dbt_utils.not_constant - - dbt_utils.not_empty_string - - dbt_utils.not_accepted_values: - values: ['test'] diff --git a/models/_samples/staging/jaffle_shop/stg_jaffle_shop__customers.sql b/models/_samples/staging/jaffle_shop/stg_jaffle_shop__customers.sql deleted file mode 100644 index 0e10767e..00000000 --- a/models/_samples/staging/jaffle_shop/stg_jaffle_shop__customers.sql +++ /dev/null @@ -1,53 +0,0 @@ -{#- -Explaining Configurations - - grants: shows how to grant a specific privilege (in this case select) on the object to a role in Snowflake (in this case transformer) - - post_hook: shows you can run a statement after your model is built - - alias: sets the name of the object that will be created in the warehouse and overrides the file name. 
in this case, the model - will b created as staging_jaffle_shop_customers - - materialized: overwrites the materialization strategy from the dbt_project.yml file to be table instead of view - - persist_docs: persists the documentation into the database - - schema: adjusting the schema that the model gets built in to be a concatenation of the target schema and "jaffle_shop" - - database: example of how you can overwrite the target database by setting the database in the config. in this example, - we're overriding to the analytics database (even though that is the same as the target database, just an example) - --#} - -{{ - config( - grants = {'select': ['transformer']}, - post_hook = 'select * from {{ this }} limit 1', - alias = 'staging_jaffle_shop_customers', - materialized = 'table', - persist_docs = {"relation": true, "columns": true}, - tags = ['finance'], - schema = 'jaffle_shop', - database = 'analytics' - ) -}} - -{{ - config( - materialized='table' - ) -}} - - - -with source as ( - - select * from {{ source('jaffle_shop', 'customers') }} - -), - -renamed as ( - - select - id as customer_id, - first_name, - last_name - - from source - -) - -select * from renamed diff --git a/models/_samples/staging/jaffle_shop/stg_jaffle_shop__orders.sql b/models/_samples/staging/jaffle_shop/stg_jaffle_shop__orders.sql deleted file mode 100644 index afa5cf86..00000000 --- a/models/_samples/staging/jaffle_shop/stg_jaffle_shop__orders.sql +++ /dev/null @@ -1,48 +0,0 @@ -{#- -Explaining Configurations - - grants: shows how to grant a specific privilege (in this case select) on the object to a role in Snowflake (in this case transformer) - - post_hook: shows you can run a statement after your model is built - - alias: sets the name of the object that will be created in the warehouse and overrides the file name. 
in this case, the model - will b created as staging_jaffle_shop_orders - - materialized: overwrites the materialization strategy from the dbt_project.yml file to be table instead of view - - persist_docs: persists the documentation into the database - - schema: adjusting the schema that the model gets built in to be a concatenation of the target schema and "jaffle_shop" - - database: example of how you can overwrite the target database by setting the database in the config. in this example, - we're overriding to the analytics database (even though that is the same as the target database, just an example) - --#} - -{{ - config( - grants = {'select': ['transformer']}, - post_hook = 'select * from {{ this }} limit 1', - alias = 'staging_jaffle_shop_orders', - materialized = 'table', - persist_docs = {"relation": true, "columns": true}, - tags = ['finance', 'orders'], - schema = 'jaffle_shop', - database = 'analytics' - ) -}} - - -with source as ( - - select * from {{ source('jaffle_shop', 'orders') }} - -), - -renamed as ( - - select - id as order_id, - user_id as customer_id, - order_date, - status, - _etl_loaded_at - - from source - -) - -select * from renamed diff --git a/models/intermediate/finance/int_daily_orders_fact.sql b/models/intermediate/finance/int_daily_orders_fact.sql new file mode 100644 index 00000000..afe02ff2 --- /dev/null +++ b/models/intermediate/finance/int_daily_orders_fact.sql @@ -0,0 +1,36 @@ +with + +dates as ( + + select * from {{ ref('dim_dates') }} + +), + +orders as ( + + select * from {{ ref('dim_orders') }} + +), + +countries as ( + + select * from {{ ref('dim_countries') }} + +), + +join_sources as ( + + select + ordr.order_sk, + country.country_sk as order_country_origin_sk, + date.date_sk as order_date_sk, + ordr.order_quantity + from orders ordr + left join dates date + on ordr.order_date = date.calendar_date + left join countries country + on ordr.order_country_origin = country.country_name + +) + +select * from join_sources diff 
--git a/models/intermediate/orders/int_revenue_fct.sql b/models/intermediate/orders/int_revenue_fct.sql
new file mode 100644
index 00000000..22ebbec8
--- /dev/null
+++ b/models/intermediate/orders/int_revenue_fct.sql
@@ -0,0 +1,86 @@
+{#-
+    int_revenue_fct: one row per 'Major' account with its ebitda figure.
+
+    dim_net_income drives the row set; depreciation, interest and tax rows
+    are left-joined on account_id. All four inputs are filtered to
+    account_type = 'Major' before the join.
+
+    NOTE(review): ebitda is the plain sum of the four *_usd columns. Confirm
+    the sign convention of the inputs makes that sum the intended figure.
+-#}
+
+{{ config(
+    materialized='table',
+    tags=["cfo"]
+) }}
+
+with
+
+depreciations as (
+
+    select * from {{ ref('dim_depreciation_accounts') }}
+    where account_type = 'Major'
+
+),
+
+interests as (
+
+    select * from {{ ref('dim_interests_accounts') }}
+    where account_type = 'Major'
+
+),
+
+net_income as (
+
+    select * from {{ ref('dim_net_income') }}
+    where account_type = 'Major'
+
+),
+
+taxes as (
+
+    select * from {{ ref('dim_tax_accounts') }}
+    where account_type = 'Major'
+
+),
+
+join_sources as (
+
+    -- Left joins: an account missing from depreciations, interests or taxes
+    -- keeps its row and gets NULL for that component.
+    select
+        net_inc.account_id,
+        net_inc.net_income_usd,
+        deprec.depreciation_cost_usd,
+        ints.interests_amount_usd,
+        tax.tax_usd,
+        net_inc.account_type
+    from net_income as net_inc
+    left join depreciations as deprec
+        on net_inc.account_id = deprec.account_id
+    left join interests as ints
+        on net_inc.account_id = ints.account_id
+    left join taxes as tax
+        on net_inc.account_id = tax.account_id
+
+),
+
+final as (
+
+    -- NULL + x is NULL, so default every component to 0 before summing;
+    -- otherwise a single unmatched left join nulls out the whole total.
+    select
+        account_id,
+        account_type,
+        (
+            coalesce(net_income_usd, 0)
+            + coalesce(interests_amount_usd, 0)
+            + coalesce(tax_usd, 0)
+            + coalesce(depreciation_cost_usd, 0)
+        )::numeric(10, 3) as ebitda
+    from join_sources
+
+)
+
+select * from final
+
diff --git a/models/marts/cfo/fct_revenue.sql b/models/marts/cfo/fct_revenue.sql
new file mode 100644
index 00000000..3e7fe56e
--- /dev/null
+++ b/models/marts/cfo/fct_revenue.sql
@@ -0,0 +1,15 @@
+{{ config(
+    materialized='table',
+    tags=["cfo"]
+) }}
+
+-- Pass-through mart: exposes int_revenue_fct unchanged.
+with
+
+sources as (
+
+    select * from {{ ref('int_revenue_fct') }}
+
+)
+
+select * from sources
diff --git a/models/marts/dims/_dims__unit_tests.yml b/models/marts/dims/_dims__unit_tests.yml
new file mode 100644
index 00000000..70ea8a8a
--- /dev/null
+++ b/models/marts/dims/_dims__unit_tests.yml
@@ -0,0 +1,119 @@
+version: 2
+
+unit_tests:
+  - name:
test_if_full_name_format_is_correct
+    description: "Checks if the format for customer full name is correct"
+    model: dim_customer_info
+    config:
+      tags: |
+        {%- if target.name == 'prod' -%} unit_test
+        {%- else -%} do_not_run
+        {% endif %}
+    given:
+      - input: ref('dim_orders')
+        rows:
+          - {"cust_fname": "Johnny", "cust_lname": "Bravo"}
+
+    expect:
+      rows:
+        - {cust_fullname: "Bravo, Johnny"}
+
+  - name: test_accuracy_of_ebitda_calculation
+    description: "Test if the EBITDA calculation is correct"
+    model: int_revenue_fct
+    config:
+      tags: |
+        {%- if target.name != 'prod' -%} unit_test
+        {%- else -%} do_not_run
+        {% endif %}
+
+    given:
+      - input: ref('dim_depreciation_accounts')
+        rows:
+          - {"account_id": "1", "account_type": "Major", "depreciation_cost_usd": "-10"}
+
+      - input: ref('dim_interests_accounts')
+        rows:
+          - {"account_id": "1", "account_type": "Major", "interests_amount_usd": "10"}
+
+      - input: ref('dim_net_income')
+        rows:
+          - {"account_id": "1", "account_type": "Major", "net_income_usd": "10"}
+
+      - input: ref('dim_tax_accounts')
+        rows:
+          - {"account_id": "1", "account_type": "Major", "tax_usd": "-10"}
+
+    expect:
+      rows:
+        - {"account_id": "1", "account_type": "Major", "ebitda": "0"}  # 10 + 10 - 10 - 10 = 0
+
+  - name: test_accuracy_of_ebitda_calculation__sql_format
+    description: "Test if the EBITDA calculation is correct using SQL format"
+    model: int_revenue_fct
+    config:
+      tags: |
+        {%- if target.name != 'prod' -%} unit_test
+        {%- else -%} do_not_run
+        {% endif %}
+
+    given:
+      - input: ref('dim_depreciation_accounts')
+        format: sql
+        rows: |
+          with
+          main as (
+            select
+              '1'::numeric(10, 3) as account_id,
+              'Major'::varchar as account_type,
+              '-10'::numeric(10, 3) as depreciation_cost_usd
+          )
+
+          select * from main
+
+      - input: ref('dim_interests_accounts')
+        format: sql
+        rows: |
+          with
+          main as (
+            select
+              '1'::numeric(10, 3) as account_id,
+              'Major'::varchar as account_type,
+              '10'::numeric(10, 3) as interests_amount_usd
+          )
+
+          select * from main
+
+      - input:
ref('dim_net_income') + format: sql + rows: | + with + main as ( + select + '1'::numeric(10, 3) as account_id, + 'Major'::varchar as account_type, + '10'::numeric(10, 3) as net_income_usd + ) + + select * from main + + - input: ref('dim_tax_accounts') + format: sql + rows: | + with + main as ( + select + '1'::numeric(10, 3) as account_id, + 'Major'::varchar as account_type, + '-10'::numeric(10, 3) as tax_usd + ) + + select * from main + + expect: + format: sql + rows: | + select + 1::numeric(10, 3) as account_id, + 'Major' as account_type, + 0::numeric(10, 3) as ebitda \ No newline at end of file diff --git a/models/marts/dims/_properties.yml b/models/marts/dims/_properties.yml new file mode 100644 index 00000000..506b19a4 --- /dev/null +++ b/models/marts/dims/_properties.yml @@ -0,0 +1,23 @@ +version: 2 + + + +models: + - name: dim_orders + + - name: dim_countries + config: + contract: + enforced: true + columns: + - name: country_sk + data_type: numeric(13, 2) + - name: country_name + data_type: string + data_tests: + - accepted_values: + values: + - "United States" + - "France" + - "Italy" + - "Philippines" diff --git a/models/marts/dims/cfo/dim_depreciation_accounts.sql b/models/marts/dims/cfo/dim_depreciation_accounts.sql new file mode 100644 index 00000000..80bf71c1 --- /dev/null +++ b/models/marts/dims/cfo/dim_depreciation_accounts.sql @@ -0,0 +1,16 @@ +{{ config( + materialized='table', + tags=["cfo"] +) }} + + +with + +source as ( + + select '1'::numeric(10, 3) as account_id, '-1000.00'::numeric(10, 3) depreciation_cost_usd,'Major'::varchar as account_type, '2024-01-01'::date as record_updated_ts union all + select '2'::numeric(10, 3) as account_id, '-1000.00'::numeric(10, 3) depreciation_cost_usd,'Minor'::varchar as account_type, '2024-01-01'::date as record_updated_ts + +) + +select * from source diff --git a/models/marts/dims/cfo/dim_interests_accounts.sql b/models/marts/dims/cfo/dim_interests_accounts.sql new file mode 100644 index 00000000..6fadd7fe 
--- /dev/null +++ b/models/marts/dims/cfo/dim_interests_accounts.sql @@ -0,0 +1,16 @@ +{{ config( + materialized='table', + tags=["cfo"] +) }} + + +with + +source as ( + + select '1'::numeric(10, 3) as account_id, '10000.00'::numeric(10, 3) interests_amount_usd,'Major'::varchar as account_type, '2024-01-01'::date as record_updated_ts union all + select '2'::numeric(10, 3) as account_id, '10000.00'::numeric(10, 3) interests_amount_usd,'Minor'::varchar as account_type, '2024-01-01'::date as record_updated_ts + +) + +select * from source diff --git a/models/marts/dims/cfo/dim_net_income.sql b/models/marts/dims/cfo/dim_net_income.sql new file mode 100644 index 00000000..b434f507 --- /dev/null +++ b/models/marts/dims/cfo/dim_net_income.sql @@ -0,0 +1,16 @@ +{{ config( + materialized='table', + tags=["cfo"] +) }} + + +with + +source as ( + + select '1'::numeric(10, 3) as account_id, '10000.00'::numeric(10, 3) net_income_usd,'Major'::varchar as account_type, '2024-01-01'::date as record_updated_ts union all + select '2'::numeric(10, 3) as account_id, '10000.00'::numeric(10, 3) net_income_usd,'Minor'::varchar as account_type, '2024-01-01'::date as record_updated_ts + +) + +select * from source diff --git a/models/marts/dims/cfo/dim_tax_accounts.sql b/models/marts/dims/cfo/dim_tax_accounts.sql new file mode 100644 index 00000000..92087d9b --- /dev/null +++ b/models/marts/dims/cfo/dim_tax_accounts.sql @@ -0,0 +1,15 @@ +{{ config( + materialized='table', + tags=["cfo"] +) }} + +with + +source as ( + + select '1'::numeric(10, 3) as account_id, '-2000.00'::numeric(10, 3) tax_usd,'Major'::varchar as account_type, '2024-01-01'::date as record_updated_ts union all + select '2'::numeric(10, 3) as account_id, '-2000.00'::numeric(10, 3) tax_usd,'Minor'::varchar as account_type, '2024-01-01'::date as record_updated_ts + +) + +select * from source diff --git a/models/marts/dims/my_orders/dim_countries.sql b/models/marts/dims/my_orders/dim_countries.sql new file mode 100644 index 
00000000..2fa22f5c
--- /dev/null
+++ b/models/marts/dims/my_orders/dim_countries.sql
@@ -0,0 +1,22 @@
+{{
+    config(
+        materialized='table',
+        tags=["orders"]
+    )
+}}
+
+-- Hard-coded country dimension: surrogate key (country_sk) plus country name.
+with
+
+countries as (
+
+    select 1 as country_sk, 'United States'::varchar(100) as country_name union all
+    select 2 as country_sk, 'France'::varchar(100) as country_name union all
+    select 3 as country_sk, 'Italy'::varchar(100) as country_name union all
+    select 4 as country_sk, 'Philippines'::varchar(100) as country_name
+    -- -- union all
+    -- select 5 as country_sk, 'Italia'::varchar(100) as country_name
+
+)
+
+select * from countries
\ No newline at end of file
diff --git a/models/marts/dims/my_orders/dim_customer_info.sql b/models/marts/dims/my_orders/dim_customer_info.sql
new file mode 100644
index 00000000..a4ee3d82
--- /dev/null
+++ b/models/marts/dims/my_orders/dim_customer_info.sql
@@ -0,0 +1,26 @@
+{{
+    config(
+        materialized='table',
+        tags=["orders"]
+    )
+}}
+
+-- Builds the customer display name as "<last name>, <first name>",
+-- one output row per dim_orders row.
+with
+
+dim_orders as (
+
+    select * from {{ ref('dim_orders') }}
+
+),
+
+final as (
+
+    select
+        cust_lname || ', ' || cust_fname as cust_fullname
+    from dim_orders
+
+)
+
+select * from final
diff --git a/models/marts/dims/my_orders/dim_dates.sql b/models/marts/dims/my_orders/dim_dates.sql
new file mode 100644
index 00000000..71d678ba
--- /dev/null
+++ b/models/marts/dims/my_orders/dim_dates.sql
@@ -0,0 +1,22 @@
+{{
+    config(
+        materialized='table',
+        tags=["orders"]
+    )
+}}
+
+-- Hard-coded date dimension covering 2024-01-01 through 2024-01-05.
+
+with
+
+dates as (
+
+    select '20240101'::decimal(18,2) as date_sk, '2024-01-01'::date as calendar_date union all
+    select '20240102'::decimal(18,2) as date_sk, '2024-01-02'::date as calendar_date union all
+    select '20240103'::decimal(18,2) as date_sk, '2024-01-03'::date as calendar_date union all
+    select '20240104'::decimal(18,2) as date_sk, '2024-01-04'::date as calendar_date union all
+    select '20240105'::decimal(18,2) as date_sk, '2024-01-05'::date as calendar_date
+
+)
+
+select * from dates
\ No newline at end of file
diff --git
a/models/marts/dims/my_orders/dim_orders.sql b/models/marts/dims/my_orders/dim_orders.sql
new file mode 100644
index 00000000..e709c22b
--- /dev/null
+++ b/models/marts/dims/my_orders/dim_orders.sql
@@ -0,0 +1,30 @@
+{{
+    config(
+        materialized='table',
+        tags=["orders"]
+    )
+}}
+
+-- Hard-coded order rows. Keep the casts identical in every union branch so each column keeps its declared type.
+
+with
+
+orders as (
+
+    select '1'::numeric(10,2) as order_sk, 'order-001' as order_id, '2024-01-01'::date as order_date, 'United States'::varchar(100) as order_country_origin,'John'::varchar(10) as cust_fname, 'Lennon'::varchar(10) as cust_lname, 1::decimal(17,3) as order_quantity union all
+    select '2'::numeric(10,2) as order_sk, 'order-002' as order_id, '2024-01-03'::date as order_date, 'France'::varchar(100) as order_country_origin,'Ringo'::varchar(10) as cust_fname, 'Starr'::varchar(10) as cust_lname, 2::decimal(17,3) as order_quantity union all
+    select '3'::numeric(10,2) as order_sk, 'order-003' as order_id, '2024-01-03'::date as order_date, 'Philippines'::varchar(100) as order_country_origin,'Paul'::varchar(10) as cust_fname, 'McCartney'::varchar(10) as cust_lname, 3::decimal(17,3) as order_quantity
+
+-- model contract example: missing column order_quantity
+    -- select '1'::numeric(10,2) as order_sk, 'order-001' as order_id, '2024-01-01'::date as order_date, 'United States'::varchar(100) as order_country_origin,'John'::varchar(10) as cust_fname, 'Lennon'::varchar(10) as cust_lname union all
+    -- select '2'::numeric(10,2) as order_sk, 'order-002' as order_id, '2024-01-03'::date as order_date, 'France'::varchar(100) as order_country_origin,'Ringo'::varchar(10) as cust_fname, 'Starr'::varchar(10) as cust_lname union all
+    -- select '3'::numeric(10,2) as order_sk, 'order-003' as order_id, '2024-01-03'::date as order_date, 'Philippines'::varchar(100) as order_country_origin,'Paul'::varchar(10) as cust_fname, 'McCartney'::varchar(10) as cust_lname
+    -- union all
+    -- select NULL::decimal(18,2) as order_sk, 'order-003' as order_id, '2024-01-03'::date as order_date,
'Philippines'::varchar(100) as order_country_origin,'Johnny'::varchar(10) as cust_fname, 'English'::varchar(10) as cust_lname, 3 as order_quantity union all + -- select '3'::decimal(18,2) as order_sk, 'order-003' as order_id, '2024-01-03'::date as order_date, 'Philippines'::varchar(100) as order_country_origin,'Mister'::varchar(10) as cust_fname, 'Bean'::varchar(10) as cust_lname, 3 as order_quantity + +) + +select * from orders + + diff --git a/models/marts/dims/my_orders/dim_orders_v2.sql b/models/marts/dims/my_orders/dim_orders_v2.sql new file mode 100644 index 00000000..372882d8 --- /dev/null +++ b/models/marts/dims/my_orders/dim_orders_v2.sql @@ -0,0 +1,21 @@ +{{ + config( + materialized='table', + tags=["orders"] + ) +}} + + + +with + +orders as ( + + select '1'::numeric(10,2) as order_sk,'1234-567-00'::varchar(100) as credit_card_number ,'order-001' as order_id, '2024-01-01'::date as order_date, 'United States'::varchar(100) as order_country_origin,'John'::varchar(10) as cust_fname, 'Lennon'::varchar(10) as cust_lname union all + select '2'::numeric(10,2) as order_sk,'1234-567-00'::varchar(100) as credit_card_number ,'order-002' as order_id, '2024-01-03'::date as order_date, 'France'::varchar(100) as order_country_origin,'Ringo'::varchar(10) as cust_fname, 'Starr'::varchar(10) as cust_lname union all + select '3'::numeric(10,2) as order_sk,'1234-567-00'::varchar(100) as credit_card_number ,'order-003' as order_id, '2024-01-03'::date as order_date, 'Philippines'::varchar(100) as order_country_origin,'Paul'::varchar(10) as cust_fname, 'McCartney'::varchar(10) as cust_lname + +) +select * from orders + + diff --git a/models/marts/facts/_facts__properties.yml b/models/marts/facts/_facts__properties.yml new file mode 100644 index 00000000..4886a355 --- /dev/null +++ b/models/marts/facts/_facts__properties.yml @@ -0,0 +1,15 @@ +version: 2 + +models: + - name: fct_daily_orders + description: | + "This is a fact table about orders." 
+ + columns: + - name: order_sk + description: "Order SK" + + tests: + - relationships: + to: ref('dim_orders') + field: order_sk \ No newline at end of file diff --git a/models/marts/facts/fct_daily_orders.sql b/models/marts/facts/fct_daily_orders.sql new file mode 100644 index 00000000..4117bedd --- /dev/null +++ b/models/marts/facts/fct_daily_orders.sql @@ -0,0 +1,30 @@ +{{ config( + materialized="table", + tags="orders" +) }} + +with + +int_daily_orders_fact as ( + + select * from {{ ref('int_daily_orders_fact') }} + +), + +agg_order_quantity as ( + + select + order_sk, + order_date_sk, + order_country_origin_sk, + sum(order_quantity) as total_order_quantity + from int_daily_orders_fact + group by + order_sk, + order_date_sk, + order_country_origin_sk + +) + +select * from agg_order_quantity + diff --git a/models/metrics/met_customers.yml b/models/metrics/met_customers.yml new file mode 100644 index 00000000..ab0aefef --- /dev/null +++ b/models/metrics/met_customers.yml @@ -0,0 +1,8 @@ +metrics: + - name: "customers_with_orders" + label: "customers_with_orders" + description: "Unique count of customers placing orders" + type: simple + type_params: + measure: + name: customers \ No newline at end of file diff --git a/models/metrics/met_orders.yml b/models/metrics/met_orders.yml new file mode 100644 index 00000000..b5b7d680 --- /dev/null +++ b/models/metrics/met_orders.yml @@ -0,0 +1,57 @@ +metrics: + # Simple type metrics + - name: "order_total" + description: "Sum of orders value" + type: simple + label: "order_total" + type_params: + measure: + name: order_total + + - name: "order_count" + description: "number of orders" + type: simple + label: "order_count" + type_params: + measure: + name: order_count + + - name: large_orders + description: "Count of orders with order total over 20." 
+ type: simple + label: "Large Orders" + type_params: + measure: + name: order_count + filter: | + {{ Metric('order_total', group_by=['order_id']) }} >= 20 + + # Ratio type metric + - name: "avg_order_value" + label: "avg_order_value" + description: "average value of each order" + type: ratio + type_params: + numerator: order_total + denominator: order_count + + # Cumulative type metrics + - name: "cumulative_order_amount_mtd" + label: "cumulative_order_amount_mtd" + description: "The month to date value of all orders" + type: cumulative + type_params: + measure: + name: order_total + grain_to_date: month + + # Derived metric + - name: "pct_of_orders_that_are_large" + label: "pct_of_orders_that_are_large" + description: "percent of orders that are large" + type: derived + type_params: + expr: large_orders/order_count + metrics: + - name: large_orders + - name: order_count \ No newline at end of file diff --git a/models/metrics/metricflow_time_spine.sql b/models/metrics/metricflow_time_spine.sql new file mode 100644 index 00000000..ac4d3339 --- /dev/null +++ b/models/metrics/metricflow_time_spine.sql @@ -0,0 +1,19 @@ +{{ + config( + materialized = 'table', + ) +}} +with days as ( + {{ + dbt_utils.date_spine( + 'day', + "to_date('01/01/2000','mm/dd/yyyy')", + "to_date('01/01/2027','mm/dd/yyyy')" + ) + }} +), +final as ( + select cast(date_day as date) as date_day + from days +) +select * from final diff --git a/models/metrics/sem_dim_customers.yml b/models/metrics/sem_dim_customers.yml new file mode 100644 index 00000000..11721250 --- /dev/null +++ b/models/metrics/sem_dim_customers.yml @@ -0,0 +1,44 @@ +semantic_models: + + - name: customers + defaults: + agg_time_dimension: most_recent_order_date + description: | + semantic model for dim_customers + + model: ref('dim_customer') + + entities: + - name: customer + expr: customer_id + type: primary + + dimensions: + - name: customer_name + type: categorical + expr: first_name + + - name: first_order_date + type: time 
+        type_params:
+          time_granularity: day
+
+      - name: most_recent_order_date
+        type: time
+        type_params:
+          time_granularity: day
+
+    measures:
+      - name: count_lifetime_orders
+        description: Total count of orders per customer.
+        agg: sum
+        expr: number_of_orders
+
+      - name: lifetime_spend
+        agg: sum
+        expr: lifetime_value
+        description: Gross customer lifetime spend inclusive of taxes.
+
+      - name: customers
+        expr: customer_id
+        agg: count_distinct
diff --git a/models/metrics/sem_fact_orders.yml b/models/metrics/sem_fact_orders.yml
new file mode 100644
index 00000000..19fdd2e4
--- /dev/null
+++ b/models/metrics/sem_fact_orders.yml
@@ -0,0 +1,45 @@
+semantic_models:
+
+  - name: orders
+    defaults:
+      agg_time_dimension: order_date
+    description: |
+      Order fact table. This table's grain is one row per order.
+    model: ref('fact_orders')
+
+    entities:
+      - name: order_id
+        type: primary
+
+      - name: customer
+        expr: customer_id
+        type: foreign
+
+    dimensions:
+      - name: order_date
+        type: time
+        type_params:
+          time_granularity: day
+
+    measures:
+      - name: order_total
+        description: The total amount for each order including taxes.
+        agg: sum
+        expr: amount
+
+      - name: order_count
+        expr: 1
+        agg: sum
+
+      - name: customers_with_orders
+        description: Distinct count of customers placing orders
+        agg: count_distinct
+        expr: customer_id
+
+      - name: order_value_p99 ## The 99th percentile order value
+        expr: amount
+        agg: percentile
+        agg_params:
+          percentile: 0.99
+          use_discrete_percentile: True
+          use_approximate_percentile: False
\ No newline at end of file
diff --git a/models/python_demo/_stripe__sources.yml b/models/python_demo/_stripe__sources.yml
new file mode 100644
index 00000000..c419c3a7
--- /dev/null
+++ b/models/python_demo/_stripe__sources.yml
@@ -0,0 +1,22 @@
+version: 2
+
+sources:
+  - name: stripe2
+    description: Incoming stripe payment data.
+ database: raw + schema: stripe + tables: + - name: payment + columns: + - name: id + data_tests: + - not_null + - unique + - name: paymentmethod + data_tests: + - accepted_values: + values: ['credit_card', 'bank_transfer', 'gift_card', 'coupon'] + - name: status + data_tests: + - accepted_values: + values: ['success', 'fail'] \ No newline at end of file diff --git a/models/python_demo/demo/_data_tests_py__models.yml b/models/python_demo/demo/_data_tests_py__models.yml new file mode 100644 index 00000000..ee30f0bf --- /dev/null +++ b/models/python_demo/demo/_data_tests_py__models.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: is_holiday_2024 + columns: + - name: date_day + data_tests: + - unique + \ No newline at end of file diff --git a/models/python_demo/demo/date_spine.sql b/models/python_demo/demo/date_spine.sql new file mode 100644 index 00000000..14b6b1ec --- /dev/null +++ b/models/python_demo/demo/date_spine.sql @@ -0,0 +1,14 @@ +with + +dates as ( + + {{ dbt_utils.date_spine( + datepart="day", + start_date="cast('2024-01-01' as date)", + end_date="cast('2024-12-31' as date)" + ) + }} + +) + +select * from dates \ No newline at end of file diff --git a/models/python_demo/demo/is_holiday_2024.py b/models/python_demo/demo/is_holiday_2024.py new file mode 100644 index 00000000..898e5928 --- /dev/null +++ b/models/python_demo/demo/is_holiday_2024.py @@ -0,0 +1,24 @@ +import holidays +import pandas + +#all python models need to be defined at the start with this specific syntax +def model(dbt, session): + + #python models don't use Jinja. Here we are using dbt.config to create model configurations + #be sure to materialize python models as tables, and to specify the packages that were imported above + dbt.config( + materialized="table", + tags=["py_models"], + packages=['pandas', 'holidays'] + ) + + us_holidays = holidays.US() + + #python models don't use Jinja. 
Here we are using dbt.ref to create model references
+    df = dbt.ref('date_spine').to_pandas()
+
+    # if you are using snowpark columns need to be uppercase
+    df['IS_HOLIDAY'] = df['DATE_DAY'].apply(lambda date: date in us_holidays)
+
+    #in dbt, you always need to return your data frame at the end of your models
+    return df
diff --git a/models/python_demo/others/dim_orders_ml.py b/models/python_demo/others/dim_orders_ml.py
new file mode 100644
index 00000000..95d5d8ea
--- /dev/null
+++ b/models/python_demo/others/dim_orders_ml.py
@@ -0,0 +1,22 @@
+import numpy
+import pandas as pd
+
+def model(dbt, session):
+    """Return dim_orders sorted by CUST_LNAME as a pandas DataFrame."""
+
+    # Declare the packages imported above, as the other Python models in
+    # this project do.
+    dbt.config(
+        materialized="table",
+        packages=["numpy", "pandas"]
+    )
+
+    # sort_values() is a pandas method, so convert the upstream relation to
+    # pandas first (the sibling models call .to_pandas() the same way).
+    orders_df = dbt.ref("dim_orders").to_pandas()
+    sorted_orders = orders_df.sort_values(by="CUST_LNAME")
+
+    print("This is a line in python")
+
+    # Return the DataFrame as the model output
+    return sorted_orders
diff --git a/models/python_demo/others/python_transformations.py b/models/python_demo/others/python_transformations.py
new file mode 100644
index 00000000..e00ca6b1
--- /dev/null
+++ b/models/python_demo/others/python_transformations.py
@@ -0,0 +1,29 @@
+import string
+import random
+import pandas
+import hashlib
+
+def model(dbt, session):
+    """Return dim_orders plus HASHED: a salted SHA-256 hex digest of ORDER_COUNTRY_ORIGIN."""
+    dbt.config(
+        materialized="table",
+        packages=['pandas']
+    )
+
+    df = dbt.ref("dim_orders").to_pandas()
+
+    # Salt appended to each value before hashing.
+    # NOTE(review): a salt hard-coded in the repository is not secret; consider
+    # supplying it through project configuration instead.
+    variable_hash = 'some_secret_salt'
+
+    def _salted_sha256(value):
+        # Missing values stay missing instead of being hashed.
+        if pandas.isna(value):
+            return None
+        return hashlib.sha256((value + variable_hash).encode('utf-8')).hexdigest()
+
+    # Concatenate the column with variable_hash, then hash the result.
+    df = df.assign(HASHED=lambda x: x['ORDER_COUNTRY_ORIGIN'].map(_salted_sha256))
+
+    return df
diff --git a/models/semantic_layer_demo/_semantic_layer_sources.yml b/models/semantic_layer_demo/_semantic_layer_sources.yml
new file mode 100644
index 00000000..aff5be45
---
/dev/null +++ b/models/semantic_layer_demo/_semantic_layer_sources.yml @@ -0,0 +1,15 @@ +version: 2 + +sources: + - name: orders + database: raw + schema: jaffle_shop + tables: + - name: orders + - name: customers + + - name: payments + database: raw + schema: stripe + tables: + - name: payment \ No newline at end of file diff --git a/models/semantic_layer_demo/marts/dim_customer.sql b/models/semantic_layer_demo/marts/dim_customer.sql new file mode 100644 index 00000000..b6a21647 --- /dev/null +++ b/models/semantic_layer_demo/marts/dim_customer.sql @@ -0,0 +1,36 @@ +{{ + config( + materialized='table', + tags=["semantic_layer_demo"] + ) +}} + +with customers as ( + select * from {{ ref('stg_jaffle_shop__customers')}} +), +orders as ( + select * from {{ ref('fact_orders')}} +), +customer_orders as ( + select + customer_id, + min(order_date) as first_order_date, + max(order_date) as most_recent_order_date, + count(order_id) as number_of_orders, + sum(amount) as lifetime_value + from orders + group by 1 +), +final as ( + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order_date, + customer_orders.most_recent_order_date, + coalesce(customer_orders.number_of_orders, 0) as number_of_orders, + customer_orders.lifetime_value + from customers + left join customer_orders using (customer_id) +) +select * from final diff --git a/models/semantic_layer_demo/marts/fact_orders.sql b/models/semantic_layer_demo/marts/fact_orders.sql new file mode 100644 index 00000000..53ae30e5 --- /dev/null +++ b/models/semantic_layer_demo/marts/fact_orders.sql @@ -0,0 +1,43 @@ +{{ + config( + tags=["semantic_layer_demo"] + ) +}} + +with orders as ( + select * from {{ ref('stg_jaffle_shop__orders' )}} +), + + +payments as ( + select * from {{ ref('stg_stripe__payment') }} +), + + +order_payments as ( + select + order_id, + sum(case when status = 'success' then amount end) as amount + + + from payments + group by 1 +), + + +final as ( + + + 
select + orders.order_id, + orders.customer_id, + orders.order_date, + coalesce(order_payments.amount, 0) as amount + + + from orders + left join order_payments using (order_id) +) + + +select * from final diff --git a/models/semantic_layer_demo/staging/jaffle_shop/_jaffle_shop__models.yml b/models/semantic_layer_demo/staging/jaffle_shop/_jaffle_shop__models.yml new file mode 100644 index 00000000..eba0774d --- /dev/null +++ b/models/semantic_layer_demo/staging/jaffle_shop/_jaffle_shop__models.yml @@ -0,0 +1,72 @@ +# version: 2 + +# models: + +# - name: stg_jaffle_shop__orders +# description: > +# multi line description +# of my staging orders model from +# jaffle shop +# data_tests: +# - dbt_utils.recency: +# datepart: day +# field: _etl_loaded_at +# interval: 7 +# columns: +# - name: order_id +# data_tests: +# - not_null: +# config: +# where: _etl_loaded_at >= dateadd('day', -3, current_timestamp()) +# severity: error +# error_if: ">20" +# warn_if: ">10" +# limit: 25 +# store_failures: true +# - filtered_unique: +# records_to_check: _etl_loaded_at >= dateadd('day', -3, current_timestamp()) +# - name: order_date +# description: | +# multi line description +# of my order date column from my +# jaffle shop orders model +# data_tests: +# - dbt_expectations.expect_column_values_to_be_of_type: +# column_type: date + +# - name: stg_jaffle_shop__customers +# description: | +# multi line description +# of my staging +# jaffle shop customers model +# data_tests: +# - not_empty +# - dbt_utils.equal_rowcount: +# compare_model: source('jaffle_shop', 'customers') +# - dbt_utils.equality: +# compare_model: source('jaffle_shop', 'customers') +# compare_columns: +# - first_name +# - last_name +# - dbt_utils.expression_is_true: +# expression: "len(first_name) > 1" +# - dbt_expectations.expect_table_row_count_to_be_between: +# min_value: 1 + +# columns: +# - name: customer_id +# description: > +# multi line description +# of my customer id column from my +# jaffle shop customers 
model +# data_tests: +# - unique +# - not_null +# - unique_and_not_null +# - name: first_name +# data_tests: +# - dbt_utils.at_least_one +# - dbt_utils.not_constant +# - dbt_utils.not_empty_string +# - dbt_utils.not_accepted_values: +# values: ['test'] diff --git a/models/_samples/staging/jaffle_shop/_jaffle_shop__sources.yml b/models/semantic_layer_demo/staging/jaffle_shop/_jaffle_shop__sources.yml similarity index 100% rename from models/_samples/staging/jaffle_shop/_jaffle_shop__sources.yml rename to models/semantic_layer_demo/staging/jaffle_shop/_jaffle_shop__sources.yml diff --git a/models/semantic_layer_demo/staging/jaffle_shop/stg_jaffle_shop__customers.sql b/models/semantic_layer_demo/staging/jaffle_shop/stg_jaffle_shop__customers.sql new file mode 100644 index 00000000..6284f71a --- /dev/null +++ b/models/semantic_layer_demo/staging/jaffle_shop/stg_jaffle_shop__customers.sql @@ -0,0 +1,11 @@ +{{ + config( + tags=['semantic_layer_demo'] + ) +}} + + select + id as customer_id, + first_name, + last_name +from {{ source('jaffle_shop', 'customers') }} \ No newline at end of file diff --git a/models/semantic_layer_demo/staging/jaffle_shop/stg_jaffle_shop__orders.sql b/models/semantic_layer_demo/staging/jaffle_shop/stg_jaffle_shop__orders.sql new file mode 100644 index 00000000..6be9048d --- /dev/null +++ b/models/semantic_layer_demo/staging/jaffle_shop/stg_jaffle_shop__orders.sql @@ -0,0 +1,12 @@ +{{ + config( + tags=['semantic_layer_demo'] + ) +}} + + select + id as order_id, + user_id as customer_id, + order_date, + status + from {{ source('jaffle_shop', 'orders') }} \ No newline at end of file diff --git a/models/semantic_layer_demo/staging/stripe/stg_stripe__payment.sql b/models/semantic_layer_demo/staging/stripe/stg_stripe__payment.sql new file mode 100644 index 00000000..e0bc13ff --- /dev/null +++ b/models/semantic_layer_demo/staging/stripe/stg_stripe__payment.sql @@ -0,0 +1,17 @@ +{{ + config( + tags=['semantic_layer_demo'] + ) +}} + +select + id as 
payment_id, + orderid as order_id, + paymentmethod as payment_method, + status, + -- amount is stored in cents, convert it to dollars + amount / 100 as amount, + created as created_at + + +from {{ source('stripe', 'payment') }} \ No newline at end of file diff --git a/models/staging/stripe/stg_stripe__payments.sql b/models/staging/stripe/stg_stripe__payments.sql index 61ede9e5..61fd513d 100644 --- a/models/staging/stripe/stg_stripe__payments.sql +++ b/models/staging/stripe/stg_stripe__payments.sql @@ -13,6 +13,6 @@ select -- datetimes created as created_at -from {{ ref('snapshot_stg_payments') }} +from {{ source('stripe', 'payment') }} -- pull only the most recent update for each unique record where dbt_valid_to is null diff --git a/models/test_model.sql b/models/test_model.sql new file mode 100644 index 00000000..e36a24f6 --- /dev/null +++ b/models/test_model.sql @@ -0,0 +1,18 @@ +{{ + config( + materialized='table' + ) +}} + + +with + +src as ( + + select 1 as col_id + +) + +select * from src + +{{ log("This is a log inside the model", info=True) }} \ No newline at end of file diff --git a/models/test_query.sql b/models/test_query.sql new file mode 100644 index 00000000..e69de29b diff --git a/seeds/_seeds_properties.yml b/seeds/_seeds_properties.yml new file mode 100644 index 00000000..c4c5956a --- /dev/null +++ b/seeds/_seeds_properties.yml @@ -0,0 +1,7 @@ +version: 2 + +seeds: + - name: seeds_test + config: + database: analytics + schema: dbt_kfajardo \ No newline at end of file diff --git a/seeds/test_seed.csv b/seeds/test_seed.csv new file mode 100644 index 00000000..dbd888af --- /dev/null +++ b/seeds/test_seed.csv @@ -0,0 +1,2 @@ +col1,col2,col3 +1,2,3 \ No newline at end of file diff --git a/snapshots/_samples/example_generate_schema_snapshot.sql b/snapshots/_samples/example_generate_schema_snapshot.sql deleted file mode 100644 index 2174d7b7..00000000 --- a/snapshots/_samples/example_generate_schema_snapshot.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% snapshot 
example_generate_schema_snapshot %} - {{ - config( - target_database='analytics', - target_schema=generate_schema_name('snapshots'), - unique_key='id', - strategy='timestamp', - updated_at='order_updated_at', - invalidate_hard_deletes=True - ) - }} - - select * from {{ ref('example_orders_line_items_source_for_snapshot') }} - {% endsnapshot %} \ No newline at end of file diff --git a/snapshots/_samples/example_orders_line_items_snapshot.sql b/snapshots/_samples/example_orders_line_items_snapshot.sql deleted file mode 100644 index 63603f50..00000000 --- a/snapshots/_samples/example_orders_line_items_snapshot.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% snapshot example_orders_line_items_snapshot %} - {{ - config( - target_database='analytics', - target_schema='snapshots', - unique_key='id', - strategy='timestamp', - updated_at='order_updated_at', - invalidate_hard_deletes=True - ) - }} - - select * from {{ ref('example_orders_line_items_source_for_snapshot') }} - {% endsnapshot %} \ No newline at end of file diff --git a/snapshots/_samples/example_orders_snapshot.sql b/snapshots/_samples/example_orders_snapshot.sql deleted file mode 100644 index e4e7ad6d..00000000 --- a/snapshots/_samples/example_orders_snapshot.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% snapshot example_orders_snapshot %} - {{ - config( - target_database='analytics', - target_schema='snapshots', - unique_key='order_id', - strategy='timestamp', - updated_at='order_updated_at', - invalidate_hard_deletes=True - ) - }} - - select * from {{ ref('example_orders_source_for_snapshot') }} - {% endsnapshot %} \ No newline at end of file diff --git a/snapshots/jaffle_snapshots.yml b/snapshots/jaffle_snapshots.yml new file mode 100644 index 00000000..98b9695a --- /dev/null +++ b/snapshots/jaffle_snapshots.yml @@ -0,0 +1,51 @@ +version: 2 + +snapshots: +# This is a pretty standard snapshot + - name: snapshot_stg_payments + relation: source('stripe', 'payment') + config: + enabled: true + strategy: timestamp + updated_at: 
_BATCHED_AT + unique_key: ID + persist_docs: + relation: true + columns: true +# Another standard snapshot but this time with a different table. + - name: snapshot_example_orders + relation: ref('example_orders_source_for_snapshot') + config: + enabled: true + strategy: timestamp + updated_at: order_updated_at + unique_key: order_id + persist_docs: + relation: true + columns: true + invalidate_hard_deletes: true + +# This snapshot has the check_cols example + - name: snapshot_int_line_items + relation: ref('int_line_items_amounts_calculated') + config: + enabled: true + strategy: check + check_cols: ['gross_item_sales_amount', 'net_item_sales_amount'] + unique_key: order_item_id + persist_docs: + relation: true + columns: true +# This snapshot has invalidate hard deletes as an example + - name: snapshot_orders_line_items + relation: ref('example_orders_line_items_source_for_snapshot') + config: + enabled: true + strategy: timestamp + unique_key: id + updated_at: order_updated_at + invalidate_hard_deletes: true + persist_docs: + relation: true + columns: true + diff --git a/snapshots/snapshot_int_line_items.sql b/snapshots/snapshot_int_line_items.sql deleted file mode 100644 index 90bc5502..00000000 --- a/snapshots/snapshot_int_line_items.sql +++ /dev/null @@ -1,22 +0,0 @@ -{% snapshot snapshot_int_line_items %} - --- Example showing check columns strategy and using custom schemas (this is rare) - --- if you don't want to write to the same schema in development and prod, --- use environment-aware variable from dbt_project.yml - --- if you do not have a reliable updated at timestamp, checek columns for changes. 
- -{{ - config( - target_database='analytics', - target_schema=var('example_target_snapshot_schema'), - unique_key='order_item_id', - strategy='check', - check_cols=['gross_item_sales_amount', 'net_item_sales_amount'], - ) -}} - -select * from {{ ref('int_line_items_amounts_calculated') }} - -{% endsnapshot %} \ No newline at end of file diff --git a/snapshots/snapshot_stg_payments.sql b/snapshots/snapshot_stg_payments.sql deleted file mode 100644 index adc61b2f..00000000 --- a/snapshots/snapshot_stg_payments.sql +++ /dev/null @@ -1,16 +0,0 @@ -{% snapshot snapshot_stg_payments %} - -{{ - config( - target_database='analytics', - target_schema='snapshots', - unique_key='ID', - - strategy='timestamp', - updated_at='_BATCHED_AT', - ) -}} - -select * from {{ source('stripe', 'payment') }} - -{% endsnapshot %} \ No newline at end of file diff --git a/tests/assert_duplicates__fct_daily_orders.sql b/tests/assert_duplicates__fct_daily_orders.sql new file mode 100644 index 00000000..517e7cd5 --- /dev/null +++ b/tests/assert_duplicates__fct_daily_orders.sql @@ -0,0 +1,21 @@ +with + + +source as ( + + select * from {{ ref('fct_daily_orders') }} + +), + +validations as ( + + select + order_sk, + count(*) as rec_cnt + from {{ ref('fct_daily_orders') }} + group by order_sk + having count(*) > 1 + +) + +select * from validations \ No newline at end of file diff --git a/tests/assert_null__payment.sql b/tests/assert_null__payment.sql new file mode 100644 index 00000000..3627f80c --- /dev/null +++ b/tests/assert_null__payment.sql @@ -0,0 +1,12 @@ +with + +source as ( + + select * from {{ source('stripe', 'payment') }} + +) + +select +id +from source +where id is null \ No newline at end of file diff --git a/tests/fixtures/1_fixture.sql b/tests/fixtures/1_fixture.sql new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures/input/dim_depreciation_accounts__fixture.sql b/tests/fixtures/input/dim_depreciation_accounts__fixture.sql new file mode 100644 index 
00000000..e69de29b diff --git a/tests/fixtures/input/dim_net_income__fixture.sql b/tests/fixtures/input/dim_net_income__fixture.sql new file mode 100644 index 00000000..e69de29b