From fdb9cb170553cc7e6be9b239286baefb9c66f528 Mon Sep 17 00:00:00 2001 From: Ben Liyanage Date: Wed, 14 Dec 2022 14:56:39 -0800 Subject: [PATCH 1/7] fix audits, add project as a parameter --- .gitignore | 1 + README.md | 1 + macros/hooks/model_audit.sql | 8 +++++--- 3 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..485dee6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea diff --git a/README.md b/README.md index 72ca71e..83972f4 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Example config for `dbt_project.yml` below: vars: "dbt_ml:audit_schema": "audit" "dbt_ml:audit_table": "ml_models" + "dbt_ml:audit_database": "database" on-run-start: - '{% do adapter.create_schema(api.Relation.create(target.project, "audit")) %}' - "{{ dbt_ml.create_model_audit_table() }}" diff --git a/macros/hooks/model_audit.sql b/macros/hooks/model_audit.sql index bb59faf..fbb0139 100644 --- a/macros/hooks/model_audit.sql +++ b/macros/hooks/model_audit.sql @@ -5,13 +5,15 @@ 'schema': 'string', 'created_at': dbt_utils.type_timestamp(), 'training_info': 'array>>>', - 'feature_info': 'array>', + 'feature_info': 'array>', 'weights': 'array>>>', 'evaluate': 'array>', }) %} {% endmacro %} +{% endmacro %} + {% macro _audit_insert_templates() %} {%- set schemas -%} @@ -100,7 +102,7 @@ tensorflow: {} {% set info_types = ['training_info', 'feature_info', 'weights', 'evaluate'] %} - insert `{{ target.database }}.{{ var('dbt_ml:audit_schema') }}.{{ var('dbt_ml:audit_table') }}` + insert `{{ var('dbt_ml:audit_database') }}.{{ var('dbt_ml:audit_schema') }}.{{ var('dbt_ml:audit_table') }}` (model, schema, created_at, {{ info_types | join(', ') }}) select @@ -125,7 +127,7 @@ tensorflow: {} {% macro create_model_audit_table() %} {%- set audit_table = api.Relation.create( - database=target.database, + database=var('dbt_ml:audit_database'), schema=var('dbt_ml:audit_schema'), identifier=var('dbt_ml:audit_table'), type='table' From a856c4abd78c8473f251773bce9bcccce3a29117 Mon Sep 17 00:00:00 2001 From: Ben Liyanage Date: Wed, 14 Dec 2022 15:14:41 -0800 Subject: [PATCH 2/7] update readme on variables --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 83972f4..4be1366 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ To install the package add the package path to the `packages.yml` file in your d In order to use the model audit post-hook the following variables have to be set in your `dbt_project.yml` file. -| Variable | Description | -| --------------------- | -------------------------- | -| `dbt_ml:audit_schema` | Schema of the audit table. | -| `dbt_ml:audit_table` | Name of the audit table. | +| Variable | Description | +| --------------------- | --------------------------------- | +| `dbt_ml:database` | Name of the GCP Project to use. | +| `dbt_ml:audit_schema` | Schema of the audit table. | +| `dbt_ml:audit_table` | Name of the audit table. | You will also need to specify the post-hook in your `dbt_project.yml` file[1] as `{{ dbt_ml.model_audit() }}`. Optionally, you can use the `dbt_ml.create_model_audit_table()` macro to create the audit table automatically if it does not exist - for example in an on-run-start hook. From ef16208f4e13e3ebc24b807e9c9cb36cd9d50d43 Mon Sep 17 00:00:00 2001 From: Ben Liyanage Date: Wed, 14 Dec 2022 15:18:33 -0800 Subject: [PATCH 3/7] update readme again --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4be1366..d312126 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ To install the package add the package path to the `packages.yml` file in your d In order to use the model audit post-hook the following variables have to be set in your `dbt_project.yml` file. -| Variable | Description | -| --------------------- | --------------------------------- | -| `dbt_ml:database` | Name of the GCP Project to use. | -| `dbt_ml:audit_schema` | Schema of the audit table. | -| `dbt_ml:audit_table` | Name of the audit table. | +| Variable | Description | +| ---------------------- | --------------------------------- | +| `dbt_ml:audit_database`| Name of the GCP Project to use. | +| `dbt_ml:audit_schema` | Schema of the audit table. | +| `dbt_ml:audit_table` | Name of the audit table. | You will also need to specify the post-hook in your `dbt_project.yml` file[1] as `{{ dbt_ml.model_audit() }}`. Optionally, you can use the `dbt_ml.create_model_audit_table()` macro to create the audit table automatically if it does not exist - for example in an on-run-start hook. From 5f622d934a1b964d292262344345ebf0902207e1 Mon Sep 17 00:00:00 2001 From: Ben Liyanage Date: Wed, 14 Dec 2022 15:30:36 -0800 Subject: [PATCH 4/7] bugfix, and add single startup macro --- macros/hooks/model_audit.sql | 2 -- macros/hooks/startup.sql | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 macros/hooks/startup.sql diff --git a/macros/hooks/model_audit.sql b/macros/hooks/model_audit.sql index fbb0139..d518b48 100644 --- a/macros/hooks/model_audit.sql +++ b/macros/hooks/model_audit.sql @@ -12,8 +12,6 @@ {% endmacro %} -{% endmacro %} - {% macro _audit_insert_templates() %} {%- set schemas -%} diff --git a/macros/hooks/startup.sql b/macros/hooks/startup.sql new file mode 100644 index 0000000..8a3efda --- /dev/null +++ b/macros/hooks/startup.sql @@ -0,0 +1,6 @@ +{% macro _startup() %} + +{% do adapter.create_schema(api.Relation.create(var('dbt_ml:audit_database'), var('dbt_ml:audit_schema'))) %} +{{ dbt_ml.create_model_audit_table() }} + +{% endmacro %} From f9d149a36ca6bf0fe871ef341cc387e90a6fd324 Mon Sep 17 00:00:00 2001 From: Ben Liyanage Date: Wed, 14 Dec 2022 15:32:15 -0800 Subject: [PATCH 5/7] remove _ --- macros/hooks/startup.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/hooks/startup.sql b/macros/hooks/startup.sql index 8a3efda..bd25d96 100644 --- a/macros/hooks/startup.sql +++ b/macros/hooks/startup.sql @@ -1,4 +1,4 @@ -{% macro _startup() %} +{% macro startup() %} {% do adapter.create_schema(api.Relation.create(var('dbt_ml:audit_database'), var('dbt_ml:audit_schema'))) %} {{ dbt_ml.create_model_audit_table() }} From c1b4f2416ccfc1de3f61db9288881a0d49cecd6f Mon Sep 17 00:00:00 2001 From: Ben Liyanage Date: Wed, 14 Dec 2022 15:37:12 -0800 Subject: [PATCH 6/7] remove startup function --- macros/hooks/startup.sql | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 macros/hooks/startup.sql diff --git a/macros/hooks/startup.sql b/macros/hooks/startup.sql deleted file mode 100644 index bd25d96..0000000 --- a/macros/hooks/startup.sql +++ /dev/null @@ -1,6 +0,0 @@ -{% macro startup() %} - -{% do adapter.create_schema(api.Relation.create(var('dbt_ml:audit_database'), var('dbt_ml:audit_schema'))) %} -{{ dbt_ml.create_model_audit_table() }} - -{% endmacro %} From 69ce0c574cffce3eb8588f6214250f0745a1c538 Mon Sep 17 00:00:00 2001 From: Rasmus Bjerrum Date: Wed, 15 Feb 2023 11:51:31 +0100 Subject: [PATCH 7/7] Add dimension to feature info; remove labels from CREATE statement. --- macros/hooks/model_audit.sql | 2 +- macros/materializations/model.sql | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/macros/hooks/model_audit.sql b/macros/hooks/model_audit.sql index 74b4b36..d35fe2e 100644 --- a/macros/hooks/model_audit.sql +++ b/macros/hooks/model_audit.sql @@ -5,7 +5,7 @@ 'schema': 'string', 'created_at': type_timestamp(), 'training_info': 'array>>>', - 'feature_info': 'array>', + 'feature_info': 'array>', 'weights': 'array>>>', 'evaluate': 'array>', }) %} diff --git a/macros/materializations/model.sql b/macros/materializations/model.sql index 529692e..354a50d 100644 --- a/macros/materializations/model.sql +++ b/macros/materializations/model.sql @@ -19,18 +19,11 @@ {% endmacro %} {% macro model_options(ml_config, labels) %} - {%- if labels -%} - {%- set label_list = [] -%} - {%- for label, value in labels.items() -%} - {%- do label_list.append((label, value)) -%} - {%- endfor -%} - {%- do ml_config.update({'labels': label_list}) -%} - {%- endif -%} {% set options -%} options( {%- for opt_key, opt_val in ml_config.items() -%} - {%- if opt_val is sequence and not (opt_val | first) is number and (opt_val | first).startswith('hparam_') -%} + {%- if opt_val is sequence and (opt_val | first) is string and (opt_val | first).startswith('hparam_') -%} {{ opt_key }}={{ opt_val[0] }}({{ opt_val[1:] | join(', ') }}) {%- else -%} {{ opt_key }}={{ (opt_val | tojson) if opt_val is string else opt_val }}