add models for storage and sqlmesh support #3

Merged 3 commits on Jul 30, 2024
Changes from all commits
10 changes: 10 additions & 0 deletions .changes/v0.5.0.md
@@ -0,0 +1,10 @@
## dbt-bigquery-monitoring v0.5.0 - June 05, 2024

### Features

- Add new models for storage monitoring ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
- Add SQLMesh support ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))

### Contributors
- [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0), [#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))

9 changes: 9 additions & 0 deletions .changes/v0.5.1.md
@@ -0,0 +1,9 @@
## dbt-bigquery-monitoring v0.5.1 - July 30, 2024

### Fixes

- Fix YML model name misaligned with information schema dbt models ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))

### Contributors
- [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))

11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html),
and is generated by [Changie](https://github.com/miniscruff/changie).

## dbt-bigquery-monitoring v0.5.0 - June 05, 2024

### Features

- Add new models for storage monitoring ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))
- Add SQLMesh support ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))

### Contributors
- [@Kayrnt](https://github.com/Kayrnt) ([#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0), [#0](https://github.com/bqbooster/dbt-bigquery-monitoring/issues/0))


## dbt-bigquery-monitoring v0.4.0 - May 18, 2024

### Features
2 changes: 1 addition & 1 deletion README.md
@@ -38,7 +38,7 @@ Add the following to your `packages.yml` file:
```yml
packages:
- package: bqbooster/dbt_bigquery_monitoring
version: 0.4.0
version: 0.5.0
```

### Configure the package
10 changes: 10 additions & 0 deletions config.py
@@ -0,0 +1,10 @@
# This file is used to load the sqlmesh configuration
from pathlib import Path

from sqlmesh.core.config import DuckDBConnectionConfig
from sqlmesh.dbt.loader import sqlmesh_config

config = sqlmesh_config(
Path(__file__).parent,
state_connection=DuckDBConnectionConfig(),
)
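
A quick way to sanity-check this wiring — a minimal sketch, assuming SQLMesh is installed and this `config.py` sits at the dbt project root — is to build a SQLMesh `Context` directly and list the dbt models it discovers:

```python
# Minimal sketch (not part of this PR): load the project through SQLMesh's
# dbt loader and print the models it discovered. Assumes `sqlmesh` is
# installed and the script is run from the project root.
from sqlmesh import Context

context = Context(paths=".")  # picks up the config.py defined above
print(sorted(context.models))  # dbt models as seen by SQLMesh
```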
4 changes: 3 additions & 1 deletion dbt_project.yml
@@ -1,5 +1,5 @@
name: "dbt_bigquery_monitoring"
version: "0.3.0"
version: "0.5.1"
require-dbt-version: [">=1.3.0", "<2.0.0"]
config-version: 2

@@ -12,6 +12,7 @@ clean-targets:
- dbt_packages

models:
+start: Jan 1 2017
dbt_bigquery_monitoring:
+tags:
- "dbt-bigquery-monitoring"
@@ -38,6 +39,7 @@ vars:
# Capacity compute (analysis) pricing
hourly_slot_price: "{{ env_var('DBT_BQ_MONITORING_HOURLY_SLOT_PRICE', 0.04) }}"
# https://cloud.google.com/bigquery/pricing#storage
prefer_physical_pricing_model: "{{ env_var('DBT_BQ_MONITORING_PREFER_PHYSICAL_PRICING_MODEL', true) }}"
active_logical_storage_gb_price: "{{ env_var('DBT_BQ_MONITORING_ACTIVE_LOGICAL_STORAGE_GB_PRICE', 0.02) }}"
long_term_logical_storage_gb_price: "{{ env_var('DBT_BQ_MONITORING_LONG_TERM_LOGICAL_STORAGE_GB_PRICE', 0.01) }}"
active_physical_storage_gb_price: "{{ env_var('DBT_BQ_MONITORING_ACTIVE_PHYSICAL_STORAGE_GB_PRICE', 0.04) }}"
5 changes: 5 additions & 0 deletions documentation_parser/Makefile
@@ -14,3 +14,8 @@ setup_environment:

run:
python documentation_parser.py

#test using pytest
test:
pytest test_documentation_parser.py

9 changes: 7 additions & 2 deletions documentation_parser/config.py
@@ -60,34 +60,39 @@
"jobs": {
"dir": "jobs",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs",
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
"jobs_by_project": {
"dir": "jobs",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs",
"override_table_name": "JOBS_BY_PROJECT",
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
"jobs_by_user": {
"dir": "jobs",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs-by-user",
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
"jobs_by_folder": {
"dir": "jobs",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs-by-folder",
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
"jobs_by_organization": {
"dir": "jobs",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs-by-organization",
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
# jobs timeline
"jobs_timeline": {
"dir": "jobs_timeline",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline",
"exclude_columns": ["folder_numbers"],
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
"jobs_timeline_by_user": {
"dir": "jobs_timeline",
"url": "https://cloud.google.com/bigquery/docs/information-schema-jobs-timeline-by-user",
"exclude_columns": ["folder_numbers"],
"exclude_columns": ["query_info.resource_warning", "query_info.query_hashes.normalized_literals", "query_info.performance_insights", "query_info.optimization_details", "folder_numbers"],
},
"jobs_timeline_by_folder": {
"dir": "jobs_timeline",
64 changes: 39 additions & 25 deletions documentation_parser/documentation_parser.py
@@ -83,6 +83,42 @@ def update_column_list(input_columns: List[dict], exclude_columns: List[str]):

return columns

def generate_sql(url: str, column_names: List[str], table_name: str, required_role_str: str):
# Prepare the column names as a comma-separated string
columns_str = ", ".join(column_names)

# Prepare the base SQL string for the project list
project_sql = f"""
SELECT {columns_str}
FROM `{{{{ project | trim }}}}`.`region-{{{{ var('bq_region') }}}}`.`INFORMATION_SCHEMA`.`{table_name}`
"""

# Prepare the base SQL string for when there's no project list
no_project_sql = f"""
SELECT {columns_str}
FROM `region-{{{{ var('bq_region') }}}}`.`INFORMATION_SCHEMA`.`{table_name}`
"""

# Combine everything into the final SQL string
sql = f"""
{{# More details about base table in {url} -#}}
{required_role_str}
WITH base AS (
{{% if project_list()|length > 0 -%}}
{{% for project in project_list() -%}}
{project_sql}
{{% if not loop.last %}}UNION ALL{{% endif %}}
{{% endfor %}}
{{%- else %}}
{no_project_sql}
{{%- endif %}}
)
SELECT
{columns_str},
FROM
base
"""
return sql

def generate_files(
filename: str, dir: str, url: str, exclude_columns: List[str], override_table_name: str = None
@@ -133,7 +169,7 @@ def generate_files(
# Update the column list
columns = update_column_list(columns, exclude_columns)

base_filename = f"{dir}/information_schema_{filename}"
base_filename = f"output/{dir}/information_schema_{filename}"

# Create the YML file
filename_yml = f"{base_filename}.yml"
@@ -149,7 +185,7 @@
"version": 2,
"models": [
{
"name": table_name.lower(),
"name": "information_schema_" + table_name.lower(),
"description": "dataset details with related information",
"columns": [
{
@@ -179,30 +215,8 @@

# Create the SQL file
with open(filename_sql, "w") as f:
f.write(
f"{{# More details about base table in {url} -#}}\n"
+ required_role_str
+ "WITH base AS (\n"
+ "{% if project_list()|length > 0 -%}\n"
+ " {% for project in project_list() -%}\n"
+ " SELECT * FROM `{{{{ project | trim }}}}`.`region-{{{{ var('bq_region') }}}}`.`INFORMATION_SCHEMA`.`{table_name}`\n".format(
table_name=table_name
)
+ " {% if not loop.last %}UNION ALL{% endif %}\n"
+ " {% endfor %}\n"
+ "{%- else %}\n"
+ " SELECT * FROM `region-{{{{ var('bq_region') }}}}`.`INFORMATION_SCHEMA`.`{table_name}`\n".format(
table_name=table_name
)
+ "{%- endif %}\n"
+ ")\n"
+ "SELECT\n"
)

column_names = [column["name"].lower() for column in columns]
f.write(",\n".join(column_names) + ",\n")

f.write("FROM\n" + " base\n")
f.write(generate_sql(url, column_names, table_name, required_role_str))

print(f"Files '{filename_sql}' and '{filename_yml}' have been created.")

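The new `generate_sql` helper above returns a Jinja-templated string instead of writing the SQL inline, so its output can be previewed outside dbt. A hedged sketch follows — the `project_list` and `var` stubs are hypothetical stand-ins for the dbt-side macros, not part of the package:

```python
# Illustrative only: render the template emitted by generate_sql with plain
# jinja2, stubbing the macros that dbt would normally provide at compile time.
from jinja2 import Environment

from documentation_parser import generate_sql

template = generate_sql(
    url="https://cloud.google.com/bigquery/docs/information-schema-jobs",
    column_names=["job_id", "user_email"],
    table_name="JOBS",
    required_role_str="",
)

env = Environment()
env.globals["project_list"] = lambda: ["project-a", "project-b"]  # stub macro
env.globals["var"] = lambda name: {"bq_region": "us"}[name]  # stub for var()

print(env.from_string(template).render())
```

With two stub projects configured, the rendered output unions the `JOBS` view across both, matching the shape asserted in the test below.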
8 changes: 7 additions & 1 deletion documentation_parser/test_documentation_parser.py
@@ -1,6 +1,6 @@
import pytest
from bs4 import BeautifulSoup
from documentation_parser import parse_required_role, parse_table_name, update_column_list
from documentation_parser import parse_required_role, parse_table_name, update_column_list, generate_sql

def test_parse_required_role():
# Test case: Required role is present
Expand Down Expand Up @@ -97,3 +97,9 @@ def test_update_column_list():
{'name': 'column3', 'type': 'RECORD', 'description': 'column3.subcolumn1 : Subcolumn 1 of Column 3\ncolumn3.subcolumn2 : Subcolumn 2 of Column 3'},
]
assert result == expected_columns

def test_generate_sql():
# Test generate_sql function
result = generate_sql("https://cloud.google.com/bigquery/docs/information-schema-jobs", ["field1", "field2", "field3"], "jobs", "jobs.admin")
expected = "\n {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs -#}\n jobs.admin\n WITH base AS (\n {% if project_list()|length > 0 -%}\n {% for project in project_list() -%}\n \n SELECT field1, field2, field3\n FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`jobs`\n \n {% if not loop.last %}UNION ALL{% endif %}\n {% endfor %}\n {%- else %}\n \n SELECT field1, field2, field3\n FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`jobs`\n \n {%- endif %}\n )\n SELECT\n field1, field2, field3,\n FROM\n base\n "
assert result == expected
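
Because this assertion pins the exact whitespace of the f-string, any indentation-only change in `generate_sql` will fail it even when the SQL is equivalent. A whitespace-normalized comparison is one possible alternative — a sketch only, not part of this PR:

```python
# Hedged sketch: assert on whitespace-normalized output so formatting-only
# changes in generate_sql don't break the test.
import re

from documentation_parser import generate_sql

def _normalize(sql: str) -> str:
    # collapse every run of whitespace into a single space
    return re.sub(r"\s+", " ", sql).strip()

def test_generate_sql_normalized():
    result = generate_sql(
        "https://cloud.google.com/bigquery/docs/information-schema-jobs",
        ["field1", "field2", "field3"],
        "jobs",
        "jobs.admin",
    )
    normalized = _normalize(result)
    assert "SELECT field1, field2, field3" in normalized
    assert "INFORMATION_SCHEMA`.`jobs`" in normalized
```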
information_schema_object_privileges.sql
@@ -1,25 +1,30 @@
{# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-object-privileges -#}
{# Required role/permissions: To query the INFORMATION_SCHEMA.OBJECT_PRIVILEGES view, you need following

{# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-object-privileges -#}
{# Required role/permissions: To query the INFORMATION_SCHEMA.OBJECT_PRIVILEGES view, you need following
Identity and Access Management (IAM) permissions:
bigquery.datasets.get for datasets.
bigquery.tables.getIamPolicy for tables and views.
For more information about BigQuery permissions, see
Access control with IAM. -#}
WITH base AS (
{% if project_list()|length > 0 -%}
{% for project in project_list() -%}
SELECT * FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`OBJECT_PRIVILEGES`
{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}
{%- else %}
SELECT * FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`OBJECT_PRIVILEGES`
{%- endif %}
)
SELECT
object_catalog,
object_schema,
object_name,
object_type,
grantee,
FROM
base

WITH base AS (
{% if project_list()|length > 0 -%}
{% for project in project_list() -%}

SELECT object_catalog, object_schema, object_name, object_type, privilege_type, grantee
FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`OBJECT_PRIVILEGES`

{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}
{%- else %}

SELECT object_catalog, object_schema, object_name, object_type, privilege_type, grantee
FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`OBJECT_PRIVILEGES`

{%- endif %}
)
SELECT
object_catalog, object_schema, object_name, object_type, privilege_type, grantee,
FROM
base

information_schema_object_privileges.yml
@@ -13,9 +13,12 @@ models:
- description: The resource type, such as SCHEMA (dataset), TABLE, VIEW, and EXTERNAL.
name: OBJECT_TYPE
type: STRING
- description: The role ID, such as roles/bigquery.dataEditor.
name: PRIVILEGE_TYPE
type: STRING
- description: The user type and user that the role is granted to.
name: GRANTEE
type: STRING
description: dataset details with related information
name: object_privileges
name: information_schema_object_privileges
version: 2
43 changes: 24 additions & 19 deletions models/base/google/bi_engine/information_schema_bi_capacities.sql
@@ -1,19 +1,24 @@
{# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-bi-capacities -#}
WITH base AS (
{% if project_list()|length > 0 -%}
{% for project in project_list() -%}
SELECT * FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`BI_CAPACITIES`
{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}
{%- else %}
SELECT * FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`BI_CAPACITIES`
{%- endif %}
)
SELECT
project_id,
project_number,
bi_capacity_name,
size,
preferred_tables,
FROM
base

{# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-bi-capacities -#}

WITH base AS (
{% if project_list()|length > 0 -%}
{% for project in project_list() -%}

SELECT project_id, project_number, bi_capacity_name, size, preferred_tables
FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`BI_CAPACITIES`

{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}
{%- else %}

SELECT project_id, project_number, bi_capacity_name, size, preferred_tables
FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`BI_CAPACITIES`

{%- endif %}
)
SELECT
project_id, project_number, bi_capacity_name, size, preferred_tables,
FROM
base

models/base/google/bi_engine/information_schema_bi_capacities.yml
@@ -20,5 +20,5 @@ models:
name: preferred_tables
type: REPEATED STRING
description: dataset details with related information
name: bi_capacities
name: information_schema_bi_capacities
version: 2