From 86fbe0cedbdae8c6fa75c31c81ac8bec56d8edc7 Mon Sep 17 00:00:00 2001
From: mirnawong1 <mirna.wong@fishtownanalytics.com>
Date: Fri, 21 Jun 2024 12:59:07 +0100
Subject: [PATCH] add

---
 website/docs/reference/about-resources.md     |  43 +++--
 website/scripts/update_node_resource_table.py | 169 ++++++++----------
 2 files changed, 91 insertions(+), 121 deletions(-)
 mode change 100644 => 100755 website/scripts/update_node_resource_table.py

diff --git a/website/docs/reference/about-resources.md b/website/docs/reference/about-resources.md
index d1eeb74c482..8b7a020ccfa 100644
--- a/website/docs/reference/about-resources.md
+++ b/website/docs/reference/about-resources.md
@@ -9,28 +9,26 @@ dbt supports different types of resource types. Each one has a set of supported
 
 ## Resource types
 
-
-The following tables describes each resource type, its identifier, and a brief description of its purpose. 
-
-
-| Resource type     | Identifier         | Description |
-|-------------------|--------------------|-------------|
-| Analysis          | `analysis`         | Similar to models, but usually used for exploratory data analysis and not directly integrated into the dbt DAG (Directed Acyclic Graph). |
-| Documentation     | `doc`              | Used for documenting the project, including descriptions of models, columns, and more. |
-| Exposure          | `exposure`         | Document downstream use of dbt models in dashboards, reports, or analysis. |
-| Group             | `group`            | Used for grouping and organizing other dbt resources, potentially for managing permissions or categorization. |
-| Macro             | `macro`            | Reusable code snippets in Jinja, allowing for custom logic and SQL reuse. |
-| Metric            | `metric`           | Define business metrics using a standardized syntax, which can be used across models and analyses. |
-| Model             | `model`            | Central to dbt projects, models are SQL files that define transformations and datasets. These are typically the primary analytical outputs. |
-| Operation         | `operation`        | Custom operations that can be run from the dbt command line, often for administrative or maintenance tasks. |
-| RPC Call   | `rpc`    | Deprecated in dbt v1.6. Run a`rpc` to execute dbt commands over a network. Deprecated in dbt v1.6. |
-| Saved Query       | `saved_query`      | Represent saved SQL queries, potentially for reuse or tracking purposes. |
-| Seed              | `seed`             | CSV files that are loaded into the database as static tables, useful for small reference data. |
-| Semantic model    | `semantic_model`   | A higher-level abstraction of data models, focusing on the semantic layer or business logic representation. |
-| Snapshot          | `snapshot`         | Capture data at a specific point in time to track changes, useful for historical reporting and auditing. |
-| Source            | `source`           | Represents raw data sources, helping to define and document the initial state of data that dbt interacts with. |
-| SQL operation     | `sql_operation`    | Custom SQL operations that can be executed within dbt workflows. |
-| Test              | `test`             | Define tests to ensure data quality and integrity, such as uniqueness, not-null constraints, and custom data validation. |
+| Resource type | Executable | Refable | Versioned | Description |
+|---------------|------------|---------|-----------| ----------- |
+| `analysis` | ✅ | ❌ | ❌ |  Similar to models, but usually used for exploratory data analysis and not directly integrated into the dbt DAG (Directed Acyclic Graph). |
+| `doc` | ✅ | ❌ | ❌ | Used for documenting the project, including descriptions of models, columns, and more. |
+| `exposure` | ❌ | ❌ | ❌ | Document downstream use of dbt models in dashboards, reports, or analysis. |
+| `fixture` | ❌ | ❌ | ❌ |
+| `group` | ❌ | ❌ | ❌ | Used for grouping and organizing other dbt resources, potentially for managing permissions or categorization. |
+| `macro` | ✅ | ❌ | ❌ | Reusable code snippets in Jinja, allowing for custom logic and SQL reuse. |
+| `metric` | ❌ | ❌ | ❌ | Define business metrics using a standardized syntax, which can be used across models and analyses. |
+| `model` | ✅ | ✅ | ✅ |  Central to dbt projects, models are SQL files that define transformations and datasets. These are typically the primary analytical outputs. |
+| `operation` | ✅ | ❌ | ❌ | Custom operations that can be run from the dbt command line, often for administrative or maintenance tasks. |
+| `rpc` | ✅ | ❌ | ❌ | Deprecated in dbt v1.6. Run a`rpc` to execute dbt commands over a network. Deprecated in dbt v1.6. |
+| `saved_query` | ❌ | ❌ | ❌ | Represent saved SQL queries, potentially for reuse or tracking purposes. |
+| `seed` | ✅ | ✅ | ❌ |  CSV files that are loaded into the database as static tables, useful for small reference data. |
+| `semantic_model` | ❌ | ❌ | ❌ |  A higher-level abstraction of data models, focusing on the semantic layer or business logic representation. |
+| `snapshot` | ✅ | ✅ | ❌ | Capture data at a specific point in time to track changes, useful for historical reporting and auditing. |
+| `source` | ❌ | ❌ | ❌ | Represents raw data sources, helping to define and document the initial state of data that dbt interacts with. |
+| `sql_operation` | ✅ | ❌ | ❌ | Custom SQL operations that can be executed within dbt workflows. |
+| `test` | ✅ | ❌ | ❌ | Define tests to ensure data quality and integrity, such as uniqueness, not-null constraints, and custom data validation. |
+| `unit_test` | ❌ | ❌ | ❌ | Define unit tests to test your projects during development, before pushing to production. |
 
 ## Supported properties and configs table
 
@@ -64,4 +62,3 @@ Properties or configurations support different resource types and are applied in
 ## Related docs
 - [About resource paths](/reference/resource-configs/resource-path)
 - [About configs and properties](/reference/configs-and-properties)
-
diff --git a/website/scripts/update_node_resource_table.py b/website/scripts/update_node_resource_table.py
old mode 100644
new mode 100755
index 47b402e6a4b..15878509f07
--- a/website/scripts/update_node_resource_table.py
+++ b/website/scripts/update_node_resource_table.py
@@ -1,112 +1,85 @@
 import requests
-import ast
+import re
 
-# URLs of the Python files in the dbt core repository
-resources_url = "https://raw.githubusercontent.com/dbt-labs/dbt-core/main/core/dbt/artifacts/resources/types.py"
+# URLs of the source files
 node_types_url = "https://raw.githubusercontent.com/dbt-labs/dbt-core/main/core/dbt/node_types.py"
+resource_types_url = "https://raw.githubusercontent.com/dbt-labs/dbt-core/main/core/dbt/artifacts/resources/types.py"
 
-# Fetch the content of the files
-resources_response = requests.get(resources_url)
-node_types_response = requests.get(node_types_url)
+# Function to fetch and parse the Python files
+def fetch_and_parse(url):
+    response = requests.get(url)
+    response.raise_for_status()
+    return response.text
 
-# Parse the content of the files
-resources_tree = ast.parse(resources_response.text)
-node_types_tree = ast.parse(node_types_response.text)
+# Fetch the content of the Python files
+node_types_content = fetch_and_parse(node_types_url)
+resource_types_content = fetch_and_parse(resource_types_url)
 
-def extract_resource_types(tree):
-    resource_types = {}
-    for node in tree.body:
-        if isinstance(node, ast.ClassDef) and node.name == "NodeType":
-            for item in node.body:
-                if isinstance(item, ast.Assign):
-                    for target in item.targets:
-                        if isinstance(target, ast.Name) and isinstance(item.value, ast.Constant):
-                            resource_types[target.id.lower()] = item.value.value.lower()
-    return resource_types
-
-def extract_node_type_lists(tree):
-    executable_nodes = []
-    refable_nodes = []
-    versioned_nodes = []
-    for node in tree.body:
-        if isinstance(node, ast.Assign):
-            for target in node.targets:
-                if isinstance(target, ast.Name):
-                    if target.id == "EXECUTABLE_NODE_TYPES":
-                        executable_nodes = [elt.attr for elt in node.value.elts if isinstance(elt, ast.Attribute)]
-                    elif target.id == "REFABLE_NODE_TYPES":
-                        refable_nodes = [elt.attr for elt in node.value.elts if isinstance(elt, ast.Attribute)]
-                    elif target.id == "VERSIONED_NODE_TYPES":
-                        versioned_nodes = [elt.attr for elt in node.value.elts if isinstance(elt, ast.Attribute)]
-    # Print debug information
-    print("Raw Executable Nodes:", executable_nodes)
-    print("Raw Refable Nodes:", refable_nodes)
-    print("Raw Versioned Nodes:", versioned_nodes)
-    
-    return executable_nodes, refable_nodes, versioned_nodes
-
-# Extract resource types and node types
-resource_types = extract_resource_types(resources_tree)
-executable_nodes, refable_nodes, versioned_nodes = extract_node_type_lists(node_types_tree)
-
-# Create a dictionary for node types
+# Extract resource types from the node_types.py file
+node_types_lines = node_types_content.split('\n')
 node_types = {}
-for node_type in resource_types.values():
-    node_types[node_type] = {
-        "executable": node_type in executable_nodes,
-        "ref": node_type in refable_nodes,
-        "versioned": node_type in versioned_nodes
-    }
-
-# Debugging output
-print("Resource Types:", resource_types)
-print("Executable Nodes:", executable_nodes)
-print("Refable Nodes:", refable_nodes)
-print("Versioned Nodes:", versioned_nodes)
-print("Node Types:", node_types)
-
-# Generate the markdown content
-def generate_markdown(resource_types, node_types):
-    markdown_content = "## Resource types\n\n"
-    markdown_content += "| Node | Executed in DAG | Exists in database | Created by execution | Upstream lineage | Downstream lineage | Versioned/Refable/Executable |\n"
-    markdown_content += "|:----:|:---------------:|:------------------:|:--------------------:|:----------------:|:------------------:|:-----------------------------:|\n"
-
-    for resource, value in resource_types.items():
-        executed = "✅" if node_types.get(value, {}).get('executable', False) else ""
-        exists = "✅" if value in ["source", "snapshot", "model", "seed", "export"] else ""
-        created = "✅" if value in ["snapshot", "model", "seed"] else ""
-        upstream = "←" if value in ["snapshot", "model", "seed", "analysis", "semantic_model", "metric", "saved_query", "exposure", "test", "unit", "fixture"] else ""
-        downstream = "→" if value in ["source", "snapshot", "model", "seed", "semantic_model", "metric", "fixture", "group"] else ""
-        versioned_ref_exec = []
-        if node_types.get(value, {}).get('versioned', False):
-            versioned_ref_exec.append("Versioned")
-        if node_types.get(value, {}).get('ref', False):
-            versioned_ref_exec.append("Refable")
-        if node_types.get(value, {}).get('executable', False):
-            versioned_ref_exec.append("Executable")
-        versioned_ref_exec_str = ", ".join(versioned_ref_exec)
-        markdown_content += f"| {value} | {executed} | {exists} | {created} | {upstream} | {downstream} | {versioned_ref_exec_str} |\n"
-
-    return markdown_content
-
-# Generate the markdown table content
-markdown_table_content = generate_markdown(resource_types, node_types)
-
-# Read the existing content of the markdown file
+for line in node_types_lines:
+    match = re.search(r'(\w+)\s*=\s*"(\w+)"', line)
+    if match:
+        node_types[match.group(2)] = {'executable': False, 'refable': False, 'versioned': False}
+
+# Extract resource types from the resource_types.py file
+resource_types_lines = resource_types_content.split('\n')
+resource_types = {}
+for line in resource_types_lines:
+    match = re.search(r'(\w+)\s*=\s*"(\w+)"', line)
+    if match:
+        resource_type = match.group(2)
+        resource_types[resource_type] = {'executable': False, 'refable': False, 'versioned': False}
+
+# Define the properties for each resource type
+resource_properties = {
+    "model": {"executable": True, "refable": True, "versioned": True},
+    "analysis": {"executable": True, "refable": False, "versioned": False},
+    "test": {"executable": True, "refable": False, "versioned": False},
+    "snapshot": {"executable": True, "refable": True, "versioned": False},
+    "operation": {"executable": True, "refable": False, "versioned": False},
+    "seed": {"executable": True, "refable": True, "versioned": False},
+    "rpc": {"executable": True, "refable": False, "versioned": False},
+    "sql_operation": {"executable": True, "refable": False, "versioned": False},
+    "doc": {"executable": True, "refable": False, "versioned": False},
+    "source": {"executable": False, "refable": False, "versioned": False},
+    "macro": {"executable": True, "refable": False, "versioned": False},
+    "exposure": {"executable": False, "refable": False, "versioned": False},
+    "metric": {"executable": False, "refable": False, "versioned": False},
+    "group": {"executable": False, "refable": False, "versioned": False},
+    "saved_query": {"executable": False, "refable": False, "versioned": False},
+    "semantic_model": {"executable": False, "refable": False, "versioned": False},
+    "unit_test": {"executable": False, "refable": False, "versioned": False},
+    "fixture": {"executable": False, "refable": False, "versioned": False},
+}
+
+# Update the properties of resource types based on the definitions
+for resource_type in resource_types.keys():
+    if resource_type in resource_properties:
+        resource_types[resource_type] = resource_properties[resource_type]
+
+# Sort the resource types alphabetically
+sorted_resource_types = dict(sorted(resource_types.items()))
+
+# Generate the Markdown table
+markdown_table = "| Resource type | Executable | Refable | Versioned |\n"
+markdown_table += "|---------------|------------|---------|-----------|\n"
+for resource_type, properties in sorted_resource_types.items():
+    markdown_table += f"| {resource_type} | {'✅' if properties['executable'] else '❌'} | {'✅' if properties['refable'] else '❌'} | {'✅' if properties['versioned'] else '❌'} |\n"
+
+# Define the file path
 file_path = "../docs/reference/about-resources.md"
+
+# Read the existing content of the file
 with open(file_path, "r") as file:
     content = file.read()
 
-# Insert the markdown table content under the specified header
-header = "## Resource types"
-if header in content:
-    header_index = content.index(header) + len(header)
-    content = content[:header_index] + f"\n\n{markdown_table_content}" + content[header_index:]
-else:
-    content += f"\n{header}\n\n{markdown_table_content}"
+# Replace the content under the "## Resource types" header
+new_content = re.sub(r'(## Resource types\s*\n)(.*?)(\n## )', rf'\1\n{markdown_table}\3', content, flags=re.DOTALL)
 
-# Write the updated content back to the markdown file
+# Write the updated content back to the file
 with open(file_path, "w") as file:
-    file.write(content)
+    file.write(new_content)
 
-print(f"Updated {file_path} with the new resource types table.")
+print(f"The Markdown table has been added to {file_path} under the '## Resource types' header.")