Skip to content

Commit

Permalink
feat: Add prefix/suffix and convert column names to snake case option
Browse files Browse the repository at this point in the history
  • Loading branch information
H-Max committed Sep 29, 2023
1 parent a27dbb3 commit 682a12b
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ There is a single option (for now), to output the data types in lowercase in the
# TODO

- Error handling
- Unit testing
- Merging with existing yaml definition files
- Generate the files for a complete dataset rather than a single table
- Option to output to stdout
Expand Down
33 changes: 30 additions & 3 deletions bq2dbt/bq2dbt.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import argparse
import os
import logging
import re

from google.cloud import bigquery

Expand All @@ -18,15 +19,34 @@
)

logger = logging.getLogger(__name__)
case_convert_regex = re.compile(r'(?<!^)(?=[A-Z])')


def convert_to_snake_case(input_string: str) -> str:
"""
Converts a string from CamelCase to snake_case.
Args:
input_string (str): The CamelCase string to be converted.
Returns:
str: The string converted to snake_case.
"""
return case_convert_regex.sub('_', input_string).lower()


def bq2dbt():
parser = argparse.ArgumentParser(description="Generate YAML and SQL output for a BigQuery table.")
parser.add_argument("table_id", help="Complete BigQuery table ID (project.dataset.table)")
parser.add_argument("-l", "--lower", action="store_true", help="Lowercase type names in YAML file")
parser.add_argument("--snake", action="store_true", help="Convert field names to snake_case")
parser.add_argument("--prefix", help="Prefix to add to columns names", default=None)
parser.add_argument("--suffix", help="Suffix to add to column names", default=None)
args = parser.parse_args()

project_id, dataset_id, table_name = args.table_id.split(".")
prefix = args.prefix
suffix = args.suffix

logger.info(f"Starting generation of YAML and SQL for table {args.table_id}...")

Expand Down Expand Up @@ -91,8 +111,12 @@ def bq2dbt():
# Iterate through the query results and add them to the YAML data
for field in fields:
data_type = field.data_type.split('<')[0]

destination = convert_to_snake_case(field.field_path) if args.snake else field.field_path
destination = "_".join(filter(None, [prefix, destination, suffix]))

field_info = {
"name": field.field_path,
"name": destination,
"data_type": data_type.lower() if args.lower else data_type,
"description": field.description
}
Expand All @@ -105,7 +129,10 @@ def bq2dbt():

yaml_data["models"][0]["columns"].append(field_info)
if '.' not in field.field_path:
sql_columns.append(f"`{field.field_path}`")
if destination != field.field_path:
sql_columns.append(f"`{field.field_path}` AS `{destination}`")
else:
sql_columns.append(f"`{field.field_path}`")

# Generate the YAML output
yaml_output = yaml.dump(yaml_data, default_flow_style=False, sort_keys=False)
Expand All @@ -114,7 +141,7 @@ def bq2dbt():
sql_columns_statement = "\n , ".join(sql_columns)
sql_output = f"""
SELECT
{sql_columns_statement}
{sql_columns_statement}
FROM
`{project_id}.{dataset_id}.{table_name}` -- Don't leave this in your DBT Statement
"""
Expand Down

0 comments on commit 682a12b

Please sign in to comment.