Skip to content

Commit

Permalink
Merge branch 'development-refactor-bigquery-field-name-function' into…
Browse files Browse the repository at this point in the history
… development

# Conflicts:
#	.github/workflows/python-package.yml
  • Loading branch information
RuslanBergenov committed Oct 1, 2021
2 parents 5f6e6f2 + 178247e commit 3fd9fec
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ env:

on:
push:
branches: [ development-update-requirements, development, master ]
branches: [ development, master ]
pull_request:
branches: [ master ]

Expand Down
61 changes: 19 additions & 42 deletions target_bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ def cleanup_record(schema, record):
elif isinstance(record, dict):
nr = {}
for key, value in record.items():
nkey = bigquery_transformed_key(key)
nkey = create_valid_bigquery_field_name(key)
nr[nkey] = cleanup_record(schema, value)
return nr

else:
raise Exception(f"unhandled instance of record: {record}")


def bigquery_transformed_key(key):
def create_valid_bigquery_field_name(field_name):
    """
    Clean up / prettify field names, make sure they match BigQuery naming conventions.

    Every character that is not alphanumeric (as decided by ``str.isalnum``)
    is replaced with an underscore. If the result starts with a digit, an
    underscore is prepended. Finally the result is cut to at most 300
    characters.

    An empty ``field_name`` yields an empty string (it does not raise).

    :param field_name: JSON field name
    :return: cleaned up JSON field name
    """
    # Keep letters and digits, turn everything else into an underscore.
    cleaned_up_field_name = "".join(
        char if char.isalnum() else "_" for char in field_name
    )

    # If the field starts with a digit, prepend an underscore. Slicing with
    # [:1] instead of indexing [0] keeps the empty-string case from raising
    # IndexError.
    if cleaned_up_field_name[:1].isdigit():
        cleaned_up_field_name = "_" + cleaned_up_field_name

    # Keep at most the first 300 characters (BigQuery's documented
    # column-name length limit); this is applied after the optional
    # underscore prefix so the final name never exceeds the limit.
    return cleaned_up_field_name[:300]

def prioritize_one_data_type_from_multiple_ones_in_any_of(field_property):
"""
Expand Down Expand Up @@ -248,7 +225,7 @@ def build_field(field_name, field_property):
if not ("items" in field_property and "properties" in field_property["items"]) and not (
"properties" in field_property):

return (SchemaField(name=bigquery_transformed_key(field_name),
return (SchemaField(name=create_valid_bigquery_field_name(field_name),
field_type=convert_field_type(field_property),
mode=determine_field_mode(field_name, field_property),
description=None,
Expand All @@ -266,7 +243,7 @@ def build_field(field_name, field_property):
).items():
processed_subfields.append(build_field(subfield_name, subfield_property))

return (SchemaField(name=bigquery_transformed_key(field_name),
return (SchemaField(name=create_valid_bigquery_field_name(field_name),
field_type=convert_field_type(field_property),
mode=determine_field_mode(field_name, field_property),
description=None,
Expand Down
6 changes: 3 additions & 3 deletions target_bigquery/validate_json_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
import singer
from target_bigquery.schema import bigquery_transformed_key
from target_bigquery.schema import create_valid_bigquery_field_name

LOGGER = singer.get_logger()

Expand Down Expand Up @@ -107,7 +107,7 @@ def build_field_list(schema):
for field_name, field_property in schema.get("properties", schema.get("items", {}).get("properties", {})).items():
if not ("items" in field_property and "properties" in field_property["items"]) \
and not ("properties" in field_property):
key = bigquery_transformed_key(field_name.upper())
key = create_valid_bigquery_field_name(field_name.upper())
if not f_dict.get(key):
f_dict[key] = [field_name]
else:
Expand All @@ -116,7 +116,7 @@ def build_field_list(schema):
elif ("items" in field_property and "properties" in field_property["items"]) \
or ("properties" in field_property):
nd = build_field_list(field_property)
key = bigquery_transformed_key(field_name.upper())
key = create_valid_bigquery_field_name(field_name.upper())
for k, v in nd.items():
if not f_dict.get(f"{key}.{k}"):
f_dict[f"{key}.{k}"] = [f"{field_name}.{i}" for i in v]
Expand Down

0 comments on commit 3fd9fec

Please sign in to comment.