Skip to content

Commit

Permalink
Merge pull request #33 from adswerve/hotfix/issue32
Browse files Browse the repository at this point in the history
Issue32 fix
  • Loading branch information
Ruslan Bergenov authored Apr 8, 2022
2 parents 29128d3 + 243e4a7 commit 9490fec
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 8 deletions.
32 changes: 24 additions & 8 deletions target_bigquery/simplify_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def is_iterable(schema):
"""

return not _is_ref(schema) \
and ARRAY in get_type(schema) \
and 'items' in schema
and ARRAY in get_type(schema)
# and 'items' in schema # commented out to allow "members": {"type": "array"}


def is_nullable(schema):
Expand All @@ -217,21 +217,31 @@ def is_literal(schema):
def is_datetime(schema):
    """
    Given a JSON Schema compatible dict, returns True when schema's type allows being a date-time

    Two cases make a datetime type:
      a) string is in type and format is date-time (this is per JSON schema standards)
      b) date-time is in type and no format is set (this is for simplicity)

    :param schema: dict, JSON Schema
    :return: Boolean
    """
    schema_types = get_type(schema)
    schema_format = schema.get('format')

    # Case a): the standard JSON Schema spelling.
    if STRING in schema_types and schema_format == DATE_TIME_FORMAT:
        return True

    # Case b): shorthand where the format name is used directly as the type.
    return DATE_TIME_FORMAT in schema_types and schema_format is None


def is_date(schema):
    """
    Given a JSON Schema compatible dict, returns True when schema's type allows being a date

    Two cases make a date type:
      a) string is in type and format is date (this is per JSON schema standards)
      b) date is in type and no format is set (this is for simplicity)

    :param schema: dict, JSON Schema
    :return: Boolean
    """
    schema_types = get_type(schema)
    schema_format = schema.get('format')

    # Case a): the standard JSON Schema spelling.
    if STRING in schema_types and schema_format == DATE_FORMAT:
        return True

    # Case b): shorthand where the format name is used directly as the type.
    return DATE_FORMAT in schema_types and schema_format is None


def is_bq_geography(schema):
Expand Down Expand Up @@ -417,15 +427,21 @@ def _simplify__implicit_anyof(root_schema, schema):
'format': DATE_TIME_FORMAT
}))

types.remove(STRING)
if DATE_TIME_FORMAT in types:
types.remove(DATE_TIME_FORMAT)
else:
types.remove(STRING)

if is_date(schema):
schemas.append(Cachable({
'type': [STRING],
'format': DATE_FORMAT
}))

types.remove(STRING)
if DATE_FORMAT in types:
types.remove(DATE_FORMAT)
else:
types.remove(STRING)

if is_bq_geography(schema):
schemas.append(Cachable({
Expand Down Expand Up @@ -485,7 +501,7 @@ def _simplify__implicit_anyof(root_schema, schema):
if is_iterable(schema):
schemas.append({
'type': [ARRAY],
'items': _helper_simplify(root_schema, schema.get('items', {}))
'items': _helper_simplify(root_schema, schema.get('items', {"type": STRING}))
})

types.remove(ARRAY)
Expand Down
53 changes: 53 additions & 0 deletions tests/test_schema_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,59 @@ class TestSchemaConversion(unittestcore.BaseUnitTest):
def setUp(self):
    """Run the base class setUp before each test in this class."""
    super(TestSchemaConversion, self).setUp()

def test_flat_simplify_and_build(self):
    """
    Simplify a flat schema and build the BigQuery schema from it, then check
    the resulting field type of every property.

    Covers the shorthand spellings where "date" / "date-time" appear directly
    in "type", and an array declared without "items".
    """
    schema = {
        "properties": {
            "new_status": {
                "type": ["string", "null"]
            },
            "previous_status": {
                "type": ["number", "null"]
            },
            "new_assignee": {
                "type": ["integer", "null"]
            },
            "previous_assignee": {
                "type": ["boolean", "null"]
            },
            "new_due_date": {
                "type": ["date", "null"]
            },
            "previous_due_date": {
                "type": ["date-time", "null"]
            },
            "members": {
                "type": "array"  # shorthand array definition, by default we treat this as array of strings
            }
        }
    }

    schema_simplified = simplify(schema)
    schema_bq = build_schema(schema_simplified, key_properties={}, add_metadata=False)

    # Index by name so that a field missing from the output fails the test
    # instead of being silently skipped.
    fields_by_name = {f.name: f for f in schema_bq}

    expected_types = {
        "new_status": "STRING",
        "previous_status": "FLOAT",
        "new_assignee": "INTEGER",
        "previous_assignee": "BOOLEAN",
        "new_due_date": "DATE",
        "previous_due_date": "TIMESTAMP",
        "members": "STRING",
    }

    for name, expected_type in expected_types.items():
        self.assertIn(name, fields_by_name)
        self.assertEqual(fields_by_name[name].field_type.upper(), expected_type, msg=name)

    self.assertEqual(fields_by_name["members"].mode, "REPEATED")

def test_flat_schema(self):

schema_0_input = schema_simple_1
Expand Down

0 comments on commit 9490fec

Please sign in to comment.