From 946a5a3e7b78f2fb7265f97385d6fe0cd1bbfb4c Mon Sep 17 00:00:00 2001 From: Geoiv Date: Fri, 6 Aug 2021 16:56:19 -0500 Subject: [PATCH] Fixed regex syntax --- CHANGELOG.md | 3 +++ honeycomb/__danger.py | 2 +- honeycomb/hive.py | 4 ++-- honeycomb/orc.py | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b73905..73b897b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `insert_into_orc_table` function can now use overwrite option +### Changed +- Fixed incorrect regex matching syntax in detection of `INSERT OVERWRITE` commands + ## [1.6.3] 2021-06-14 ### Changed diff --git a/honeycomb/__danger.py b/honeycomb/__danger.py index fe1903d..4c4189e 100644 --- a/honeycomb/__danger.py +++ b/honeycomb/__danger.py @@ -44,7 +44,7 @@ def __nuke_partition(table_name, schema, partition_values): '{}=\'{}\''.format(partition_key, partition_value) for partition_key, partition_value in partition_values.items()]) partition_metadata = hive.run_lake_query( - 'DESCRIBE FORMATTED {}.{} PARTITON ({})'.format( + 'DESCRIBE FORMATTED {}.{} PARTITION ({})'.format( schema, table_name, partition_string), engine='hive' ) diff --git a/honeycomb/hive.py b/honeycomb/hive.py index f2d1e73..d5b72ec 100644 --- a/honeycomb/hive.py +++ b/honeycomb/hive.py @@ -47,8 +47,8 @@ def run_lake_query(query, engine='hive', complex_join=False): 'honeycomb for bucket integrity reasons.' ) schema, table_name = re.search( - r'INSERT *OVERWRITE *TABLE *(\w+)\.(\w+)', query, - flags=re.IGNORECASE).groups() + r'INSERT *OVERWRITE *(TABLE)? *(\w+)\.(\w+)', query, + flags=re.IGNORECASE).groups()[1:] _, table_s3_path = get_table_s3_location(table_name, schema) if not _hive_check_valid_table_path(table_s3_path): diff --git a/honeycomb/orc.py b/honeycomb/orc.py index d96efac..82e6a1a 100644 --- a/honeycomb/orc.py +++ b/honeycomb/orc.py @@ -203,8 +203,8 @@ def insert_into_orc_table(table_name, schema, source_table_name, source_schema, ['source_table.{}="{}"'.format(partition_key, partition_value) for partition_key, partition_value in partition_values.items()]) insert_command = ( - 'INSERT {} {}.{}{}\n'.format(insert_type, schema, table_name, - partition_strings) + + 'INSERT {} TABLE {}.{}{}\n'.format(insert_type, schema, table_name, + partition_strings) + 'SELECT\n' ' {}\n'.format(',\n '.join(col_names)) + 'FROM {}.{} source_table'.format(source_schema, source_table_name) +