From ff385edbb1b6f0bb6de5f55cb6b30d8db9d1f13c Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 17 Dec 2024 03:49:47 -0800 Subject: [PATCH] build(ingest/sqlglot): Bump pin to support snowflake CREATE ... WITH TAG (#12003) --- metadata-ingestion/setup.py | 2 +- .../src/datahub/testing/compare_metadata_json.py | 2 +- .../sql_parsing/aggregator_goldens/test_table_rename.json | 2 +- .../unit/sql_parsing/aggregator_goldens/test_table_swap.json | 4 ++-- .../aggregator_goldens/test_table_swap_with_temp.json | 2 +- .../goldens/test_bigquery_information_schema_query.json | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 415871d30175f8..31db711592eb14 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -101,7 +101,7 @@ # We heavily monkeypatch sqlglot. # Prior to the patching, we originally maintained an acryl-sqlglot fork: # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "sqlglot[rs]==25.26.0", + "sqlglot[rs]==25.32.1", "patchy==2.8.0", } diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index bedc5bc8fcd5e5..9dbadd4804997d 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -117,7 +117,7 @@ def diff_metadata_json( ignore_paths: Sequence[str] = (), ignore_order: bool = True, ) -> Union[DeepDiff, MCPDiff]: - ignore_paths = (*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info") + ignore_paths = [*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info"] try: if ignore_order: golden_map = get_aspects_by_urn(golden) diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json index 2d32e1328fbb4f..fd8475090f009e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json @@ -185,7 +185,7 @@ "aspect": { "json": { "statement": { - "value": "ALTER TABLE dev.public.foo_staging RENAME TO foo", + "value": "ALTER TABLE dev.public.foo_staging RENAME TO foo /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json index af0fca485777ff..d9d46a4b14a146 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json @@ -185,7 +185,7 @@ "aspect": { "json": { "statement": { - "value": "ALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info", + "value": "ALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", @@ -438,7 +438,7 @@ "aspect": { "json": { "statement": { - "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap", + "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json index ceaaf8f6887c7c..b4eaf76a149337 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json @@ -175,7 +175,7 @@ "aspect": { "json": { "statement": { - "value": "CREATE TABLE person_info_swap CLONE person_info;\n\nCREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep;\n\nINSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental;\n\nALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info", + "value": "CREATE TABLE person_info_swap CLONE person_info;\n\nCREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep;\n\nINSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental;\n\nALTER TABLE dev.public.person_info_swap SWAP WITH dev.public.person_info /* Datahub generated query text-- */", "language": "SQL" }, "source": "SYSTEM", diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json index f5f573f3d51136..9621b7d1c265b4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_information_schema_query.json @@ -1,7 +1,7 @@ { "query_type": "SELECT", "query_type_props": {}, - "query_fingerprint": "c721ce16410601b36e5f32bd9c5c28488500a93e617363739faebfe71496f163", + "query_fingerprint": "a204522c98a01568d8575a98a715de98985aeef0e822feb8450153f71891d6c6", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)", "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)" @@ -178,6 +178,6 @@ ], "debug_info": { "confidence": 0.2, - "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" + "generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.`INFORMATION_SCHEMA.COLUMNS` AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.`INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC" } } \ No newline at end of file