From a164b70e1d913dedf02d616c58761ab1e7ad076d Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Wed, 15 May 2024 15:27:13 +0200 Subject: [PATCH 1/3] chore(ingest/presto-on-hive) Set enable_properties_merge to True by default (#10469) --- .../ingestion/source/sql/hive_metastore.py | 4 +- .../datahub/testing/compare_metadata_json.py | 12 + .../hive_metastore_mces_golden_1.json | 586 ++++++++++++++---- .../hive_metastore_mces_golden_2.json | 586 ++++++++++++++---- .../hive_metastore_mces_golden_3.json | 586 ++++++++++++++---- .../hive_metastore_mces_golden_4.json | 586 ++++++++++++++---- .../hive_metastore_mces_golden_5.json | 586 ++++++++++++++---- .../hive-metastore/test_hive_metastore.py | 24 +- .../tests/test_helpers/mce_helpers.py | 2 + 9 files changed, 2328 insertions(+), 644 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index db702df9ddc922..944b8a080cb579 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -130,8 +130,8 @@ class HiveMetastore(BasicSQLAlchemyConfig): ) enable_properties_merge: bool = Field( - default=False, - description="By default, the connector overwrites properties every time. Set this to True to enable merging of properties with what exists on the server.", + default=True, + description="By default, the connector enables merging of properties with what exists on the server. 
Set this to False to enable the default connector behavior of overwriting properties on each ingestion.", ) simplify_nested_field_paths: bool = Field( diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 91f5d6f914676d..90d80dbeec8b23 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -5,6 +5,7 @@ import os import pathlib import pprint +import re import shutil import tempfile from typing import Any, Dict, List, Sequence, Union @@ -40,6 +41,7 @@ def assert_metadata_files_equal( update_golden: bool, copy_output: bool, ignore_paths: Sequence[str] = (), + ignore_paths_v2: Sequence[str] = (), ignore_order: bool = True, ) -> None: golden_exists = os.path.isfile(golden_path) @@ -70,6 +72,16 @@ def assert_metadata_files_equal( logger.info(f"Error reformatting golden file as MCP/MCEs: {e}") golden = load_json_file(golden_path) + if ignore_paths_v2: + golden_json = load_json_file(golden_path) + for i, obj in enumerate(golden_json): + aspect_json = obj.get("aspect", {}).get("json", []) + for j, item in enumerate(aspect_json): + if isinstance(item, dict): + if item.get("path") in ignore_paths_v2: + json_path = f"root[{i}]['aspect']['json'][{j}]['value']" + ignore_paths = (*ignore_paths, re.escape(json_path)) + diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: if isinstance(diff, MCPDiff): diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json index 3b07d651d0dcf6..2fad0643e5027f 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json @@ -211,6 +211,71 @@ 
"lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": 
"PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + 
"aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": 
"hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": 
"hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": 
"hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json index 8dfed3de760cc1..58e1e11c8dd764 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": 
"/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": 
"/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + 
}, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": 
"/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": 
"{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": 
"add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { 
+ "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git 
a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json index b88149cd333e9b..78db5068686790 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": 
"0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", 
- "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - 
"table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ 
} ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { 
"proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", 
"recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json index aecb60f6347d31..193e1e23b9de46 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" 
+ }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", 
+ "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + 
"path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + 
"path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": 
"{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json index dff32615d1bdf4..ce7ebdd299579f 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { 
+ "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + 
}, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + 
"runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": 
"/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": 
"/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": 
"MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { 
"proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "foo", + "fieldPath": "baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "baz", + "fieldPath": "foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py b/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py index 2ed0e6198dc000..dbc1d0706c4b6b 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py +++ b/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py @@ -1,6 +1,6 @@ import re import subprocess -from typing import Dict +from typing import Dict, Sequence import pytest import requests @@ -120,18 +120,28 @@ def 
test_hive_metastore_ingest( # config_file = (test_resources_dir / "presto_on_hive_to_file.yml").resolve() # run_datahub_cmd(["ingest", "-c", f"{config_file}"]) + ignore_paths: Sequence[str] = [ + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['transient_lastDdlTime'\]", + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['numfiles'\]", + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['totalsize'\]", + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['create_date'\]", + ] + + ignore_paths_v2: Sequence[str] = [ + "/customProperties/create_date", + "/customProperties/transient_lastDdlTime", + "/customProperties/numfiles", + "/customProperties/totalsize", + ] + # Verify the output. 
mce_helpers.check_golden_file( pytestconfig, output_path=f"hive_metastore_mces{test_suffix}.json", golden_path=test_resources_dir / f"hive_metastore_mces_golden{test_suffix}.json", - ignore_paths=[ - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['transient_lastDdlTime'\]", - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['numfiles'\]", - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['totalsize'\]", - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['create_date'\]", - ], + ignore_paths=ignore_paths, + ignore_paths_v2=ignore_paths_v2, ) diff --git a/metadata-ingestion/tests/test_helpers/mce_helpers.py b/metadata-ingestion/tests/test_helpers/mce_helpers.py index 1445f402f78b9f..563ccbee03c274 100644 --- a/metadata-ingestion/tests/test_helpers/mce_helpers.py +++ b/metadata-ingestion/tests/test_helpers/mce_helpers.py @@ -81,6 +81,7 @@ def check_golden_file( output_path: Union[str, os.PathLike], golden_path: Union[str, os.PathLike], ignore_paths: Sequence[str] = (), + ignore_paths_v2: Sequence[str] = (), ) -> None: update_golden = pytestconfig.getoption("--update-golden-files") copy_output = pytestconfig.getoption("--copy-output-files") @@ -90,6 +91,7 @@ def check_golden_file( update_golden=update_golden, copy_output=copy_output, ignore_paths=ignore_paths, + ignore_paths_v2=ignore_paths_v2, ) From e3503874f253feb59e3eec59bac67f989a0fee1b Mon Sep 17 00:00:00 2001 
From: Aseem Bansal Date: Wed, 15 May 2024 18:59:25 +0530 Subject: [PATCH 2/3] fix(ci): documentation build fix (#10507) --- docs-website/sidebars.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 64ded2f61c16f2..cf5f675c2f8850 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -67,6 +67,12 @@ module.exports = { id: "docs/managed-datahub/observe/freshness-assertions", className: "saasOnly", }, + { + label: "Schema Assertions", + type: "doc", + id: "docs/managed-datahub/observe/schema-assertions", + className: "saasOnly", + }, { label: "Volume Assertions", type: "doc", From 66473dbae73f6a442207e71e08caf712363ffa69 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 15 May 2024 19:22:36 +0530 Subject: [PATCH 3/3] docs: 0.3.2 Acryl (#10377) Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com> Co-authored-by: jayacryl <159848059+jayacryl@users.noreply.github.com> Co-authored-by: John Joyce Co-authored-by: Chris Collins --- docs-website/sidebars.js | 1 + docs/managed-datahub/release-notes/v_0_3_2.md | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 docs/managed-datahub/release-notes/v_0_3_2.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index cf5f675c2f8850..0424741858fa59 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -262,6 +262,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_3_2", "docs/managed-datahub/release-notes/v_0_3_1", "docs/managed-datahub/release-notes/v_0_2_16", "docs/managed-datahub/release-notes/v_0_2_15", diff --git a/docs/managed-datahub/release-notes/v_0_3_2.md b/docs/managed-datahub/release-notes/v_0_3_2.md new file mode 100644 index 00000000000000..39402c374272fc --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_3_2.md @@ -0,0 +1,34 @@ +# v0.3.2 +--- + +Release Availability Date +--- 
+15-May-2024 + +Recommended CLI/SDK +--- +- `v0.13.2.2` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.13.2.2 + +If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SDK etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + +## Release Changelog +--- +- Since `v0.3.1` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/57de905c66b6992aefb2051708fa83898fa82cec...6ed21bd1bc70a3ceb7dddb43ea7db4ca56874547 have been pulled in. +- OpenAPI Entities v3 (Entity-registry generated endpoints) +- Business Attributes Support (disabled by default) +- Graph Retriever (validators, mutators, side-effects) +- New Lineage Graphql Endpoints (optimizations for the UI) +- Metadata Tests Tracks Unique Hash +- SCIM Support +- V3 CLL Across DataJob Nodes Upgrade +- Subscribe to assertion errors: Get notified when assertion errors occur +- Group owners are labeled in slack incident notifications +- Field assertions now raise incidents when they go in error (if configured to do so) +- Assertion timeline viz handles missing data more gracefully +- Freshness assertions on Snowflake using the AUDIT_LOG will no longer consider no-op queries as valid 'updates' to the dataset +- [NEW] Schema Assertions: Allows users to configure a set of columns and expected types and verify that they actually exist in the table schema. Disabled by default. Please reach out to Acryl team if you wish to have this enabled. +- Adding polish and fixing bugs throughout the new UI +- Ability to add and remove lineage manually in the new UI +- Ability to configure the default time filter on lineage +- Ability to collapse lineage in the lineage graph (new UI) +- Support rich text on metadata form descriptions