From 189f8cefa71b1137801368edf26f633c80599016 Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Thu, 28 Nov 2024 14:53:46 -0500
Subject: [PATCH 01/10] feat(ingest): standardize sql type mappings (#11982)

---
 .../ingestion/source/dbt/dbt_common.py        | 68 ++--------------
 .../source/qlik_sense/data_classes.py         |  1 +
 .../ingestion/source/redshift/redshift.py     |  1 +
 .../source/snowflake/snowflake_schema_gen.py  |  1 +
 .../datahub/ingestion/source/sql/sql_types.py | 79 +++++++++++++++++--
 .../ingestion/source/unity/proxy_types.py     |  1 +
 .../tests/integration/dbt/test_dbt.py         | 69 ----------------
 .../tests/unit/test_sql_types.py              | 78 ++++++++++++++++++
 8 files changed, 161 insertions(+), 137 deletions(-)
 create mode 100644 metadata-ingestion/tests/unit/test_sql_types.py

diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index 4598ae388b827d..499e7e1231d050 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -53,19 +53,7 @@
     make_assertion_from_test,
     make_assertion_result_from_test,
 )
-from datahub.ingestion.source.sql.sql_types import (
-    ATHENA_SQL_TYPES_MAP,
-    BIGQUERY_TYPES_MAP,
-    POSTGRES_TYPES_MAP,
-    SNOWFLAKE_TYPES_MAP,
-    SPARK_SQL_TYPES_MAP,
-    TRINO_SQL_TYPES_MAP,
-    VERTICA_SQL_TYPES_MAP,
-    resolve_athena_modified_type,
-    resolve_postgres_modified_type,
-    resolve_trino_modified_type,
-    resolve_vertica_modified_type,
-)
+from datahub.ingestion.source.sql.sql_types import resolve_sql_type
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
     StaleEntityRemovalSourceReport,
@@ -89,17 +77,11 @@
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
-    BooleanTypeClass,
-    DateTypeClass,
     MySqlDDL,
     NullTypeClass,
-    NumberTypeClass,
-    RecordType,
     SchemaField,
     SchemaFieldDataType,
     SchemaMetadata,
-    StringTypeClass,
-    TimeTypeClass,
 )
 from datahub.metadata.schema_classes import (
     DataPlatformInstanceClass,
@@ -804,28 +786,6 @@ def make_mapping_upstream_lineage(
     )
 
 
-# See https://github.com/fishtown-analytics/dbt/blob/master/core/dbt/adapters/sql/impl.py
-_field_type_mapping = {
-    "boolean": BooleanTypeClass,
-    "date": DateTypeClass,
-    "time": TimeTypeClass,
-    "numeric": NumberTypeClass,
-    "text": StringTypeClass,
-    "timestamp with time zone": DateTypeClass,
-    "timestamp without time zone": DateTypeClass,
-    "integer": NumberTypeClass,
-    "float8": NumberTypeClass,
-    "struct": RecordType,
-    **POSTGRES_TYPES_MAP,
-    **SNOWFLAKE_TYPES_MAP,
-    **BIGQUERY_TYPES_MAP,
-    **SPARK_SQL_TYPES_MAP,
-    **TRINO_SQL_TYPES_MAP,
-    **ATHENA_SQL_TYPES_MAP,
-    **VERTICA_SQL_TYPES_MAP,
-}
-
-
 def get_column_type(
     report: DBTSourceReport,
     dataset_name: str,
@@ -835,24 +795,10 @@ def get_column_type(
     """
     Maps known DBT types to datahub types
     """
-    TypeClass: Any = _field_type_mapping.get(column_type) if column_type else None
-
-    if TypeClass is None and column_type:
-        # resolve a modified type
-        if dbt_adapter == "trino":
-            TypeClass = resolve_trino_modified_type(column_type)
-        elif dbt_adapter == "athena":
-            TypeClass = resolve_athena_modified_type(column_type)
-        elif dbt_adapter == "postgres" or dbt_adapter == "redshift":
-            # Redshift uses a variant of Postgres, so we can use the same logic.
-            TypeClass = resolve_postgres_modified_type(column_type)
-        elif dbt_adapter == "vertica":
-            TypeClass = resolve_vertica_modified_type(column_type)
-        elif dbt_adapter == "snowflake":
-            # Snowflake types are uppercase, so we check that.
-            TypeClass = _field_type_mapping.get(column_type.upper())
-
-    # if still not found, report the warning
+
+    TypeClass = resolve_sql_type(column_type, dbt_adapter)
+
+    # if still not found, report a warning
     if TypeClass is None:
         if column_type:
             report.info(
@@ -861,9 +807,9 @@ def get_column_type(
                 context=f"{dataset_name} - {column_type}",
                 log=False,
             )
-        TypeClass = NullTypeClass
+        TypeClass = NullTypeClass()
 
-    return SchemaFieldDataType(type=TypeClass())
+    return SchemaFieldDataType(type=TypeClass)
 
 
 @platform_name("dbt")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py
index 672fcbceb0603b..a43f5f32493f2d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py
@@ -15,6 +15,7 @@
     TimeType,
 )
 
+# TODO: Replace with standardized types in sql_types.py
 FIELD_TYPE_MAPPING: Dict[
     str,
     Type[
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py
index 4bc4c1451c262f..06cbb7fbae27cc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py
@@ -222,6 +222,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     ```
     """
 
+    # TODO: Replace with standardized types in sql_types.py
     REDSHIFT_FIELD_TYPE_MAPPINGS: Dict[
         str,
         Type[
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
index d4442749a06224..2bd8e8017f5492 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -103,6 +103,7 @@
 logger = logging.getLogger(__name__)
 
 # https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
+# TODO: Move to the standardized types in sql_types.py
 SNOWFLAKE_FIELD_TYPE_MAPPINGS = {
     "DATE": DateType,
     "BIGINT": NumberType,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
index 8ea4209784063f..89ca160ba1f487 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
@@ -1,5 +1,5 @@
 import re
-from typing import Any, Dict, ValuesView
+from typing import Any, Dict, Optional, Type, Union, ValuesView
 
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     ArrayType,
@@ -16,14 +16,28 @@
     UnionType,
 )
 
-# these can be obtained by running `select format_type(oid, null),* from pg_type;`
-# we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.)
-# (run `\copy (select format_type(oid, null),* from pg_type) to 'pg_type.csv' csv header;` to get a CSV)
+DATAHUB_FIELD_TYPE = Union[
+    ArrayType,
+    BooleanType,
+    BytesType,
+    DateType,
+    EnumType,
+    MapType,
+    NullType,
+    NumberType,
+    RecordType,
+    StringType,
+    TimeType,
+    UnionType,
+]
 
-# we map from format_type since this is what dbt uses
-# see https://github.com/fishtown-analytics/dbt/blob/master/plugins/postgres/dbt/include/postgres/macros/catalog.sql#L22
 
-# see https://www.npgsql.org/dev/types.html for helpful type annotations
+# These can be obtained by running `select format_type(oid, null),* from pg_type;`
+# We've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.)
+# (run `\copy (select format_type(oid, null),* from pg_type) to 'pg_type.csv' csv header;` to get a CSV)
+# We map from format_type since this is what dbt uses.
+# See https://github.com/fishtown-analytics/dbt/blob/master/plugins/postgres/dbt/include/postgres/macros/catalog.sql#L22
+# See https://www.npgsql.org/dev/types.html for helpful type annotations
 POSTGRES_TYPES_MAP: Dict[str, Any] = {
     "boolean": BooleanType,
     "bytea": BytesType,
@@ -430,3 +444,54 @@ def resolve_vertica_modified_type(type_string: str) -> Any:
     "geography": None,
     "uuid": StringType,
 }
+
+
+_merged_mapping = {
+    "boolean": BooleanType,
+    "date": DateType,
+    "time": TimeType,
+    "numeric": NumberType,
+    "text": StringType,
+    "timestamp with time zone": DateType,
+    "timestamp without time zone": DateType,
+    "integer": NumberType,
+    "float8": NumberType,
+    "struct": RecordType,
+    **POSTGRES_TYPES_MAP,
+    **SNOWFLAKE_TYPES_MAP,
+    **BIGQUERY_TYPES_MAP,
+    **SPARK_SQL_TYPES_MAP,
+    **TRINO_SQL_TYPES_MAP,
+    **ATHENA_SQL_TYPES_MAP,
+    **VERTICA_SQL_TYPES_MAP,
+}
+
+
+def resolve_sql_type(
+    column_type: Optional[str],
+    platform: Optional[str] = None,
+) -> Optional[DATAHUB_FIELD_TYPE]:
+    # In theory, we should use the platform-specific mapping where available.
+    # However, the types don't ever conflict, so the merged mapping is fine.
+    TypeClass: Optional[Type[DATAHUB_FIELD_TYPE]] = (
+        _merged_mapping.get(column_type) if column_type else None
+    )
+
+    if TypeClass is None and column_type:
+        # resolve a modified type
+        if platform == "trino":
+            TypeClass = resolve_trino_modified_type(column_type)
+        elif platform == "athena":
+            TypeClass = resolve_athena_modified_type(column_type)
+        elif platform == "postgres" or platform == "redshift":
+            # Redshift uses a variant of Postgres, so we can use the same logic.
+            TypeClass = resolve_postgres_modified_type(column_type)
+        elif platform == "vertica":
+            TypeClass = resolve_vertica_modified_type(column_type)
+        elif platform == "snowflake":
+            # Snowflake types are uppercase, so we check that.
+            TypeClass = _merged_mapping.get(column_type.upper())
+
+    if TypeClass:
+        return TypeClass()
+    return None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py
index f84f6c1b0c08d6..9c5752c518df14 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py
@@ -33,6 +33,7 @@
 
 logger = logging.getLogger(__name__)
 
+# TODO: (maybe) Replace with standardized types in sql_types.py
 DATA_TYPE_REGISTRY: dict = {
     ColumnTypeName.BOOLEAN: BooleanTypeClass,
     ColumnTypeName.BYTE: BytesTypeClass,
diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py
index 390d8d7698dd4c..c6a3dc4fd590bd 100644
--- a/metadata-ingestion/tests/integration/dbt/test_dbt.py
+++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py
@@ -11,12 +11,6 @@
 from datahub.ingestion.run.pipeline_config import PipelineConfig, SourceConfig
 from datahub.ingestion.source.dbt.dbt_common import DBTEntitiesEnabled, EmitDirective
 from datahub.ingestion.source.dbt.dbt_core import DBTCoreConfig, DBTCoreSource
-from datahub.ingestion.source.sql.sql_types import (
-    ATHENA_SQL_TYPES_MAP,
-    TRINO_SQL_TYPES_MAP,
-    resolve_athena_modified_type,
-    resolve_trino_modified_type,
-)
 from tests.test_helpers import mce_helpers, test_connection_helpers
 
 FROZEN_TIME = "2022-02-03 07:00:00"
@@ -362,69 +356,6 @@ def test_dbt_tests(test_resources_dir, pytestconfig, tmp_path, mock_time, **kwar
     )
 
 
-@pytest.mark.parametrize(
-    "data_type, expected_data_type",
-    [
-        ("boolean", "boolean"),
-        ("tinyint", "tinyint"),
-        ("smallint", "smallint"),
-        ("int", "int"),
-        ("integer", "integer"),
-        ("bigint", "bigint"),
-        ("real", "real"),
-        ("double", "double"),
-        ("decimal(10,0)", "decimal"),
-        ("varchar(20)", "varchar"),
-        ("char", "char"),
-        ("varbinary", "varbinary"),
-        ("json", "json"),
-        ("date", "date"),
-        ("time", "time"),
-        ("time(12)", "time"),
-        ("timestamp", "timestamp"),
-        ("timestamp(3)", "timestamp"),
-        ("row(x bigint, y double)", "row"),
-        ("array(row(x bigint, y double))", "array"),
-        ("map(varchar, varchar)", "map"),
-    ],
-)
-def test_resolve_trino_modified_type(data_type, expected_data_type):
-    assert (
-        resolve_trino_modified_type(data_type)
-        == TRINO_SQL_TYPES_MAP[expected_data_type]
-    )
-
-
-@pytest.mark.parametrize(
-    "data_type, expected_data_type",
-    [
-        ("boolean", "boolean"),
-        ("tinyint", "tinyint"),
-        ("smallint", "smallint"),
-        ("int", "int"),
-        ("integer", "integer"),
-        ("bigint", "bigint"),
-        ("float", "float"),
-        ("double", "double"),
-        ("decimal(10,0)", "decimal"),
-        ("varchar(20)", "varchar"),
-        ("char", "char"),
-        ("binary", "binary"),
-        ("date", "date"),
-        ("timestamp", "timestamp"),
-        ("timestamp(3)", "timestamp"),
-        ("struct<x timestamp(3), y timestamp>", "struct"),
-        ("array<struct<x bigint, y double>>", "array"),
-        ("map<varchar, varchar>", "map"),
-    ],
-)
-def test_resolve_athena_modified_type(data_type, expected_data_type):
-    assert (
-        resolve_athena_modified_type(data_type)
-        == ATHENA_SQL_TYPES_MAP[expected_data_type]
-    )
-
-
 @pytest.mark.integration
 @freeze_time(FROZEN_TIME)
 def test_dbt_tests_only_assertions(
diff --git a/metadata-ingestion/tests/unit/test_sql_types.py b/metadata-ingestion/tests/unit/test_sql_types.py
new file mode 100644
index 00000000000000..ebe5ade115cdd4
--- /dev/null
+++ b/metadata-ingestion/tests/unit/test_sql_types.py
@@ -0,0 +1,78 @@
+import pytest
+
+from datahub.ingestion.source.sql.sql_types import (
+    ATHENA_SQL_TYPES_MAP,
+    TRINO_SQL_TYPES_MAP,
+    resolve_athena_modified_type,
+    resolve_sql_type,
+    resolve_trino_modified_type,
+)
+from datahub.metadata.schema_classes import BooleanTypeClass, StringTypeClass
+
+
+@pytest.mark.parametrize(
+    "data_type, expected_data_type",
+    [
+        ("boolean", "boolean"),
+        ("tinyint", "tinyint"),
+        ("smallint", "smallint"),
+        ("int", "int"),
+        ("integer", "integer"),
+        ("bigint", "bigint"),
+        ("real", "real"),
+        ("double", "double"),
+        ("decimal(10,0)", "decimal"),
+        ("varchar(20)", "varchar"),
+        ("char", "char"),
+        ("varbinary", "varbinary"),
+        ("json", "json"),
+        ("date", "date"),
+        ("time", "time"),
+        ("time(12)", "time"),
+        ("timestamp", "timestamp"),
+        ("timestamp(3)", "timestamp"),
+        ("row(x bigint, y double)", "row"),
+        ("array(row(x bigint, y double))", "array"),
+        ("map(varchar, varchar)", "map"),
+    ],
+)
+def test_resolve_trino_modified_type(data_type, expected_data_type):
+    assert (
+        resolve_trino_modified_type(data_type)
+        == TRINO_SQL_TYPES_MAP[expected_data_type]
+    )
+
+
+@pytest.mark.parametrize(
+    "data_type, expected_data_type",
+    [
+        ("boolean", "boolean"),
+        ("tinyint", "tinyint"),
+        ("smallint", "smallint"),
+        ("int", "int"),
+        ("integer", "integer"),
+        ("bigint", "bigint"),
+        ("float", "float"),
+        ("double", "double"),
+        ("decimal(10,0)", "decimal"),
+        ("varchar(20)", "varchar"),
+        ("char", "char"),
+        ("binary", "binary"),
+        ("date", "date"),
+        ("timestamp", "timestamp"),
+        ("timestamp(3)", "timestamp"),
+        ("struct<x timestamp(3), y timestamp>", "struct"),
+        ("array<struct<x bigint, y double>>", "array"),
+        ("map<varchar, varchar>", "map"),
+    ],
+)
+def test_resolve_athena_modified_type(data_type, expected_data_type):
+    assert (
+        resolve_athena_modified_type(data_type)
+        == ATHENA_SQL_TYPES_MAP[expected_data_type]
+    )
+
+
+def test_resolve_sql_type() -> None:
+    assert resolve_sql_type("boolean") == BooleanTypeClass()
+    assert resolve_sql_type("varchar") == StringTypeClass()

From 0476bf1e32d8892aef4faf493a2703a34bad9d48 Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Thu, 28 Nov 2024 21:41:53 -0500
Subject: [PATCH 02/10] feat(ingest): bump typing_extensions dep (#11965)

---
 metadata-ingestion/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 292038380e6a22..d7e056b31370df 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -14,8 +14,8 @@
 )
 
 base_requirements = {
-    # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to a Airflow 2.1 dependency conflict.
-    "typing_extensions>=3.7.4.3",
+    # Our min version of typing_extensions is somewhat constrained by Airflow.
+    "typing_extensions>=3.10.0.2",
     # Actual dependencies.
     "typing-inspect",
     # pydantic 1.8.2 is incompatible with mypy 0.910.

From a92c6b2bb0768b33ba479b012d51ef20ae2194f2 Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Thu, 28 Nov 2024 21:42:40 -0500
Subject: [PATCH 03/10] feat(ingest): add tests for colon characters in urns
 (#11976)

---
 .../src/datahub/utilities/urn_encoder.py      |  3 +-
 .../tests/unit/urns/test_urn.py               | 49 ++++++++++++++++---
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/metadata-ingestion/src/datahub/utilities/urn_encoder.py b/metadata-ingestion/src/datahub/utilities/urn_encoder.py
index 88c0a128b8e468..4f19eeff3e70f0 100644
--- a/metadata-ingestion/src/datahub/utilities/urn_encoder.py
+++ b/metadata-ingestion/src/datahub/utilities/urn_encoder.py
@@ -4,7 +4,8 @@
 # NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
 # If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
 # We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
-RESERVED_CHARS = {",", "(", ")"}
+# Also see https://datahubproject.io/docs/what/urn/#restrictions
+RESERVED_CHARS = {",", "(", ")", "␟"}
 RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})
 
 
diff --git a/metadata-ingestion/tests/unit/urns/test_urn.py b/metadata-ingestion/tests/unit/urns/test_urn.py
index 1bf48082fec8c9..73badb3d1b4234 100644
--- a/metadata-ingestion/tests/unit/urns/test_urn.py
+++ b/metadata-ingestion/tests/unit/urns/test_urn.py
@@ -1,6 +1,12 @@
 import pytest
 
-from datahub.metadata.urns import DatasetUrn, Urn
+from datahub.metadata.urns import (
+    CorpUserUrn,
+    DashboardUrn,
+    DataPlatformUrn,
+    DatasetUrn,
+    Urn,
+)
 from datahub.utilities.urns.error import InvalidUrnError
 
 pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
@@ -36,20 +42,51 @@ def test_url_encode_urn() -> None:
 
 def test_invalid_urn() -> None:
     with pytest.raises(InvalidUrnError):
-        Urn.create_from_string("urn:li:abc")
+        Urn.from_string("urn:li:abc")
 
     with pytest.raises(InvalidUrnError):
-        Urn.create_from_string("urn:li:abc:")
+        Urn.from_string("urn:li:abc:")
 
     with pytest.raises(InvalidUrnError):
-        Urn.create_from_string("urn:li:abc:()")
+        Urn.from_string("urn:li:abc:()")
 
     with pytest.raises(InvalidUrnError):
-        Urn.create_from_string("urn:li:abc:(abc,)")
+        Urn.from_string("urn:li:abc:(abc,)")
+
+    with pytest.raises(InvalidUrnError):
+        Urn.from_string("urn:li:corpuser:abc)")
+
+
+def test_urn_colon() -> None:
+    # Colon characters are valid in urns, and should not mess up parsing.
+
+    urn = Urn.from_string(
+        "urn:li:dashboard:(looker,dashboards.thelook::customer_lookup)"
+    )
+    assert isinstance(urn, DashboardUrn)
+
+    assert DataPlatformUrn.from_string("urn:li:dataPlatform:abc:def")
+    assert DatasetUrn.from_string(
+        "urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,PROD)"
+    )
+    assert Urn.from_string("urn:li:corpuser:foo:bar@example.com")
+
+    # I'm not sure why you'd ever want this, but technically it's a valid urn.
+    urn = Urn.from_string("urn:li:corpuser::")
+    assert isinstance(urn, CorpUserUrn)
+    assert urn.username == ":"
+    assert urn == CorpUserUrn(":")
+
+
+def test_urn_coercion() -> None:
+    urn = CorpUserUrn("foo␟bar")
+    assert urn.urn() == "urn:li:corpuser:foo%E2%90%9Fbar"
+
+    assert urn == Urn.from_string(urn.urn())
 
 
 def test_urn_type_dispatch() -> None:
-    urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,prod)")
+    urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)")
     assert isinstance(urn, DatasetUrn)
 
     with pytest.raises(InvalidUrnError, match="Passed an urn of type corpuser"):

From a46de1ecf9b67e3520c991a51c3f0d9a4f9051a1 Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Thu, 28 Nov 2024 21:42:55 -0500
Subject: [PATCH 04/10] feat(ingest/athena): handle partition fetching errors
 (#11966)

---
 .../datahub/ingestion/source/sql/athena.py    | 68 +++++++++++++------
 .../tests/unit/test_athena_source.py          | 45 +++++++++++-
 2 files changed, 88 insertions(+), 25 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
index 71cfd0268ee6b5..6f7decc79b1df2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
@@ -26,6 +26,7 @@
     platform_name,
     support_status,
 )
+from datahub.ingestion.api.source import StructuredLogLevel
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.aws.s3_util import make_s3_urn
 from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
@@ -35,6 +36,7 @@
     register_custom_type,
 )
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, make_sqlalchemy_uri
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.ingestion.source.sql.sql_utils import (
     add_table_to_schema_container,
     gen_database_container,
@@ -48,6 +50,15 @@
     get_schema_fields_for_sqlalchemy_column,
 )
 
+try:
+    from typing_extensions import override
+except ImportError:
+    _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])
+
+    def override(f: _F, /) -> _F:  # noqa: F811
+        return f
+
+
 logger = logging.getLogger(__name__)
 
 assert STRUCT, "required type modules are not available"
@@ -322,12 +333,15 @@ class AthenaSource(SQLAlchemySource):
     - Profiling when enabled.
     """
 
-    table_partition_cache: Dict[str, Dict[str, Partitionitem]] = {}
+    config: AthenaConfig
+    report: SQLSourceReport
 
     def __init__(self, config, ctx):
         super().__init__(config, ctx, "athena")
         self.cursor: Optional[BaseCursor] = None
 
+        self.table_partition_cache: Dict[str, Dict[str, Partitionitem]] = {}
+
     @classmethod
     def create(cls, config_dict, ctx):
         config = AthenaConfig.parse_obj(config_dict)
@@ -452,6 +466,7 @@ def add_table_to_schema_container(
         )
 
     # It seems like database/schema filter in the connection string does not work and this to work around that
+    @override
     def get_schema_names(self, inspector: Inspector) -> List[str]:
         athena_config = typing.cast(AthenaConfig, self.config)
         schemas = inspector.get_schema_names()
@@ -459,34 +474,42 @@ def get_schema_names(self, inspector: Inspector) -> List[str]:
             return [schema for schema in schemas if schema == athena_config.database]
         return schemas
 
-    # Overwrite to get partitions
+    @classmethod
+    def _casted_partition_key(cls, key: str) -> str:
+        # We need to cast the partition keys to a VARCHAR, since otherwise
+        # Athena may throw an error during concatenation / comparison.
+        return f"CAST({key} as VARCHAR)"
+
+    @override
     def get_partitions(
         self, inspector: Inspector, schema: str, table: str
-    ) -> List[str]:
-        partitions = []
-
-        athena_config = typing.cast(AthenaConfig, self.config)
-
-        if not athena_config.extract_partitions:
-            return []
+    ) -> Optional[List[str]]:
+        if not self.config.extract_partitions:
+            return None
 
         if not self.cursor:
-            return []
+            return None
 
         metadata: AthenaTableMetadata = self.cursor.get_table_metadata(
             table_name=table, schema_name=schema
         )
 
-        if metadata.partition_keys:
-            for key in metadata.partition_keys:
-                if key.name:
-                    partitions.append(key.name)
-
-            if not partitions:
-                return []
+        partitions = []
+        for key in metadata.partition_keys:
+            if key.name:
+                partitions.append(key.name)
+        if not partitions:
+            return []
 
-            # We create an artiificaial concatenated partition key to be able to query max partition easier
-            part_concat = "|| '-' ||".join(partitions)
+        with self.report.report_exc(
+            message="Failed to extract partition details",
+            context=f"{schema}.{table}",
+            level=StructuredLogLevel.WARN,
+        ):
+            # We create an artificial concatenated partition key to make querying the max partition easier
+            part_concat = " || '-' || ".join(
+                self._casted_partition_key(key) for key in partitions
+            )
             max_partition_query = f'select {",".join(partitions)} from "{schema}"."{table}$partitions" where {part_concat} = (select max({part_concat}) from "{schema}"."{table}$partitions")'
             ret = self.cursor.execute(max_partition_query)
             max_partition: Dict[str, str] = {}
@@ -500,9 +523,8 @@ def get_partitions(
                 partitions=partitions,
                 max_partition=max_partition,
             )
-            return partitions
 
-        return []
+        return partitions
 
     # Overwrite to modify the creation of schema fields
     def get_schema_fields_for_column(
@@ -551,7 +573,9 @@ def generate_partition_profiler_query(
         if partition and partition.max_partition:
             max_partition_filters = []
             for key, value in partition.max_partition.items():
-                max_partition_filters.append(f"CAST({key} as VARCHAR) = '{value}'")
+                max_partition_filters.append(
+                    f"{self._casted_partition_key(key)} = '{value}'"
+                )
             max_partition = str(partition.max_partition)
             return (
                 max_partition,
diff --git a/metadata-ingestion/tests/unit/test_athena_source.py b/metadata-ingestion/tests/unit/test_athena_source.py
index 875cf3800daf88..f8b6220d182735 100644
--- a/metadata-ingestion/tests/unit/test_athena_source.py
+++ b/metadata-ingestion/tests/unit/test_athena_source.py
@@ -93,7 +93,8 @@ def test_athena_get_table_properties():
             "CreateTime": datetime.now(),
             "LastAccessTime": datetime.now(),
             "PartitionKeys": [
-                {"Name": "testKey", "Type": "string", "Comment": "testComment"}
+                {"Name": "year", "Type": "string", "Comment": "testComment"},
+                {"Name": "month", "Type": "string", "Comment": "testComment"},
             ],
             "Parameters": {
                 "comment": "testComment",
@@ -112,8 +113,18 @@ def test_athena_get_table_properties():
         response=table_metadata
     )
 
+    # Mock partition query results
+    mock_cursor.execute.return_value.description = [
+        ["year"],
+        ["month"],
+    ]
+    mock_cursor.execute.return_value.__iter__.return_value = [["2023", "12"]]
+
     ctx = PipelineContext(run_id="test")
     source = AthenaSource(config=config, ctx=ctx)
+    source.cursor = mock_cursor
+
+    # Test table properties
     description, custom_properties, location = source.get_table_properties(
         inspector=mock_inspector, table=table, schema=schema
     )
@@ -124,13 +135,35 @@ def test_athena_get_table_properties():
         "last_access_time": "2020-04-14 07:00:00",
         "location": "s3://testLocation",
         "outputformat": "testOutputFormat",
-        "partition_keys": '[{"name": "testKey", "type": "string", "comment": "testComment"}]',
+        "partition_keys": '[{"name": "year", "type": "string", "comment": "testComment"}, {"name": "month", "type": "string", "comment": "testComment"}]',
         "serde.serialization.lib": "testSerde",
         "table_type": "testType",
     }
-
     assert location == make_s3_urn("s3://testLocation", "PROD")
 
+    # Test partition functionality
+    partitions = source.get_partitions(
+        inspector=mock_inspector, schema=schema, table=table
+    )
+    assert partitions == ["year", "month"]
+
+    # Verify the correct SQL query was generated for partitions
+    expected_query = """\
+select year,month from "test_schema"."test_table$partitions" \
+where CAST(year as VARCHAR) || '-' || CAST(month as VARCHAR) = \
+(select max(CAST(year as VARCHAR) || '-' || CAST(month as VARCHAR)) \
+from "test_schema"."test_table$partitions")"""
+    mock_cursor.execute.assert_called_once()
+    actual_query = mock_cursor.execute.call_args[0][0]
+    assert actual_query == expected_query
+
+    # Verify partition cache was populated correctly
+    assert source.table_partition_cache[schema][table].partitions == partitions
+    assert source.table_partition_cache[schema][table].max_partition == {
+        "year": "2023",
+        "month": "12",
+    }
+
 
 def test_get_column_type_simple_types():
     assert isinstance(
@@ -214,3 +247,9 @@ def test_column_type_complex_combination():
     assert isinstance(
         result._STRUCT_fields[2][1].item_type._STRUCT_fields[1][1], types.String
     )
+
+
+def test_casted_partition_key():
+    from datahub.ingestion.source.sql.athena import AthenaSource
+
+    assert AthenaSource._casted_partition_key("test_col") == "CAST(test_col as VARCHAR)"

From c42f77985947418de108e7e5b515aafdbf638ed3 Mon Sep 17 00:00:00 2001
From: sagar-salvi-apptware
 <159135491+sagar-salvi-apptware@users.noreply.github.com>
Date: Fri, 29 Nov 2024 21:28:31 +0530
Subject: [PATCH 05/10] fix: Add option for disabling ownership extraction
 (#11970)

Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
---
 .../app/ingest/source/builder/sources.json    |  2 +-
 .../docs/sources/dremio/dremio_recipe.yml     |  2 ++
 .../ingestion/source/dremio/dremio_aspects.py | 34 +++++++++++--------
 .../ingestion/source/dremio/dremio_config.py  |  5 +++
 .../ingestion/source/dremio/dremio_source.py  |  2 ++
 5 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json
index 70d9baabdb4bc6..44b8a37f14655d 100644
--- a/datahub-web-react/src/app/ingest/source/builder/sources.json
+++ b/datahub-web-react/src/app/ingest/source/builder/sources.json
@@ -309,7 +309,7 @@
         "displayName": "Dremio",
         "description": "Import Spaces, Sources, Tables and statistics from Dremio.",
         "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
-        "recipe": "source:\n    type: dremio\n    config:\n        # Coordinates\n        hostname: null\n        port: null\n        #true if https, otherwise false\n        tls: true\n\n        #For cloud instance\n        #is_dremio_cloud: True\n        #dremio_cloud_project_id: <project_id>\n\n        #Credentials with personal access token\n        authentication_method: PAT\n        password: pass\n\n        #Or Credentials with basic auth\n        #authentication_method: password\n        #username: null\n        #password: null\n\n        stateful_ingestion:\n            enabled: true"
+        "recipe": "source:\n    type: dremio\n    config:\n        # Coordinates\n        hostname: null\n        port: null\n        #true if https, otherwise false\n        tls: true\n\n        #For cloud instance\n        #is_dremio_cloud: True\n        #dremio_cloud_project_id: <project_id>\n\n        #Credentials with personal access token\n        authentication_method: PAT\n        password: pass\n\n        #Or Credentials with basic auth\n        #authentication_method: password\n        #username: null\n        #password: null\n\n        ingest_owner: true\n\n        stateful_ingestion:\n            enabled: true"
     },
     {
         "urn": "urn:li:dataPlatform:cassandra",
diff --git a/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml b/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml
index 9dcd4f8b337d16..d18d19da2de84b 100644
--- a/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml
+++ b/metadata-ingestion/docs/sources/dremio/dremio_recipe.yml
@@ -20,6 +20,8 @@ source:
 
     include_query_lineage: True
 
+    ingest_owner: true
+
     #Optional
     source_mappings:
       - platform: s3
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py
index b29fc91a25e74c..d9d85edbf4f7a0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py
@@ -142,6 +142,7 @@ def __init__(
         platform: str,
         ui_url: str,
         env: str,
+        ingest_owner: bool,
         domain: Optional[str] = None,
         platform_instance: Optional[str] = None,
     ):
@@ -150,6 +151,7 @@ def __init__(
         self.env = env
         self.domain = domain
         self.ui_url = ui_url
+        self.ingest_owner = ingest_owner
 
     def get_container_key(
         self, name: Optional[str], path: Optional[List[str]]
@@ -426,21 +428,23 @@ def _create_external_url(self, dataset: DremioDataset) -> str:
         return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'
 
     def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
-        if not dataset.owner:
-            return None
-        owner = (
-            make_user_urn(dataset.owner)
-            if dataset.owner_type == "USER"
-            else make_group_urn(dataset.owner)
-        )
-        return OwnershipClass(
-            owners=[
-                OwnerClass(
-                    owner=owner,
-                    type=OwnershipTypeClass.TECHNICAL_OWNER,
-                )
-            ]
-        )
+        if self.ingest_owner and dataset.owner:
+            owner_urn = (
+                make_user_urn(dataset.owner)
+                if dataset.owner_type == "USER"
+                else make_group_urn(dataset.owner)
+            )
+            ownership: OwnershipClass = OwnershipClass(
+                owners=[
+                    OwnerClass(
+                        owner=owner_urn,
+                        type=OwnershipTypeClass.TECHNICAL_OWNER,
+                    )
+                ]
+            )
+            return ownership
+
+        return None
 
     def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
         return GlossaryTermsClass(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py
index d966d575c03320..b3f2107a1dfaa7 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py
@@ -174,3 +174,8 @@ def is_profiling_enabled(self) -> bool:
         default=False,
         description="Whether to include query-based lineage information.",
     )
+
+    ingest_owner: bool = Field(
+        default=True,
+        description="Ingest Owner from source. This will override Owner info entered from UI",
+    )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py
index 5b96845ec04961..5535a406177016 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py
@@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase):
     - Ownership and Glossary Terms:
         - Metadata related to ownership of datasets, extracted from Dremio’s ownership model.
         - Glossary terms and business metadata associated with datasets, providing additional context to the data.
+        - Note: Ownership information is only available in the Cloud and Enterprise editions; it is not available in the Community edition.
 
     - Optional SQL Profiling (if enabled):
         - Table, row, and column statistics can be profiled and ingested via optional SQL queries.
@@ -123,6 +124,7 @@ def __init__(self, config: DremioSourceConfig, ctx: PipelineContext):
         self.dremio_aspects = DremioAspects(
             platform=self.get_platform(),
             domain=self.config.domain,
+            ingest_owner=self.config.ingest_owner,
             platform_instance=self.config.platform_instance,
             env=self.config.env,
             ui_url=dremio_api.ui_url,

From 74a84885df604436df182b01810f287f152ccb73 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <45681293+acrylJonny@users.noreply.github.com>
Date: Fri, 29 Nov 2024 19:46:29 +0000
Subject: [PATCH 06/10] feat(ingest/dremio): Retrieve default_schema for SQL
 views (#11832)

---
 .../src/datahub/ingestion/source/dremio/dremio_api.py | 11 +++++++++++
 .../ingestion/source/dremio/dremio_entities.py        |  4 ++++
 .../datahub/ingestion/source/dremio/dremio_source.py  |  1 +
 3 files changed, 16 insertions(+)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py
index 7b9ccb52acbef4..7f4e0f520b7a5e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py
@@ -774,3 +774,14 @@ def process_source_and_containers(source):
                 containers.extend(future.result())
 
         return containers
+
+    def get_context_for_vds(self, resource_id: str) -> str:
+        context_array = self.get(
+            url=f"/catalog/{resource_id}",
+        ).get("sqlContext")
+        if context_array:
+            return ".".join(
+                f'"{part}"' if "." in part else f"{part}" for part in context_array
+            )
+        else:
+            return ""
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py
index 16774c2e4a816f..b80d7b8e0f9123 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_entities.py
@@ -200,6 +200,7 @@ class DremioDataset:
     columns: List[DremioDatasetColumn]
     sql_definition: Optional[str]
     dataset_type: DremioDatasetType
+    default_schema: Optional[str]
     owner: Optional[str]
     owner_type: Optional[str]
     created: str
@@ -235,6 +236,9 @@ def __init__(
 
         if self.sql_definition:
             self.dataset_type = DremioDatasetType.VIEW
+            self.default_schema = api_operations.get_context_for_vds(
+                resource_id=self.resource_id
+            )
         else:
             self.dataset_type = DremioDatasetType.TABLE
 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py
index 5535a406177016..f814108c377605 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py
@@ -417,6 +417,7 @@ def process_dataset(
                     view_urn=dataset_urn,
                     view_definition=dataset_info.sql_definition,
                     default_db=self.default_db,
+                    default_schema=dataset_info.default_schema,
                 )
 
         elif dataset_info.dataset_type == DremioDatasetType.TABLE:

From 337f2b95f552c38577117ea8cdc5228c719d7001 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <45681293+acrylJonny@users.noreply.github.com>
Date: Fri, 29 Nov 2024 19:59:18 +0000
Subject: [PATCH 07/10] fix(docs): fix sample business glossary (#11669)

---
 .../datahub-business-glossary.md              | 20 +++++++++----------
 .../banking_business_glossary.yaml            |  4 ++--
 .../bootstrap_data/business_glossary.yml      | 12 +++++------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/metadata-ingestion/docs/sources/business-glossary/datahub-business-glossary.md b/metadata-ingestion/docs/sources/business-glossary/datahub-business-glossary.md
index 3d2a0509492bd5..3433a853ea9b05 100644
--- a/metadata-ingestion/docs/sources/business-glossary/datahub-business-glossary.md
+++ b/metadata-ingestion/docs/sources/business-glossary/datahub-business-glossary.md
@@ -7,7 +7,7 @@ The business glossary source file should be a .yml file with the following top-l
 Example **Glossary**:
 
 ```yaml
-version: 1                                     			# the version of business glossary file config the config conforms to. Currently the only version released is `1`.
+version: "1"                                     			# the version of business glossary file config the config conforms to. Currently the only version released is `1`.
 source: DataHub                                			# the source format of the terms. Currently only supports `DataHub`
 owners:                                        			# owners contains two nested fields
   users:                                       		    # (optional) a list of user IDs
@@ -60,7 +60,7 @@ Example **GlossaryTerm**:
     - Shipping.CountryCode
     - Shipping.StreetAddress
   custom_properties:                                                        # (optional) a map of key/value pairs of arbitrary custom properties
-    - is_used_for_compliance_tracking: true
+    is_used_for_compliance_tracking: "true"
   knowledge_links:                                                          # (optional) a list of **KnowledgeCard** related to this term. These appear as links on the glossary node's page
     - url: "https://en.wikipedia.org/wiki/Address"
       label: Wiki link
@@ -73,7 +73,7 @@ To see how these all work together, check out this comprehensive example busines
 <summary>Example business glossary file</summary>
 
 ```yaml
-version: 1
+version: "1"
 source: DataHub
 owners:
   users:
@@ -89,15 +89,15 @@ nodes:
       - name: Sensitive
         description: Sensitive Data
         custom_properties:
-          is_confidential: false
+          is_confidential: "false"
       - name: Confidential
         description: Confidential Data
         custom_properties:
-          is_confidential: true
+          is_confidential: "true"
       - name: HighlyConfidential
         description: Highly Confidential Data
         custom_properties:
-          is_confidential: true
+          is_confidential: "true"
         domain: Marketing
   - name: PersonalInformation
     description: All terms related to personal information
@@ -148,7 +148,7 @@ nodes:
         related_terms:
           - Housing.Kitchen.Cutlery
         custom_properties:
-          - is_used_for_compliance_tracking: true
+          is_used_for_compliance_tracking: "true"
         knowledge_links:
           - url: "https://en.wikipedia.org/wiki/Address"
             label: Wiki link
@@ -237,7 +237,7 @@ Source file linked [here](https://github.com/datahub-project/datahub/blob/master
 
 ## Generating custom IDs for your terms
 
-IDs are normally inferred from the glossary term/node's name, see the `enable_auto_id` config. But, if you need a stable 
+IDs are normally inferred from the glossary term/node's name, see the `enable_auto_id` config. But, if you need a stable
 identifier, you can generate a custom ID for your term. It should be unique across the entire Glossary.
 
 Here's an example ID:
@@ -247,5 +247,5 @@ A note of caution: once you select a custom ID, it cannot be easily changed.
 
 ## Compatibility
 
-Compatible with version 1 of business glossary format. 
-The source will be evolved as we publish newer versions of this format.
\ No newline at end of file
+Compatible with version 1 of business glossary format.
+The source will be evolved as we publish newer versions of this format.
diff --git a/metadata-ingestion/examples/bootstrap_data/banking_business_glossary.yaml b/metadata-ingestion/examples/bootstrap_data/banking_business_glossary.yaml
index d0fea81748da57..a1adec58b7b5ea 100644
--- a/metadata-ingestion/examples/bootstrap_data/banking_business_glossary.yaml
+++ b/metadata-ingestion/examples/bootstrap_data/banking_business_glossary.yaml
@@ -1,4 +1,4 @@
-version: 1
+version: "1"
 source: DataHub
 owners:
   users:
@@ -68,4 +68,4 @@ nodes:
           - name: Auto Loan
             description: "A type of loan used to finance the purchase of a vehicle, with the vehicle serving as collateral for the loan."
           - name: Interest Rate
-            description: "The rate at which interest is charged on a loan or paid on an investment, expressed as a percentage of the principal amount."
\ No newline at end of file
+            description: "The rate at which interest is charged on a loan or paid on an investment, expressed as a percentage of the principal amount."
diff --git a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml
index 327246863b0ab0..20d1011b966893 100644
--- a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml
+++ b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml
@@ -1,4 +1,4 @@
-version: 1
+version: "1"
 source: DataHub
 owners:
   users:
@@ -11,20 +11,20 @@ nodes:
       - label: Wiki link for classification
         url: "https://en.wikipedia.org/wiki/Classification"
     custom_properties:
-      is_confidential: true
+      is_confidential: "true"
     terms:
       - name: Sensitive
         description: Sensitive Data
         custom_properties:
-          is_confidential: false
+          is_confidential: "false"
       - name: Confidential
         description: Confidential Data
         custom_properties:
-          is_confidential: true
+          is_confidential: "true"
       - name: HighlyConfidential
         description: Highly Confidential Data
         custom_properties:
-          is_confidential: true
+          is_confidential: "true"
         domain: Marketing
   - name: PersonalInformation
     description: All terms related to personal information
@@ -72,7 +72,7 @@ nodes:
           - Shipping.CountryCode
           - Shipping.StreetAddress
         custom_properties:
-          is_used_for_compliance_tracking: true
+          is_used_for_compliance_tracking: "true"
         knowledge_links:
           - url: "https://en.wikipedia.org/wiki/Address"
             label: Wiki link

From 02198f7b27515c78d9ff3dcb1fc651a6d0442017 Mon Sep 17 00:00:00 2001
From: Shirshanka Das <shirshanka@apache.org>
Date: Sat, 30 Nov 2024 01:36:10 -0800
Subject: [PATCH 08/10] fix(java-sdk): custom properties patch client (#11984)

---
 .../builder/CustomPropertiesPatchBuilder.java |  13 +-
 .../builder/DataFlowInfoPatchBuilder.java     |  25 +-
 .../builder/DataJobInfoPatchBuilder.java      |   8 -
 .../DatasetPropertiesPatchBuilder.java        |   8 -
 .../IntermediatePatchBuilder.java             |   9 -
 .../builder/DataFlowInfoPatchBuilderTest.java | 280 ++++++++++++++++++
 .../DataJobInputOutputPatchBuilderTest.java   | 237 +++++++++++++++
 7 files changed, 533 insertions(+), 47 deletions(-)
 create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilderTest.java
 create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilderTest.java

diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/CustomPropertiesPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/CustomPropertiesPatchBuilder.java
index e4143851afbe51..b78d563147e636 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/CustomPropertiesPatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/CustomPropertiesPatchBuilder.java
@@ -17,10 +17,16 @@ public class CustomPropertiesPatchBuilder<T extends AbstractMultiFieldPatchBuild
   public static final String CUSTOM_PROPERTIES_BASE_PATH = "/customProperties";
 
   private final T parent;
-  private final List<ImmutableTriple<String, String, JsonNode>> operations = new ArrayList<>();
+  private final List<ImmutableTriple<String, String, JsonNode>> operations;
 
   public CustomPropertiesPatchBuilder(T parentBuilder) {
     this.parent = parentBuilder;
+    if (parentBuilder != null) {
+      // If a parent builder is provided, we use the same path operations list.
+      this.operations = parentBuilder.getPathValues();
+    } else {
+      this.operations = new ArrayList<>();
+    }
   }
 
   /**
@@ -72,9 +78,4 @@ public CustomPropertiesPatchBuilder<T> setProperties(Map<String, String> propert
   public T getParent() {
     return parent;
   }
-
-  @Override
-  public List<ImmutableTriple<String, String, JsonNode>> getSubPaths() {
-    return operations;
-  }
 }
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilder.java
index 6a114d90875fe3..231956a2fcec81 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilder.java
@@ -4,12 +4,10 @@
 import static com.linkedin.metadata.Constants.DATA_FLOW_ENTITY_NAME;
 import static com.linkedin.metadata.Constants.DATA_FLOW_INFO_ASPECT_NAME;
 
-import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.linkedin.common.TimeStamp;
 import com.linkedin.metadata.aspect.patch.PatchOperationType;
 import com.linkedin.metadata.aspect.patch.builder.subtypesupport.CustomPropertiesPatchBuilderSupport;
-import java.util.List;
 import java.util.Map;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -87,28 +85,23 @@ public DataFlowInfoPatchBuilder setCreated(@Nullable TimeStamp created) {
   }
 
   public DataFlowInfoPatchBuilder setLastModified(@Nullable TimeStamp lastModified) {
+    ObjectNode lastModifiedNode = instance.objectNode();
     if (lastModified == null) {
       pathValues.add(
           ImmutableTriple.of(
               PatchOperationType.REMOVE.getValue(), BASE_PATH + LAST_MODIFIED_KEY, null));
+    } else {
+      lastModifiedNode.put(TIME_KEY, lastModified.getTime());
+      if (lastModified.getActor() != null) {
+        lastModifiedNode.put(ACTOR_KEY, lastModified.getActor().toString());
+      }
+      pathValues.add(
+          ImmutableTriple.of(
+              PatchOperationType.ADD.getValue(), BASE_PATH + LAST_MODIFIED_KEY, lastModifiedNode));
     }
-    ObjectNode lastModifiedNode = instance.objectNode();
-    lastModifiedNode.put(TIME_KEY, lastModified.getTime());
-    if (lastModified.getActor() != null) {
-      lastModifiedNode.put(ACTOR_KEY, lastModified.getActor().toString());
-    }
-    pathValues.add(
-        ImmutableTriple.of(
-            PatchOperationType.ADD.getValue(), BASE_PATH + LAST_MODIFIED_KEY, lastModifiedNode));
     return this;
   }
 
-  @Override
-  protected List<ImmutableTriple<String, String, JsonNode>> getPathValues() {
-    pathValues.addAll(customPropertiesPatchBuilder.getSubPaths());
-    return pathValues;
-  }
-
   @Override
   protected String getAspectName() {
     return DATA_FLOW_INFO_ASPECT_NAME;
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInfoPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInfoPatchBuilder.java
index 99c0ac6c15eb1a..dd17fbacf338eb 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInfoPatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DataJobInfoPatchBuilder.java
@@ -4,13 +4,11 @@
 import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME;
 import static com.linkedin.metadata.Constants.DATA_JOB_INFO_ASPECT_NAME;
 
-import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.linkedin.common.TimeStamp;
 import com.linkedin.common.urn.DataFlowUrn;
 import com.linkedin.metadata.aspect.patch.PatchOperationType;
 import com.linkedin.metadata.aspect.patch.builder.subtypesupport.CustomPropertiesPatchBuilderSupport;
-import java.util.List;
 import java.util.Map;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -113,12 +111,6 @@ public DataJobInfoPatchBuilder setLastModified(@Nullable TimeStamp lastModified)
     return this;
   }
 
-  @Override
-  protected List<ImmutableTriple<String, String, JsonNode>> getPathValues() {
-    pathValues.addAll(customPropertiesPatchBuilder.getSubPaths());
-    return pathValues;
-  }
-
   @Override
   protected String getAspectName() {
     return DATA_JOB_INFO_ASPECT_NAME;
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DatasetPropertiesPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DatasetPropertiesPatchBuilder.java
index 31e181fc244fba..60d52c7c720881 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DatasetPropertiesPatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/DatasetPropertiesPatchBuilder.java
@@ -4,10 +4,8 @@
 import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME;
 import static com.linkedin.metadata.Constants.DATASET_PROPERTIES_ASPECT_NAME;
 
-import com.fasterxml.jackson.databind.JsonNode;
 import com.linkedin.metadata.aspect.patch.PatchOperationType;
 import com.linkedin.metadata.aspect.patch.builder.subtypesupport.CustomPropertiesPatchBuilderSupport;
-import java.util.List;
 import java.util.Map;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -116,12 +114,6 @@ public DatasetPropertiesPatchBuilder setCustomProperties(Map<String, String> pro
     return this;
   }
 
-  @Override
-  protected List<ImmutableTriple<String, String, JsonNode>> getPathValues() {
-    pathValues.addAll(customPropertiesPatchBuilder.getSubPaths());
-    return pathValues;
-  }
-
   @Override
   protected String getAspectName() {
     return DATASET_PROPERTIES_ASPECT_NAME;
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/subtypesupport/IntermediatePatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/subtypesupport/IntermediatePatchBuilder.java
index d891a6b9673da0..cd74818c24e191 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/subtypesupport/IntermediatePatchBuilder.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/subtypesupport/IntermediatePatchBuilder.java
@@ -1,9 +1,6 @@
 package com.linkedin.metadata.aspect.patch.builder.subtypesupport;
 
-import com.fasterxml.jackson.databind.JsonNode;
 import com.linkedin.metadata.aspect.patch.builder.AbstractMultiFieldPatchBuilder;
-import java.util.List;
-import org.apache.commons.lang3.tuple.ImmutableTriple;
 
 /**
  * Used for supporting intermediate subtypes when constructing a patch for an aspect that includes
@@ -15,10 +12,4 @@ public interface IntermediatePatchBuilder<T extends AbstractMultiFieldPatchBuild
 
   /** Convenience method to return parent patch builder in functional callstack */
   T getParent();
-
-  /**
-   * Exposes subpath values to parent patch builder in Op, Path, Value triples. Should usually only
-   * be called by the parent patch builder class when constructing the path values.
-   */
-  List<ImmutableTriple<String, String, JsonNode>> getSubPaths();
 }
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilderTest.java
new file mode 100644
index 00000000000000..612282b7c0238c
--- /dev/null
+++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataFlowInfoPatchBuilderTest.java
@@ -0,0 +1,280 @@
+package com.linkedin.metadata.aspect.patch.builder;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.linkedin.common.TimeStamp;
+import com.linkedin.common.urn.Urn;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.lang3.tuple.ImmutableTriple;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class DataFlowInfoPatchBuilderTest {
+
+  private TestableDataFlowInfoPatchBuilder builder;
+  private static final String TEST_URN = "urn:li:dataFlow:(test,flow1,PROD)";
+
+  // Test helper class to expose protected method
+  private static class TestableDataFlowInfoPatchBuilder extends DataFlowInfoPatchBuilder {
+    public List<ImmutableTriple<String, String, JsonNode>> getTestPathValues() {
+      return getPathValues();
+    }
+  }
+
+  @BeforeMethod
+  public void setup() throws URISyntaxException {
+    builder = new TestableDataFlowInfoPatchBuilder();
+    builder.urn(Urn.createFromString(TEST_URN));
+  }
+
+  @Test
+  public void testBuildDoesNotAffectPathValues() throws URISyntaxException {
+    String testName = "testFlow";
+    String testDescription = "Test description";
+
+    builder.setName(testName).setDescription(testDescription).addCustomProperty("key1", "value1");
+
+    // First call build()
+    builder.build();
+
+    // Then verify we can still access pathValues and they're correct
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 3);
+
+    // Verify the operations are still intact
+    assertEquals(pathValues.get(0).getLeft(), "add");
+    assertEquals(pathValues.get(0).getMiddle(), "/name");
+    assertEquals(pathValues.get(0).getRight().asText(), testName);
+
+    assertEquals(pathValues.get(1).getLeft(), "add");
+    assertEquals(pathValues.get(1).getMiddle(), "/description");
+    assertEquals(pathValues.get(1).getRight().asText(), testDescription);
+
+    assertEquals(pathValues.get(2).getLeft(), "add");
+    assertTrue(pathValues.get(2).getMiddle().startsWith("/customProperties/"));
+    assertEquals(pathValues.get(2).getRight().asText(), "value1");
+
+    // Verify we can call build() again without issues
+    builder.build();
+
+    // And verify pathValues are still accessible and correct
+    pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 3);
+  }
+
+  @Test
+  public void testSetName() {
+    String testName = "testFlow";
+    builder.setName(testName);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertEquals(operation.getMiddle(), "/name");
+    assertEquals(operation.getRight().asText(), testName);
+  }
+
+  @Test
+  public void testSetDescription() {
+    String testDescription = "Test description";
+    builder.setDescription(testDescription);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertEquals(operation.getMiddle(), "/description");
+    assertEquals(operation.getRight().asText(), testDescription);
+  }
+
+  @Test
+  public void testSetDescriptionNull() {
+    builder.setDescription(null);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertEquals(operation.getMiddle(), "/description");
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testSetProject() {
+    String testProject = "testProject";
+    builder.setProject(testProject);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertEquals(operation.getMiddle(), "/project");
+    assertEquals(operation.getRight().asText(), testProject);
+  }
+
+  @Test
+  public void testSetProjectNull() {
+    builder.setProject(null);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertEquals(operation.getMiddle(), "/project");
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testSetCreated() throws URISyntaxException {
+    long time = System.currentTimeMillis();
+    String actor = "urn:li:corpuser:testUser";
+    TimeStamp created = new TimeStamp();
+    created.setTime(time);
+    created.setActor(Urn.createFromString(actor));
+    // Setting a populated TimeStamp is expected to record one "add" at /created.
+    builder.setCreated(created);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+    // The value must be an object whose "time" and "actor" fields echo the input.
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertEquals(operation.getMiddle(), "/created");
+    JsonNode createdNode = operation.getRight();
+    assertTrue(createdNode.isObject());
+    assertEquals(createdNode.get("time").asLong(), time);
+    assertEquals(createdNode.get("actor").asText(), actor);
+  }
+
+  @Test
+  public void testSetCreatedNull() {
+    builder.setCreated(null);
+    builder.build();
+    // A null created stamp is expected to be recorded as a single "remove" of /created.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+    // The remove operation must carry no value (right element is null).
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertEquals(operation.getMiddle(), "/created");
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testSetLastModified() throws URISyntaxException {
+    long time = System.currentTimeMillis();
+    String actor = "urn:li:corpuser:testUser";
+    TimeStamp lastModified = new TimeStamp();
+    lastModified.setTime(time);
+    lastModified.setActor(Urn.createFromString(actor));
+    // Setting a populated TimeStamp is expected to record one "add" at /lastModified.
+    builder.setLastModified(lastModified);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+    // The value must be an object whose "time" and "actor" fields echo the input.
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertEquals(operation.getMiddle(), "/lastModified");
+    JsonNode lastModifiedNode = operation.getRight();
+    assertTrue(lastModifiedNode.isObject());
+    assertEquals(lastModifiedNode.get("time").asLong(), time);
+    assertEquals(lastModifiedNode.get("actor").asText(), actor);
+  }
+
+  @Test
+  public void testSetLastModifiedNull() {
+    builder.setLastModified(null);
+    builder.build();
+    // A null stamp is expected to be recorded as a single "remove" of /lastModified.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+    // The remove operation must carry no value (right element is null).
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertEquals(operation.getMiddle(), "/lastModified");
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testAddCustomProperties() {
+    builder.addCustomProperty("key1", "value1").addCustomProperty("key2", "value2");
+    builder.build();
+    // Two chained addCustomProperty calls are expected to record two operations.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 2);
+    // Each one must be an "add" under /customProperties/ carrying a textual value.
+    pathValues.forEach(
+        operation -> {
+          assertEquals(operation.getLeft(), "add");
+          assertTrue(operation.getMiddle().startsWith("/customProperties/"));
+          assertTrue(operation.getRight().isTextual());
+        });
+  }
+
+  @Test
+  public void testRemoveCustomProperty() {
+    builder.removeCustomProperty("key1");
+    builder.build();
+    // Removing one key is expected to record a single "remove" at /customProperties/key1.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+    // The remove operation must carry no value (right element is null).
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertEquals(operation.getMiddle(), "/customProperties/key1");
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testSetCustomProperties() {
+    Map<String, String> properties = new HashMap<>();
+    properties.put("key1", "value1");
+    properties.put("key2", "value2");
+    // Setting the whole map is expected to record one operation, not one per key.
+    builder.setCustomProperties(properties);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+    // That operation must be an "add" at /customProperties whose value is an object.
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertEquals(operation.getMiddle(), "/customProperties");
+    assertTrue(operation.getRight().isObject());
+  }
+}
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilderTest.java
new file mode 100644
index 00000000000000..dc141863e24438
--- /dev/null
+++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/builder/DataJobInputOutputPatchBuilderTest.java
@@ -0,0 +1,237 @@
+package com.linkedin.metadata.aspect.patch.builder;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertThrows;
+import static org.testng.Assert.assertTrue;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.linkedin.common.Edge;
+import com.linkedin.common.urn.DataJobUrn;
+import com.linkedin.common.urn.DatasetUrn;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.metadata.graph.LineageDirection;
+import java.net.URISyntaxException;
+import java.util.List;
+import org.apache.commons.lang3.tuple.ImmutableTriple;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class DataJobInputOutputPatchBuilderTest {
+  // Exercises DataJobInputOutputPatchBuilder via the (op, path, value) triples it records.
+  private TestableDataJobInputOutputPatchBuilder builder;
+  private static final String TEST_DATAJOB_URN =
+      "urn:li:dataJob:(urn:li:dataFlow:(test,flow1,PROD),job1)";
+  private static final String TEST_DATASET_URN =
+      "urn:li:dataset:(urn:li:dataPlatform:hive,SampleTable,PROD)";
+  private static final String TEST_DATASET_FIELD_URN =
+      "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleTable,PROD),id)";
+
+  // Test-only subclass that exposes the protected getPathValues() for assertions.
+  private static class TestableDataJobInputOutputPatchBuilder
+      extends DataJobInputOutputPatchBuilder {
+    public List<ImmutableTriple<String, String, JsonNode>> getTestPathValues() {
+      return getPathValues();
+    }
+  }
+
+  @BeforeMethod
+  public void setup() throws URISyntaxException {
+    builder = new TestableDataJobInputOutputPatchBuilder();
+    builder.urn(Urn.createFromString(TEST_DATAJOB_URN));
+  }
+
+  @Test
+  public void testBuildDoesNotAffectPathValues() throws URISyntaxException {
+    DataJobUrn dataJobUrn = DataJobUrn.createFromString(TEST_DATAJOB_URN);
+    DatasetUrn datasetUrn = DatasetUrn.createFromString(TEST_DATASET_URN);
+    // Queue three edge additions; the assertions below expect three recorded operations.
+    builder
+        .addInputDatajobEdge(dataJobUrn)
+        .addInputDatasetEdge(datasetUrn)
+        .addOutputDatasetEdge(datasetUrn);
+
+    // First call build()
+    builder.build();
+
+    // Then verify we can still access pathValues and they're correct
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 3);
+
+    // Verify we can call build() again without issues
+    builder.build();
+
+    // And verify pathValues are still accessible and correct
+    pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 3);
+  }
+
+  @Test
+  public void testAddInputDatajobEdge() throws URISyntaxException {
+    DataJobUrn dataJobUrn = DataJobUrn.createFromString(TEST_DATAJOB_URN);
+    builder.addInputDatajobEdge(dataJobUrn);
+    builder.build();
+    // Expect one "add" under /inputDatajobEdges/ with the job as destinationUrn.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertTrue(operation.getMiddle().startsWith("/inputDatajobEdges/"));
+    assertTrue(operation.getRight().isObject());
+    assertEquals(operation.getRight().get("destinationUrn").asText(), dataJobUrn.toString());
+  }
+
+  @Test
+  public void testRemoveInputDatajobEdge() throws URISyntaxException {
+    DataJobUrn dataJobUrn = DataJobUrn.createFromString(TEST_DATAJOB_URN);
+    builder.removeInputDatajobEdge(dataJobUrn);
+    builder.build();
+    // Expect one "remove" under /inputDatajobEdges/ with no value attached.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertTrue(operation.getMiddle().startsWith("/inputDatajobEdges/"));
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testAddInputDatasetEdge() throws URISyntaxException {
+    DatasetUrn datasetUrn = DatasetUrn.createFromString(TEST_DATASET_URN);
+    builder.addInputDatasetEdge(datasetUrn);
+    builder.build();
+    // Expect one "add" under /inputDatasetEdges/ with the dataset as destinationUrn.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertTrue(operation.getMiddle().startsWith("/inputDatasetEdges/"));
+    assertTrue(operation.getRight().isObject());
+    assertEquals(operation.getRight().get("destinationUrn").asText(), datasetUrn.toString());
+  }
+
+  @Test
+  public void testRemoveInputDatasetEdge() throws URISyntaxException {
+    DatasetUrn datasetUrn = DatasetUrn.createFromString(TEST_DATASET_URN);
+    builder.removeInputDatasetEdge(datasetUrn);
+    builder.build();
+    // Expect one "remove" under /inputDatasetEdges/ with no value attached.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertTrue(operation.getMiddle().startsWith("/inputDatasetEdges/"));
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testAddOutputDatasetEdge() throws URISyntaxException {
+    DatasetUrn datasetUrn = DatasetUrn.createFromString(TEST_DATASET_URN);
+    builder.addOutputDatasetEdge(datasetUrn);
+    builder.build();
+    // Expect one "add" under /outputDatasetEdges/ with the dataset as destinationUrn.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertTrue(operation.getMiddle().startsWith("/outputDatasetEdges/"));
+    assertTrue(operation.getRight().isObject());
+    assertEquals(operation.getRight().get("destinationUrn").asText(), datasetUrn.toString());
+  }
+
+  @Test
+  public void testAddInputDatasetField() throws URISyntaxException {
+    Urn fieldUrn = Urn.createFromString(TEST_DATASET_FIELD_URN);
+    builder.addInputDatasetField(fieldUrn);
+    builder.build();
+    // Expect one "add" under /inputDatasetFields/ whose value is the field URN as text.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertTrue(operation.getMiddle().startsWith("/inputDatasetFields/"));
+    assertTrue(operation.getRight().isTextual());
+    assertEquals(operation.getRight().asText(), fieldUrn.toString());
+  }
+
+  @Test
+  public void testRemoveInputDatasetField() throws URISyntaxException {
+    Urn fieldUrn = Urn.createFromString(TEST_DATASET_FIELD_URN);
+    builder.removeInputDatasetField(fieldUrn);
+    builder.build();
+    // Expect one "remove" under /inputDatasetFields/ with no value attached.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "remove");
+    assertTrue(operation.getMiddle().startsWith("/inputDatasetFields/"));
+    assertNull(operation.getRight());
+  }
+
+  @Test
+  public void testAddOutputDatasetField() throws URISyntaxException {
+    Urn fieldUrn = Urn.createFromString(TEST_DATASET_FIELD_URN);
+    builder.addOutputDatasetField(fieldUrn);
+    builder.build();
+    // Expect one "add" under /outputDatasetFields/ whose value is the field URN as text.
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertTrue(operation.getMiddle().startsWith("/outputDatasetFields/"));
+    assertTrue(operation.getRight().isTextual());
+    assertEquals(operation.getRight().asText(), fieldUrn.toString());
+  }
+
+  @Test
+  public void testAddEdgeWithDirection() throws URISyntaxException {
+    DatasetUrn datasetUrn = DatasetUrn.createFromString(TEST_DATASET_URN);
+    Edge edge = new Edge();
+    edge.setDestinationUrn(datasetUrn);
+    // An UPSTREAM edge to a dataset is expected to be recorded under /inputDatasetEdges/.
+    builder.addEdge(edge, LineageDirection.UPSTREAM);
+    builder.build();
+
+    List<ImmutableTriple<String, String, JsonNode>> pathValues = builder.getTestPathValues();
+    assertNotNull(pathValues);
+    assertEquals(pathValues.size(), 1);
+
+    ImmutableTriple<String, String, JsonNode> operation = pathValues.get(0);
+    assertEquals(operation.getLeft(), "add");
+    assertTrue(operation.getMiddle().startsWith("/inputDatasetEdges/"));
+    assertTrue(operation.getRight().isObject());
+    assertEquals(operation.getRight().get("destinationUrn").asText(), datasetUrn.toString());
+  }
+
+  @Test
+  public void testInvalidEntityTypeThrowsException() throws URISyntaxException {
+    Urn invalidUrn = Urn.createFromString("urn:li:glossaryTerm:invalid");
+    Edge edge = new Edge();
+    edge.setDestinationUrn(invalidUrn);
+    // A glossaryTerm destination must make addEdge throw IllegalArgumentException.
+    assertThrows(
+        IllegalArgumentException.class,
+        () -> {
+          builder.addEdge(edge, LineageDirection.UPSTREAM);
+        });
+  }
+}

From a31c88e622cf960ee99182553c10f21b525eea67 Mon Sep 17 00:00:00 2001
From: Tamas Nemeth <treff7es@gmail.com>
Date: Mon, 2 Dec 2024 10:03:06 +0100
Subject: [PATCH 09/10] fix[ingest/build]: Disable preflight script as it is
 not needed anymore (#11989)

---
 metadata-ingestion/build.gradle               |  10 +-
 .../scripts/datahub_preflight.sh              | 108 ------------------
 2 files changed, 1 insertion(+), 117 deletions(-)
 delete mode 100755 metadata-ingestion/scripts/datahub_preflight.sh

diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle
index 4e3f1ca91766c2..4e03dd6e2faaf2 100644
--- a/metadata-ingestion/build.gradle
+++ b/metadata-ingestion/build.gradle
@@ -30,15 +30,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
     "touch ${sentinel_file}"
 }
 
-task runPreFlightScript(type: Exec, dependsOn: environmentSetup) {
-  def sentinel_file = ".preflight_sentinel"
-  outputs.file(sentinel_file)
-  commandLine 'bash', '-c',
-    "scripts/datahub_preflight.sh && " +
-    "touch ${sentinel_file}"
-}
-
-task installPackageOnly(type: Exec, dependsOn: runPreFlightScript) {
+task installPackageOnly(type: Exec, dependsOn: environmentSetup) {
   def sentinel_file = "${venv_name}/.build_install_package_only_sentinel"
   inputs.file file('setup.py')
   outputs.file(sentinel_file)
diff --git a/metadata-ingestion/scripts/datahub_preflight.sh b/metadata-ingestion/scripts/datahub_preflight.sh
deleted file mode 100755
index 9676964f4d49d1..00000000000000
--- a/metadata-ingestion/scripts/datahub_preflight.sh
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/bin/bash -e
-
-#From https://stackoverflow.com/questions/4023830/how-to-compare-two-strings-in-dot-separated-version-format-in-bash
-verlte() {
-  [  "$1" == "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
-}
-
-brew_install() {
-    package=${1}
-    required_version=${2}
-    printf '\n🔎 Checking if %s installed\n' "${package}"
-    version=$(brew list --version|grep "$1"|awk '{ print $2 }')
-
-    if [ -n "${version}" ]; then
-      if [ -n "$2" ] && ! verlte "${required_version}" "${version}"; then
-        printf '🔽 %s is installed but its version %s is lower than the required %s\n' "${package}" "${version}" "${required_version}. Updating version..."
-        brew update && brew upgrade "$1" && printf '✅ %s is installed\n' "${package}"
-      else
-        printf '✅ %s is already installed\n' "${package} with version ${version}"
-      fi
-    else
-        brew install "$1" && printf '✅ %s is installed\n' "${package}"
-    fi
-}
-
-arm64_darwin_preflight() {
-  printf "✨ Creating/activating Virtual Environment\n"
-  python3 -m venv venv
-  source venv/bin/activate
-
-  printf "🔎 Checking if Scipy installed\n"
-  if pip list | grep -F scipy; then
-  	printf "✅ Scipy already installed\n"
-  else
-  	printf "Scipy not installed\n"
-  	printf "⛅ Installing prerequisities for scipy"
-  	brew install openblas
-  	OPENBLAS="$(brew --prefix openblas)"
-  	export OPENBLAS
-  	##preinstall numpy and pythran from source
-  	pip3 uninstall -y numpy pythran
-  	pip3 install cython pybind11
-  	pip3 install --no-use-pep517 numpy
-  	pip3 install pythran
-  	pip3 install --no-use-pep517 scipy
-  fi
-
-  brew_install "openssl@1.1"
-  brew install "postgresql@14"
-
-  # postgresql installs libs in a strange way
-  # we first symlink /opt/postgresql@14 to /opt/postgresql
-  if [ ! -z $(brew --prefix)/opt/postgresql ]; then
-    printf "✨ Symlinking postgresql@14 to postgresql\n"
-    ln -sf $(brew --prefix postgresql@14) $(brew --prefix)/opt/postgresql
-  fi
-  # we then symlink all libs under /opt/postgresql@14/lib/postgresql@14 to /opt/postgresql@14/lib
-  if [ ! -z $(brew --prefix postgresql@14)/lib/postgresql@14 ]; then
-    printf "✨ Patching up libs in $(brew --prefix postgresql@14)/lib/postgresql@14)\n"
-    ln -sf $(brew --prefix postgresql@14)/lib/postgresql@14/* $(brew --prefix postgresql@14)/lib/
-  fi
-
-  printf "\e[38;2;0;255;0m✅ Done\e[38;2;255;255;255m\n"
-
-  printf "✨ Setting up environment variable:\n"
-  GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
-  export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL
-  GRPC_PYTHON_BUILD_SYSTEM_ZLIB=1
-  export GRPC_PYTHON_BUILD_SYSTEM_ZLIB
-  CPPFLAGS="-I$(brew --prefix openssl@1.1)/include"
-  export CPPFLAGS
-  LDFLAGS="-L$(brew --prefix openssl@1.1)/lib"
-  export LDFLAGS
-
-cat << EOF
-  export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
-  export GRPC_PYTHON_BUILD_SYSTEM_ZLIB=1
-  export CPPFLAGS="-I$(brew --prefix openssl@1.1)/include"
-  export LDFLAGS="-L$(brew --prefix openssl@1.1)/lib -L$(brew --prefix postgresql@14)/lib/postgresql@14"
-
-EOF
-
-  if pip list | grep -F confluent-kafka; then
-    printf "✅ confluent-kafka already installed\n"
-  else
-    pip3 install confluent-kafka
-  fi
-
-  printf "✨ Setting up prerequisities\n"
-  # none for now, since jq was removed
-
-  printf "\e[38;2;0;255;0m✅ Done\e[38;2;255;255;255m\n"
-}
-
-
-printf "🔎 Checking if current directory is metadata-ingestion folder\n"
-if [ "$(basename "$(pwd)")"	 != "metadata-ingestion" ]; then
-	printf "💥 You should run this script in Datahub\'s metadata-ingestion folder but your folder is %s\n" "$(pwd)"
-	exit 123
-fi
-printf '✅ Current folder is metadata-ingestion (%s) folder\n' "$(pwd)"
-if [[ $(uname -m) == 'arm64' && $(uname) == 'Darwin' ]]; then
-  printf "👟 Running preflight for m1 mac\n"
-  arm64_darwin_preflight
-fi
-
-
-printf "\n\e[38;2;0;255;0m✅ Preflight was successful\e[38;2;255;255;255m\n"

From dc87b51369030ace07f48d857dc02c1c64bc911d Mon Sep 17 00:00:00 2001
From: k-bartlett <keith_bartlett@ymail.com>
Date: Mon, 2 Dec 2024 04:23:28 -0500
Subject: [PATCH 10/10] feat(ingest): connector for Neo4j (#11526)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: kbartlett <keith.bartlett@fullsight.org>
Co-authored-by: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Co-authored-by: Jay Feldman <8128360+feldjay@users.noreply.github.com>
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Co-authored-by: Shirshanka Das <shirshanka@apache.org>
Co-authored-by: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com>
Co-authored-by: Felix Lüdin <13187726+Masterchen09@users.noreply.github.com>
---
 .../app/ingest/source/builder/constants.ts    |   4 +
 .../app/ingest/source/builder/sources.json    |   8 +
 datahub-web-react/src/images/neo4j.png        | Bin 0 -> 12968 bytes
 .../docs/sources/neo4j/neo4j.md               |  20 ++
 .../docs/sources/neo4j/neo4j_recipe.yml       |  12 +
 metadata-ingestion/setup.py                   |   3 +
 .../ingestion/source/common/subtypes.py       |   2 +
 .../ingestion/source/neo4j/__init__.py        |   0
 .../ingestion/source/neo4j/neo4j_source.py    | 331 ++++++++++++++++++
 .../tests/unit/test_neo4j_source.py           | 221 ++++++++++++
 .../bootstrap_mcps/data-platforms.yaml        |  11 +
 11 files changed, 612 insertions(+)
 create mode 100644 datahub-web-react/src/images/neo4j.png
 create mode 100644 metadata-ingestion/docs/sources/neo4j/neo4j.md
 create mode 100644 metadata-ingestion/docs/sources/neo4j/neo4j_recipe.yml
 create mode 100644 metadata-ingestion/src/datahub/ingestion/source/neo4j/__init__.py
 create mode 100644 metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py
 create mode 100644 metadata-ingestion/tests/unit/test_neo4j_source.py

diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts
index f892f0ed525d25..58525b3e88f975 100644
--- a/datahub-web-react/src/app/ingest/source/builder/constants.ts
+++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts
@@ -38,6 +38,7 @@ import sigmaLogo from '../../../../images/sigmalogo.png';
 import sacLogo from '../../../../images/saclogo.svg';
 import cassandraLogo from '../../../../images/cassandralogo.png';
 import datahubLogo from '../../../../images/datahublogo.png';
+import neo4j from '../../../../images/neo4j.png';
 
 export const ATHENA = 'athena';
 export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
@@ -137,6 +138,8 @@ export const DATAHUB_GC = 'datahub-gc';
 export const DATAHUB_LINEAGE_FILE = 'datahub-lineage-file';
 export const DATAHUB_BUSINESS_GLOSSARY = 'datahub-business-glossary';
 export const DATAHUB_URN = `urn:li:dataPlatform:${DATAHUB}`;
+export const NEO4J = 'neo4j';
+export const NEO4J_URN = `urn:li:dataPlatform:${NEO4J}`;
 
 export const PLATFORM_URN_TO_LOGO = {
     [ATHENA_URN]: athenaLogo,
@@ -180,6 +183,7 @@ export const PLATFORM_URN_TO_LOGO = {
     [SAC_URN]: sacLogo,
     [CASSANDRA_URN]: cassandraLogo,
     [DATAHUB_URN]: datahubLogo,
+    [NEO4J_URN]: neo4j,
 };
 
 export const SOURCE_TO_PLATFORM_URN = {
diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json
index 44b8a37f14655d..776b6703895c35 100644
--- a/datahub-web-react/src/app/ingest/source/builder/sources.json
+++ b/datahub-web-react/src/app/ingest/source/builder/sources.json
@@ -325,5 +325,13 @@
         "description": "Ingest databases and tables from any Iceberg catalog implementation",
         "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/iceberg",
         "recipe": "source:\n type: \"iceberg\"\n config:\n   env: dev\n   # each thread will open internet connections to fetch manifest files independently, \n   # this value needs to be adjusted with ulimit\n   processing_threads: 1 \n   # a single catalog definition with a form of a dictionary\n   catalog: \n     demo: # name of the catalog\n       type: \"rest\" # other types are available\n       uri: \"uri\"\n       s3.access-key-id: \"access-key\"\n       s3.secret-access-key: \"secret-access-key\"\n       s3.region: \"aws-region\"\n   profiling:\n     enabled: false\n"
+    },
+    {
+        "urn": "urn:li:dataPlatform:neo4j",
+        "name": "neo4j",
+        "displayName": "Neo4j",
+        "description": "Import Nodes and Relationships from Neo4j.",
+        "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/neo4j/",
+        "recipe": "source:\n    type: 'neo4j'\n    config:\n        uri: 'neo4j+ssc://host:7687'\n        username: 'neo4j'\n        password: 'password'\n        env: 'PROD'\n\nsink:\n  type: \"datahub-rest\"\n  config:\n    server: 'http://localhost:8080'"
     }
 ]
diff --git a/datahub-web-react/src/images/neo4j.png b/datahub-web-react/src/images/neo4j.png
new file mode 100644
index 0000000000000000000000000000000000000000..b03b2a4532b3ba0329fdaccbb60a3a57b111ed4f
GIT binary patch
literal 12968
zcmW+-1y~#177XqV1&TYxrMP>s;!xb7xNC5Cid%7aEACzhUfiWXad&(Bzx@(6Azyay
z-h0lRnK_$C<*zbm$RCkGAP}1D7fDrM{{%eM5TSv;eA%%Izz)hqRYn|Yd6dZ?cmr=D
zFCz);f$dSE1NaXzt(Am?vX!YF2;})QJJmy7eepy1RF{W${sfyVfp@JcJmw)uk(LU9
z1w6qRXaIEyg^~eNMY8Bq5G))j0h*MF6fO#s3KUp0C;&TH#kR~$dO^;|%j<k^?|wvR
zbH(!9<$f8w*~9{(W))#!B}s)QkEPo}0Jl$56&4&W`GH^rdqFOs^r|Y<cg%QY^2C-7
zMj`pXuy0be;ZW2D4uoL1*~6{(q|c1OtiWJ#FR{7lqG@&nu}b$5IR@{vvD9|e22l$K
z$MI?CO}yCYSQ=(5bEVv|IR#=zKGgFbpt^t59T%e9NMQaOL2!xV<j0+sM76U@xgdvE
z;w1H=Nt`uoYylNO8F>ng&L}%*K}mZl5v(uOi3=kBbcqu89t8Ju9{RFa?+@p~MO&mS
zbUUehuD!htBoFN5tjY%+=tQ2B(*L*du;$Q;P+{XE=)?JoHpkA&tdvwUcS}<<cQYIN
zdx7%1+qHw-+NzW{;Z2(*X7xG7s^MetB2;fW&b-AvS@Q!jh`$+%-2WUSYk>kpE#+%N
z_OK)T`LdH536{{kq=b);k5&&IBB?jMc`HKl7S`>V8~M<>36b#FrDh8)hi-4RuezCY
zrO3@m=`(ZsH|$Tk<4^ps=tThMg3arLu7r!llF7lssxoI!n#H(Pa{W(l!{2&G1ob)T
z!+{R9+$)`EEEkk2jC#P!8iFz|rKkydlzZOh^L{u6LSO-xxSg}pnFz|$z1p_PB~^6R
z1K%VUZ}KGeRU`iWU@na)4D=6J^*oSzAni2Fa4+-{2)qTm7Knie$|pyzf@b;wI}0NF
z8*poc@*5%4h-V3bK@6=7AvO>0Cx~S0b1R4?_;?GR0Q$a<J{>gG%jAi+Pl4DkZWaOk
zN!0oWw;7_gDC&2@zvz!*eag7F2sGk?Ul6Oqo5e$wk<^jq#bc*ira%3IcE$S`awcv%
z{rQIRhE&=JTOm}#h-3-ZqIa^6l^D4;1ZP{plldpYc@Siq|AAl)-7uhbK;nY_1vN(k
zBrf?qg5M;D4O5F66e*33DcpedNp$Q7e-YfFxOal?Z+ed4P6=b+<Rs=X4KiN&{mM-0
zwANG=h#HanQPwiIdG`6k%8g}8-#K&$?IM~bz=c#Zk^hKYh1*fr1MDRX#WC{frfb;=
zP|*sb>-#*73)n`}-O?$?p%fVR1uZZ+<1z+J>XE-mRKakAAcM$tk4wlW>?eFDiYKsl
z%yAKF1KL}Hjx)TZEcjYsHvQQ{>w}6r`<JSocAX@`psC^7KK$L|OZ^9^7g)a>a>Z#H
zUQBVM(GcbkXK|Em>}{xRnuITN<SW?05qdioCVZE?m&i^r4{_8I9OP|@CSSy7S!S_j
zy=L`i<vB8{B-|-wlHmU8|CPRyx}v($MT!cE3~7PYHA#|8k?a^$j66v6l(mz!9Mwp)
zp@C2<$)SIl{IXr_TBKK;u1Zu~QS?<d_REB<TG2!?OwpgB=%TEmS~;arBXd`?HS;A7
zFOG0?ma!N$%qZHSl>P$S;yE=I71}a_3YPh-Oa<m5KZ(=Q$+5BDI%8pwn$+q<Au0hm
ziSPWM9dsC~)7^u(6~L^elm|>B39FwLDLvHAVmlK%lLTeDMY}cpzPxoo>-XtXNEyeA
zhxbcJB^Lym{4j~z5gU?^;*?XT0n?<a2F@ZCgU9L`0-fuw`1c0Je8*fEDj0GY2pRAg
z@HH&dtt!4&j8+s^SZb8c(_2I4TCIz%59V@a%?~GU)V=w-!i6hqs?Lg8)N3`)wQ?7y
z=Mn$>p8r1YStdJe_MP^7s<pe#q;<K~qfPaq^+J1T?4reYI;-==vw6S5!b+U{M*UI=
zOF1orMO}|-`^GaEp@y77gA#-64Ljc$VF?kb_kcGvn5Et^^3l98qh3tzU)+B`lClwh
z7AIQ%co}mPBON0<JUP5PJTYua`x!?hNiPXK>7YcXWTphH09F*q(9S5%oX=Y4Nn^if
zpW-^_c%Sd@4Ki0TcLF1MMUEsY%JZjn)_wl<_Z#W2PE)D+tCkh}dAr-0mElZ>brT`G
zLUSBTH9OA!JPv3=sTjuQ@4EP_ip`~Vo6Fj(NGD&HUQHZqb4@nQM4Hq+K3Q75&N?BW
zA(Ti|5B%eJYG^eNPmSfT5a1v=CMFZG;;+yC_>(sKHajUho=?Ps-DSJuS6io3lgs*+
z_Xe8NwJW3Ju}7K{hD)uh|KI%Wgnu(9?8jDnZIjoX_$!b_oA||umM9)b#|(I3WY{uT
zDV}wdXRu}VzCJgQY_k?Zc6WcddHinFcIUit6$irHBB|rqGS0I%ve>#8OV8`gtH`74
z+wVIL220RW)Ke%@)Jo`!M@yWCABG|gv=7h>nD!G5Y)ZCDB}>LgXG+^gbdnYm48<PB
z3`>X0@JLQoI~vwI3`xdlDEN(E$gQVTY8mJkw=%fRXM7v|W^ppg>#8TIr?!S*0cwon
z!#^i6BoTo)o1a<=nF`d)FaIG%_=;r7(MOR_ZOkx3;pDL6nrN!3J&U@SqC4uFaFU{z
z^g>HRzV~HJ+*;Dw<|7L;%PFo>SX%$Ftd^o-ws5*}dO|LHrd3)St4r!R#Nl^PgQkTW
za#=xcxp01tW0oTmSleCWZuRHI&!oy?t!vHuYW8o9E)#!s;+qU>C8|xUj}2<O{UA^E
zXp=<NJmN&VJnjy&YsoEECXEOCg}R8ks?9GyY*KX-pRH{M7it7kIw!n#R|Q+R4&?t{
zU&}PFI@B%)uJo<X^tg;pMrG$BFTpJ$Pas2(MR4n*YJAP#G#<53I<Vbu@pbXZ)?$|@
zR)so8oTg^54-=bccEPXr&1tjpC~}6U(d!;f;|*07BE~(A@88=Dy(~jDht8rfQe;yk
zG*~pM=f~y+DmRIZ$XdOo4~^tw<rIx4o+Z{KJgkLmR*zke#?VdCEy?^;$qY81xpt3-
z;fKQd!{TK;Wu)Wb$xf|)S=rjE+B4XPWra@s88L}le;7W^n*6Yvx}vFLv+d-l=lOEj
z;<SmLdm%LAo9NWACvbXRu}W_TcdGoHdAA%_6gM+jHrb~pT-x4o(y`Lb^Z54L>h_O@
zrP&m2F3rygfeuIe)3&y2!UIbdY6F7vxYKTb_N$%KoV=Vj`R=qDy}MSQ`o*^UHN0@f
z(%Kb$A-7nk>fQF++dJXP?q;$ozwgH_XAWd_g#S8?!G$)Chg-{8aXhSR&K+)#&G)Cr
z5|D8C@PV-Nn8BEs4dcy>_AKxFp{^6~^*xV>s0h6${cYVz!c}2A*{#1VNfW6M=RWts
zJ@4PkSEq%C#@q(KC$@z9ke4D`Rl5eKF_WX4Un4iMrHnaW1tFf?B1!L!H&gRdw>kdz
z(SKNd8xtCFSRYtF_%VBzUtIlWglMh11iHi<ly>F#t6gYrpU!y1cyygpZ75A1`1JbG
zzn8t&A19rcBA{5Ip!7i9hg?6?uS__DK=h=tl49zfE2q9*KDa~8@8xGXquq_L*r}N0
z4fOPqEU=W%lo0_}(e%y<6C)a41wmSHDd8wl5yE7iT})bVF#?WKtn_1)-w`oUqp6u>
z1FERasFKVq+AID1e=5vmoE=gp4Q*LYhGXOwsp-2_9O;&R&2~BZY#b99F6fGbZ5o3q
zPG}kv8I;GIN3V{FNC7Jk9dILaeL{iJCc*WsQ~?ta)A_`9cry4?IpMPT{DQJ!E|d_z
zQk#v@=D#nXyci_(L7aHk`)NfP>#8)p7AAv^+uikW?ItuyC`71X(R>S$T$$21&+ALV
z4U0c$gW#>R{Hz3ebC9y00lV?#ktyA5$TYbac`pGBIY>03Kv`3d`=};RVAFGU6(Wg<
z1qvDFKBmBEVq&K@3hC|S;U;@#USDHLW-s|2B~GYOCZ0jb2r{xKyW0&kReC$8D<EeZ
zPDeN+Z6}n?SHKNfmo0Ta6QT@-Tefotfy95lwXB+dV)ERb^d0T7j0h4XyzX6>;32~T
zfgZl!Qvig%X3lAt01>w8t{RNa3EB_r2)CEUmNH38!NIQykS#!`DojPBh~Z$9mHER8
zWg0~8iJCzA1oL}TbO*V%os|P*q^zRtwyp|S0CHQ=fmPL0DOYBQEC2>%O`$5$x&I4E
z^b50%=Xxt$2%9wG<sApuD<iPsBc#xHz|X)!u-ZGB5Q9s=$&qsySIRgBDTq9(fIcdS
z98;XWII5sbzMWDJEm@U})kVQfz$6XMWR8sj_Czk1g#?iTxtR3uEPtQI6^UY+Miv_u
z3WSV;5*-y)QCa!=`nuZUKt@0i5sD{;1sjOsr{z`<YyNbxB=j5?NX{nZuAL6TOc&;8
z-sg$W8kLxkAwH&`sWs@zOiyohIhfq^fA6ZR`=M&hoi#8paNfEbXZ5E<F^hX*Y6=6Z
z7TtAb0SOc%)->UM5cJNM<Tr}@&$2~;Du>;ITedmzssnzr`98ZiC~4&{1qK&0pWZ7j
z%~QMEv4QXHd}pVi)*krw=yr^;Rw0W!R&}iJP|lVyIV$K!Vh-0;{o+`SRwnc-eQ^v2
z2m~F1DTaAcyhz3Wi|^tMWu3oy;CA8Tw!wkH9fEV{T&9-|&52VLOu&Ri7N_M`6B9~M
zM8R+3!M35>fTQQeDd7(5c?OnUGzOVLGNCN?`xLa&NsEAGSXv}RsIgv_V0}`b5$3Ek
z5pVf4^wz&`59t|U%^M~-<>uo{-qZbvHhU*dshCZ`g|D``lv2xjes$9zy{x7m<D3#~
z^uD$BS;V{MiqI;fM^3dq4(~ws>v~^W{#iM%nN{1OwK=(sR0u!ulWmSVvX$ZOcf^;C
zGVgp-hk0Mq7!|p;I(&8ZiHXnt*2Bsi#vLpB^u<Te6BA2{eK|O}p4`4?Z3mjwI_4Z1
zC+=Y1jTCCx)V;yMK?G7yQ59SjTr{0+4$|CY4RxK0s<-ZqKtEw0VpxQQ{LuJ{uZ=W_
zL3xp5TDVdwxZ)9`X0r?AkOuF!OzW#7vvG??4NXmxV~fV+#l=OdkC~D6-VK&bI*g2r
zPHVu2xw*M&ffnXU5M~O=RCC!x_md@dw<C4nlq%&I6#P__lwY&?@o;g~sZ#WFwV{Bq
zXy+G*Qp-vzRp_e_m>1B0Ni153)kQq(luhPOR5t}pjE?eoUHx_Fc}>m9$w^95)6>&a
zQK5h|kRxtvY>0`8ovyaTM@1b0vHJe@3cf$H<IWP}I#%TH(u1gTre<YjB_=w*JX`|d
z_<FSm(W*6Q`o=`hN+XL5Y#12-eRZ?P-`4iTu4R4y-~b`+G^;jbxNvN~)!fpua+#5d
zsj8|<@=k$PO<7qv9G!$0+_K?aSzGJiQg>j@R-Y0R6SK6ml$eM;Euin1U#XAOD!5Rs
zd*9OYs+el7g1c@pe^|GQ?dZ|a2`y@Ge|859*J0D|=HJZK)zy<aC8D^3!gxBnX^B#<
zu<Ji%Q15bed`9zI3?3fd=~~-0gtbROPL6?@x!O`_z1@vS$lFz1ygveqs!m+)Q_kp`
z={O!34h~Mh{p8^a!WvJ^=luF~>%D+OLLy`_o*ohs(s9;aiL}gVM6fP>d3h-lM~npv
zWm)k?9YtCESy<Q)n6fpw-3mVKRFSubU7N)kCu&dtIbu|Sq_ni~MK8=lQ3?FQc*qz}
z1$|6CFcCx1_$_v;shrjBbxyY=S|aaBN&A@i`1eEjhDNXM9s27(h+?)ev}*LcfbipY
zKPiI{%wWD(X4VUwcV6$tiwx(ioVr)F<;QF@KBO{19;i7vuMbOe?Kiu9O_zv1f1d{C
zj%X@}eU+CgUg(YhdJ(A*u_G@pkIQDk=X!$YdVk^%3*QUZ=g&7s^OZ+@v)O$|M@JD6
zi0kHmY7IqhjW~ug@z1`xG`pX!OiwF!2f^hVQ=o=m-ga|)-~3CWQ)*{->3rv9ckl4M
zJ({;$`L)f)!4^0lUQZzrMl9(0gD*r+f?{tlXWrWPv}sXXTs+fyEew^=d8-$yWtlU_
z=C2ndmAU=fNO;JP^X})kp7b9-wkjOu<dBQiQ_W3V?@w2Oq)8eb3@u!?{UF6IES%Fs
zur~i~3IfUcY?atPfR@Qdw`0lW@Bbc4#ACvED=EktfOGwP-ZL^X!b4%oYZvn4AlIMt
zIqrSY|I^O<Zv$)Vqdc+TOrJGGiYejestF4ldhl(=v0CZZ$LHtcC{*H~X$Ia%#ya}?
z%#bT!jtfHtx}#MKzqW2VLztHY+?1e<{!QmI!+N;APh|_><KV!65~;cJOMWB#+e<UF
zU?4Q~XY&OnwsrM0v#C$XNIaS0w{PG4zwp)HKOW?ovdQiS)<8D?%@m>_BY%wdtobn3
z0A!!jadiuSppuZG=}4RgE*u@oyJ4dRTI-K9w4e^}2p!zB&iDUEi~V{#MUkE&=&X4~
zR?66_&SN+FYXna^caw9Q*R@5}a&LnT5c|mWJwVVxCMRp?isnvyFZvLylRx8teV=Yd
z$H#4{cjj%UGPzLnDo?PmumBfEFS1|j4b^d^$-e%ZiK!`CT@4onO3>EcUNKiY(Kx^6
z+jcnFN2M&CwwJ$2N}{_#>=vk?&d$zhY#mEkx<Pz={Kazm%H`EgFHT;QzT;6+ib0_}
z6BCoG>+3d8=kvjM+n)Eg=Pey`d|2Q>ep{iB$X?H<ExM>@ie#X1VhdXp+%y(_AevNe
zk`5;cGs_AJ3uR&m*uPKb<p_bp!oo5YXywX>Oxk=Noq^Ts3r6sN+{4eBDDjh0{Ab|x
zayf#4_7N8cr|>w+6dk9QTDIrq3PKex7*tqTt@zq0pT+{@smF%b0dvdRY>mDED=RCg
ztfFFUYD!67US3X)A3%w(U(tC_@~fIP@b#I?%gZr@Zl-xQeFpRp1k0D7FGt7@rgCd_
z^KroX9qt;9mfTr{rWzU=ZEbCqQ#nFD4;R}5;V395`msa}X*oH<z&xM#yl<Fs5ITON
zTa}3?^}QOUb0ZQLcp6J#1m4ERHi$5|7V{MeL@m8O-v>u)Kt}$9j*&Q$579%d{Xi@V
zT@K<zaO-P_R>u%>hLQbkx&p=KOG!!Tx4SxNVEFUeueFj0xF@Hj5pim&s$Kz+g+V5~
zcId{HkLi0kgg+mosEgN-mX)=dFM+UY58eB4x$AMZ=JopIW_nj+(A91{5cc-^yg0gn
zqa@gOl#gZ|%GjW<r?=Jx_He9jQ2NO`Q~o9*+0MhhR&PCL29H5nwd!vMfk2MeJvR4P
z0mSK8XMr>@L3o22n06)B%m{(FB_ZN<%&W}r3sa78Hto!0)C;LemVc|$ZLtRc`rw?B
z=xfYnc;`1DGR(}1#3l;pY3b-VQ?xbHx}MKErqa94);rFZ>I=nAmTj9oHhp=>$$LHV
zPL~^H(u8)~hVyf{?J`+bm-GPM{9k09?<v7~==E*--e*HC0L<$CY#m}5y&x4;kT-ww
zbbAa8Xudk*9y5GiR&JZ=?dFftuu&$PF?2ik(I~+kz9l;y9l}s&rFMLDv;>ni4Shq4
z%_58G9j#)fy1M#Yp3^RTB@W%!Jm2y8>D#k)A{d7ws;zm3=PZYAtIkOPmPS60REQw0
z)oXn>U(EIt!vyegh@erY`j$P_#ofuZ#;_-@qw!3KGK89jrhX;W^QWhj%FfOX>OOcZ
z`P16vAeNe?e}W^IriKOpT$6c`wLNc{^BX>M$B%cb4hV}+AZbMIWT6+bWgv~u{*=ax
z;Q%ou{O2ULreY{^qs1Y&@(<fW8$hK>N{7~O91n)W$Hx_7WM{LlEOWfV*a0?i#AkWb
zy$#A1^qK|0Fa_WAa%ZTjxf0L+?d|y-Er8dk7laD!GC*CL^ENv>+wJ>oP1J7~j!L-m
z>(TKJU>U}}26}ow=dGpmi^Wd(kfKrWS$^5AYT8`)Y5te28XD$;ta|39$@_bI`7oRF
zdI~v$8Ew4SFN9lhB+myT??@~!W(f%i=H}*Q-?<T0tGIXYSq-Y}paqsomB?;&D%6;1
zLmr-<fPBg1>VK))KLtP$7`D-MKEnvFr31h)+JmJLk&uzwJugjmmn8__Zx;=Tsw#nc
zf9+M}QlL`rTHNV&y3$lvH!W-KQ4z8}=Q#4%%l5exeXF*%^9q^sv+~?BEV`kE1?}*f
zwQOP$r{&~@aew{f<m7N;W(}rJyQ`IHt7?3=SOD(Z6M$_TDZLVREPAb5r>Cb;T|66}
zg|bV#h*^SO4UBgHotQ+$8fMh)_Tg*gCW<I{xSkT>T$I0=0(r=l&sz^sCAbG)-`xp&
zoDomfga4GIF5GW`YZ;+>>{hv10x#a<g`d7K&U(5UeEk}yGx8mEdpIQ_Az$O*ArcdV
znV3I$_V#!&IQq2?$o{>(y`NpulUU+(e0<u+E<=W^&353^6|NHe@G^rbx15T_D(wN(
zw0~sK_q6hrA@f+`&d$y(hTW)5R8RGfey>mYZVRV~m_Asm?Lcbj<MH}FxmM@-iBHoO
zR@QB}ObG$lp<a4xg_<AD%)lVmF{362MJ^=t`to@Fvcgfp2r!qzX4c;u;~AVV(9mT^
z{PL#zh+SIs_4SBwZrQ(mjE%SJO-Jfjzn(kCt7>X4oGdqb;m?JT76WHHFE7pM&myb}
ztf{mpE*Yl`Gj-h?OR1<}&>y`_>v@4A#-AkOgnh8LwPljNv_Z)$i5`+vNjB%Bp@IAC
zVSTYk$Y}uz7&XJrjF@%`p673h`2Iy!cH0wMYR0;$q@*Nhcqn>f=aX6q+elNS^iCKU
zfS&tJf05R2@1vumc^>bGUd^kgz(O-IFw7db4j)Fu#ck9X!_-VIz_=Ebpi=fe{+^uN
zFWgnn^Els7oW+zI1n6^UXei!?4;8EZWB{~y6<~{D0#ueoza2L7lcZ<$xB8t4o=sbE
zijdeHJE{tggm4F8pTQcvww*yz4l+EvJ%-*o_Cf^EKp2{fTj<Tr;U8r%4colNF~IG}
zgnhm}v!wE5C;)4Sf`rtk9H&XMB8Gpaz?zf)reN6NuBc0Te6rToD7K^~6bVD~`7@AY
z&0ig50E)Wj4QI;4!-HRoc;t>2sx7Z7Ee-GTSHUa>Akw<knxzJLMJE6>0GrcnN*J~#
z9tp{MzC`%RiEVY=@9}YUgei7l6adXR$@bWo(Wm=;vC?D%6w5&vQysj8KBTkG)v{$6
zC?ilvgy3@`#q&4mdvqFPKEIT5*)N!|cA>w6hfEU8Ge!gfc@YqehLEZ5cyy-&F%T1j
z+KE&hTmGXl-tKW;qftmCp+|Rle}4{K2FqHleut*MK9E8aN>ucVe`aStFnu%1YoMd1
zX43gphF5X&liE!LN;F>q8&1X50vCq}b7J1UG#@tL=*Zf!o&{BWgGlao9rs34n69>_
z%tlH|N}x^aA9w>|d%=8>=w01=bY66$4)@c{U?^K5CMMJyi;9MZS(AECFRy&fuw6B6
z?Z>TPjMTI=8&lGHDt2~uGM@`5t>rAoNNx&QWdVm`YUEUg<G19-PY0}chP9}7_lIQb
zRu3dyK`*|a%t3Cs89Jy!@Pc(K9w+Rct+$9osEtl0N-Zee-Q5$(q%a^?=2uWavpPDG
zTsgID^ao~sr=1YSlywNi=6InRkWbZW<_FaED||X4FLztVWN*)FdxAgkMD3(;JEVW<
za=7elo)JxWwD6nzR8pF{71I!E(xH@LfN!Uy=ajj7n~Zi!eVC|ZXH%JVx$RcWsv0ne
zdF$qZS+gz@C|1A?m9lB+zQ5$zI%a-)#!@Y8=Y-b|rHQY8Ihgr19_ycr0-7i}=Ei?~
zcnCQ5NoH<4rPO2dB_4roIFNi;Y-niMH^P9@S5e7d_3gxyA|3?;)<lliJ|Rqs@BVlO
z0P(gH3<7kKyjOBHEA7W!7#^7`Vaj9lEG8qCqOgs!wnSIkmf)3exaz0c_wYSkuaO_h
za`}lB(L_n3xF0{Rm_*2E|K@T&`wfdP4E5`Xd!$ZT&c(&WEgZ7Bxj8iz&)2{TXoN#R
zYUs?w`5w+!E&>k3ob0l$1@$^rq$&gAUvc-oC`Ez6AEKeEavHq*9i*lvSzGhEJ-$YN
z^ZbPP8IzH#NsqUB)d0TXmhyG@o2RFzvvWgyyi#<v!7zbzpi2{*pr9V302YsxmDO}n
zGgVC!AVQCyC>>kbbXmRs0m;D09CsO6TYKJYw+amd^Qgt6(*~PNZ=Oi)^RPZOZ;q$>
z{Ric-&wVi9$R$atlsDL~xm~>Hc&Do%S9h5HbP@jooExAtxmj72T<ri>1EdhGuva%d
zood+<V6s8M!2qlLb&@UY+n$$4E-3g@hWK;0{x3fOzJRC{*d{nWUv8u`RZzigT=qGf
zEpD`&vX0Gf72fnZ$X<2m>AI~17908+BS#Q}Hj>@i5oDemxa{+A<8}0feXZi28DTYf
z7C7*F8CS9aAkX*mbtV)Q@K}eGLX3@#g}`^Pu~cf9Aq@>2F^skhV0`XCug>0~?!Xp>
zAES^oRxK?p8&fp;Vi*MUpCbOkJVT-|RB@g>v4omwuHK7i*sy_1=52XJ#FBljq+<l>
ziU%zCW_x(^7slzC2DnnPiAM^avuhz2{a}Um?hV%NhoI6F8+&NHtse;LFl?D$c&5Jt
zitc4y+Io0Afv>HzleloBNrk_F9{uCz2(U)AW@Y8d1Jdg07uC0dzO_$HNG-H2VytY#
zu#^!6pt#xx!kAR*$P}8rO}bp^TWS2khiQw(?Rg^7_05M1sC&fKp$Ey*grWLpfD8O5
z1J)2eBAGkVH^IB0Hjx-SG_rudzia?3*4moEqpv;Iv*bzjzf1z^<J>2gK8i!Y<eKmB
za3EzOBqm1nu9xPu&-H(6(*dRuP&AS%b6oGcp75}+UlPr@vuF@5Ipe|K66s9~=q1Bj
zWaS~0!-i%<_1yAAJwm+i8{2~kOj$fJJphNxnAitAi?Xt^tu3RD^_;9M@YBuV`1rWD
zUr)Xj65Z{jz<Eq$q^y#XhchxIw1MBF`S`=`Twxftimw+WV9uK511Y!dH-Hx>$&)eu
zT0Ld$ynx;~v%{_e)K$;h({X#L5-45Ji(?;dC!Y(*pqXJ2j$t$HuV21=>+$pb@+ATs
zvt9mQ{m#=q`nDZ~#|Vg;-ku?Hh%;b@B+|&Qk%oWdc`j;w>$oEg^!8McGR)mAKWIJ?
z7JNQ4=I*fNUjLF1%;*3>fXn_kkd|prLt|rOMMXtGajaiqUtpGqDgf|x;wPWW<JI2R
zE(zF^^$rifgad5Jrqxq>GyI?xs~12FWo2idQVTp{4h;<%zfn0xxJ#)(LqlU@V=q)`
z@8a?WpZEgOFN?<^hto1{!a}WQm3*U)B5CvnU|$8_7th=*8Xxn+Nq2VCRFdoJ>K+~*
z0A9SJQFR*#gOF(!=Zf`m?*G1eIC|Q$+3k9$x@NFu1@E!c5H(hIs5Gz@UaWGxUBhSg
zu%`2VmMKq+5sGykV%#V(EbPrdmBnt+w3^NGu@JLi`4q4N=dA&@u9+2UD4ieM9Y3J%
z(F=GvNKI3^yvW%C9yL`}W)<MK<k^#xlips@Q{jljfvKr;An45P*9l82vho0U5#(!1
z1vvdl9G2RBsA^_T&e`c{wao_{*uIewgsG}qh0Qz*jKH{)YJ;wiH#O+nN#$y#93!U`
zN3F^o4x7S`POFIOkM!%bFk2OjBg6{>;TT$suT}Xi8OkUaH`mK`f3dJcn^zQQW!HtF
zFr}cDP3RrQ_Z#Gng_LUfoc9ZsFG<YgG76NB>L}8wM<KQxS&9IEw&lPIN+5`x6<()J
zRsaM(fa!Dgth|^XqRp@V0vu_SHVH`vM|}abZvB``VA{&8*XC>vXc8J48eYAx9>Rze
z0C$dBum}%o2Gnh%#YF$B{;HgY#(dZP#xuZqvnDKbo|BH+Ps-mzG9yRLbbwL}M40Q|
zXLJbg@p;jNnp`-bM!|QsYlOehtdx?_XkY}(7!MA5hkt_LhSTX0qr_!~?jB9%{lgzU
zm!lzNKD+%`DR^z2;w)WXD=XmS;D-Mqqf|Fr=uZ;8ZaN6wza3bmIg`kXW5NZhx}V9t
ziFll3-HMRK=yg7w72Bk}oYRSr2vEr;(j&|5PU@avxk%nI>$iWVq!br0`<7Q%=LndC
z0R0-E!rGMRZHh#Lkf$dmCCS8oOi4`*^E9b|VFawzJ@8qZB_QRajs*T~O<OA08+7}4
z0h#4qIt&d&^X_o$YO52KoPg;auO>(|u_!7kN-Fw;(CwVspcE6R*md$J9{{OE$x(5*
zw|d21=Uw+<=p@!Ht6AKfo)EQI#c*D6B$j<WUmHk`KV9~CmnJd?oC+ca+DC7j_=h*4
zXK4>)lp9kJ+|ZtEDXWj$*3#%7{6O~e>0V)ds)GIjpYalGc@0g2na7{dHrj9jRGrfK
z<dLTE>rWe;{;`L|HE)x}V1-3ng&;QTxw2fpC)w&SN^gFTv!B2<cB{S-;~m#_KOF(m
zZ!y+==lBA6;lCY<?`bX7v=2fjE<4x7{qUhix8*FBXQQm9X5nb?+s5MJ{a_@{%*+h2
z($SgY9k=aLolzh3I(5U(F4-hnX5Ho(|9w$N4*ygF69g!ThV>>x8Jw1`*C{&#5fKra
ze$Qy<7-gq9F9Eumns<vmZ*B*Z*;}pTCUxgruH46JsHMOGjS(<N|6&JXnl$M=-=F8E
zrj~ptTq*}xYymvtkprjGo^+~B1AA(+7O_xP{r$f~Dod6RDMzJn)y*p#kFG6K7j0}e
zwQLW@1iUct;{k!3Mi&E)i^NuceSnA4UqLJ2F$cWxAz&+9e+V7<!%1k`Ky_LOo>N}W
zo}VPBcia|t#PXT4TruNN%-}EwC|vrRUK-G|@qgG3r_1pS^5>BCfHIZH;<hsh)28PI
zr#BChOYy#mIPDAqk`9qo+G^CgDTm+fsJdn2BbyN@Jlfv>@)0mI+?IDMMBdu{UcJW`
zHm^cVqmNow+riAWTokamg@idt;-5@oK;`x0oS!L>LVm2v(v`lx>I+Z|ZWo)xsGJ2l
zGkpU6P)O`r!(MONorz_Idweu8KSO#5lk;dQ$4wh6l1=h^*hbLGXq|5W$W;%(1OPu9
z1p{M5feY}u0ZEg_YS`_0Nm-KQ?k%ObGzgf*ZyVrq<8kuVGp@USlX}2o+X0LrH(F!_
zkcdbRsgSqh9epu?1Awsw7+yWVcQt%}Dr-fU;Mgow^$kL`*MsziO*KA5?*TOB70`OB
zYHHV4R~OvztCEO)wG02=G)UaTSR8K;9`d$l*O$|Et|ems;06Q)ifytmS|r_Uq=E6P
zSD4@*HzNw?sD{%=sh6YW4E;=o<z0Y2B4s=HeO9RT+KMU!aJJ&fM1((m8pSK>HP+&~
z+rSwi5rkoz0E~TLiOL2r7fTG9_J3q$0Y?22Aoo5e3I%igZY!Xl5_x~AtnIe$#PvGI
z9id~rJOJ7P!w;%v+ILr1Iz4``(X8ENjI7PJ%N&kdqFEDRt)<CN&(9vGD;n$3@O@ni
zabAU$yfl*6o$l|7iDmRRGvM&|%|-B({uoZ@IJ`FjMF>OkgSUSJ&*3>IU8W+o=A22s
z#b82ufa*tF+?(9@7di@xZNPx`pm}-tcWe;7zuHx!Ji>3T4VamC!?8qSG-Z0#j*~eB
z^neOq1GwK@xmrA_P}ITiu!fNo%=KnW-Kc^p0F;*-%wN>HYD(fED0(6<u(n3d)gs&X
zrLHoC04|r6@y2$pQs6z2lIh>Kmb`~x8ugCTTHW*tE8F3~N|Z8z%wsZ&Vmb@PJ2P*f
zm~9i)sf2lXJr`^AfqEcm*4n@rjG~*x7#X+1_U1Meh4=on*y9K~#vzM7q4)swl<v;f
zp8*wBu(76mhZ+!livSPrcQYdcP`d#tj&jHVVnr|-hLJm~dKgaF-eHaG-=6mNOzVRC
zk9eGa5dVQpxx9VMR)2U8%V3L}WxeWHNkL4rIeW2KWoH3zPUQ9G&t$g1ZXOS9CR##B
ze=Q)z!FMNKfO9$XPy79wBi9ZL3=Gf~xr_IIp#u6vmnZoU_+=t?lR>~~wVcQt3Whrx
zCH*dGOf|}zkJ!S&hh(Uw+IlAD=y3A0Y)w^TDiJjxqM$ZjD=FOp&BqPi!D$c%R1(b#
z#0pTVE-cu;cK&<#*u)Mqc<F1sb1@5efA)Y&AuB5j*oJN}3VGs!a@{gyzBfR6pnPDu
zxVw|S3uzbrQf7`poFxjZ7PtM{!yhG)+tXG2sS-5vsdB%T(bML&t)!%xAA}IY5)N@P
zem7D!Na&?KBB)`Og_^CxRQG!n1Ua1%cm2wGQ#V^nuq`JlPzKQY|8)!0R8;|%x=<*C
zQ6h&phDB8#!3y7`@e62S)pp%|)WpWb>{W8J4~S&_Vpa<n+JK_%yWAZC{Is=lHM%n?
zg!vZQrjx)iY$-%SLC=dYG$K`1)&0rrXp+B?vYC=Mzr<%vL^)WtZz`W6L=o#D4YG-M
zxnG|5Br~@T6s<@g5{cA0_Jln4U+Q{l3zXLi*N%~(cZ$;t#wI4*oK;oj02jy;`D1Y5
zHd+mRakBgIf*mkhQ$qJ^K(=)Pz#|cko-N|P;kg$Q7B)OMh)Tq5+aH4D<mB|vVkF6s
z>ip9+T;vJ+u=oAhE8uPdu;c@@$vQmF-vAcR$-%+&AzG|C2yjr>y^pJ@Wa54TKCQvZ
z*2y}q$LW!X=B<w(%$biWR@2%|oV3SWj(xIm48t6?EZmSeil&`ONQ7jbXdo1SQ$3g4
zUI9J5dq-N&c_YU91e=uhZu<y97MpPp7$%>kw+SD%3y^)SPP-RboXgA0goK3JfLZYP
z2tXd-GK63PAUHiejZVVv3YcvS49FtM;UOz1audVBk!2^Ptudd9DJUoae=eKfjiLBv
zwpczRBLg@B&;exz`iT`4*sv7j<m8_}1I&Yx;lB;Jw%An2lYy)Gb<=gw0qZ+balALB
zy%`o&i1HX>IaGTAy@qB-%QCgBH(w_b794Lv4%c%E+Ci-zjj+YC^}r`+*<@vHGh7u#
zL@db}`^MnntFAQsySS(=C)l&w9}cYWv=QDqM0wm5t$k2U2fzQnJv?}tT0D5+Cd~a(
zRfCYWz)<GJ8Xhd5P8!(2@Gp-1B>jj%mFocjAED&0;QjM+44u|W5@Jx@Zg0KPJt<#G
zhMJB!+6DLh>59{fq5ZS`xxEU>suwsTK+L@9R>5MjROF@oa#bSLrr0PImZ8ZC2k6U1
z&ui)#P#Cs=D7MVuRg7W+tLW#Ox9akm{QZ6I)+N~A4XAj$U-Bn+b%N0&e2Bf8j=teG
zwQ1Rrk)w<F1ZTQ*K-TCFak=tXrms%8!Y;DZ+8Ht!@rlkqfp7r<gohWHQxb>tb#97=
zYoW3B!xY$UAZMUl6r_NS2(6sycQ&g9_G5>82n~k|eSGE!=)a0u>&Gr@fDT?-5{{3F
zCt9erG<#-~#jLQsllKBRK%+YupWDk?F^7B#eU$zxY<ySad7+w&l$&j(G6QlMK3Crt
zu8|kMdF`o*{Gs1S3)QhgJWsP-;Cpt{VWSF+TNgPifDWLx2jl^eUd8Rtp$f#rPu;<;
zEX+i;ZyY=Ii!5el@)Uu+0R<`~i2F$icyMNLxX2*#$RKKLZS7mbdsaWAAhiIr(x{4~
zMCWMj#Fa336B<D>HO5?cj3yA51$e~cGOo`wUZ@$M52XaKH<i@1w6xUJjv^l~AYfFZ
z4WcYoy>adqq~_HV&_8Ktas^{lQ{)Kk{#Iyl(PniAgYK<_$N_ruh5|%%_5pRn>dX7f
zyZiDsw`g<gx-+r%aP^h8!02RNRBD~=*n5Iny*8jll^d{83|7H)=s4vrkW<G1C5o3*
zN4?APw;dQo;ePzyvh46p{dX*IUpaeTOjII$nx5fjn^`KH@a2;RS!^1yPGbbgdf#0X
zxqyWDf&tW%!1nN8>SXGUT(Cl<7L9FHRVP6EkA<1pEoyhf(A30aI<!FZ(K;Zvl?RES
zlAWrZR*XDqej#7&Iqg89&#fOnw`Mhlw>g;BDbnkolJr*@YyF)966VgbRDh@glYvgi
z@wYb+rv2UuWVACF>4~BCN{3XtnmslSrKaySMSvrjps6Wxbus+|B%-YngH!0;dzw#6
z(Rr4hp8f!|YO%!uuNP?HhV!7go?9A!#x;0xgU%0`oQYy!L=-0(#_rrYIOaU9f%2K~
zVmr;>r&S+JN=jnCCdBp*EsrK%!w|d>1)A(m*=6qxY{LqP1pkg<l0Q<VVfNeJ)nJ`I
zNlIah+ee}Hl0M;JMyVSb8Y&15*Lr`)X!X=3XJR@5kWXvVvt0$G2D3KPg|lQ%*@}8m
z8%b5SeEx5FdDT`mnO6P_aL3hrxaYT?$}D&Sc*>|9pqEcYMU`Oy%?gE)gb_=ET7Xy+
z8v+YLWE&>^$oUYDta1M%{Ffy0%t}<j!}GKM?Y!3RNW91GCoD0{2=T)U0dGYzfp07<
zoEg^r$o9C#s05rDiq1}tOAk;<J5QX$l6_Q*LURR@)}`hBY(#TgDu01m_cWa^@n0)L
ztlKLRQ=C9~Y{TDsA(ZivPIW3WKQIJw9BU%_6TGcdF}sj}gP(oP7GJ)1nK@D@s|CuO
zUojiNIsldr+-CsvQ8++lB5V~jk?gdHA*vrf5MK;*ObK5u$V6_wBR8NX!a_rhiKdF-
zmd23&vOheDvlC}i?hE$BXd<AP1)9PLs00C4pMhG`KQPeYcDzt)*duV(3Xm0O;xnJ!
z^N6+Il+vGYV4*=G@2PL|-G7s#m+?{=L=4aURIvVpVG#4xVekey3x2y-j#lwi*!U;E
zgP<0iC`qy&b@)U5aRHa~B+f%PWEk$u`?rMs5B_WX_rPeY+ms#tgvWQN?wX@xf5oR?
R|NX{9R_d!{jkr<J{{UuLjIjU!

literal 0
HcmV?d00001

diff --git a/metadata-ingestion/docs/sources/neo4j/neo4j.md b/metadata-ingestion/docs/sources/neo4j/neo4j.md
new file mode 100644
index 00000000000000..d4dab2c6c7e1f2
--- /dev/null
+++ b/metadata-ingestion/docs/sources/neo4j/neo4j.md
@@ -0,0 +1,20 @@
+## Integration Details
+
+<!-- Plain-language description of what this integration is meant to do.  -->
+<!-- Include details about where metadata is extracted from (ie. logs, source API, manifest, etc.)   -->
+Neo4j metadata is ingested into DataHub by running the following query:
+`CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;`  
+The returned data is parsed
+and displayed as Nodes and Relationships in DataHub.  Each object is tagged with a label describing what kind of DataHub
+object it is.  The defaults are 'Node' and 'Relationship'.  These tag values can be overwritten in the recipe.
+
+
+
+## Metadata Ingestion Quickstart
+
+### Prerequisites
+
+In order to ingest metadata from Neo4j, you will need:
+
+* Neo4j instance with APOC installed
+
diff --git a/metadata-ingestion/docs/sources/neo4j/neo4j_recipe.yml b/metadata-ingestion/docs/sources/neo4j/neo4j_recipe.yml
new file mode 100644
index 00000000000000..463d65e7ba323b
--- /dev/null
+++ b/metadata-ingestion/docs/sources/neo4j/neo4j_recipe.yml
@@ -0,0 +1,12 @@
+source:
+    type: 'neo4j'
+    config:
+        uri: 'neo4j+ssc://host:7687'
+        username: 'neo4j'
+        password: 'password'
+        env: 'PROD'
+
+sink:
+  type: "datahub-rest"
+  config:
+    server: 'http://localhost:8080'
\ No newline at end of file
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index d7e056b31370df..c6d55fb5bcc56e 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -525,6 +525,7 @@
     "qlik-sense": sqlglot_lib | {"requests", "websocket-client"},
     "sigma": sqlglot_lib | {"requests"},
     "sac": sac,
+    "neo4j": {"pandas", "neo4j"},
 }
 
 # This is mainly used to exclude plugins from the Docker image.
@@ -673,6 +674,7 @@
             "sigma",
             "sac",
             "cassandra",
+            "neo4j",
         ]
         if plugin
         for dependency in plugins[plugin]
@@ -792,6 +794,7 @@
         "sigma = datahub.ingestion.source.sigma.sigma:SigmaSource",
         "sac = datahub.ingestion.source.sac.sac:SACSource",
         "cassandra = datahub.ingestion.source.cassandra.cassandra:CassandraSource",
+        "neo4j = datahub.ingestion.source.neo4j.neo4j_source:Neo4jSource",
     ],
     "datahub.ingestion.transformer.plugins": [
         "pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
index 9fbb15500a863c..a5eecf198a9b49 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
@@ -22,6 +22,8 @@ class DatasetSubTypes(StrEnum):
     SAC_MODEL = "Model"
     SAC_IMPORT_DATA_MODEL = "Import Data Model"
     SAC_LIVE_DATA_MODEL = "Live Data Model"
+    NEO4J_NODE = "Neo4j Node"
+    NEO4J_RELATIONSHIP = "Neo4j Relationship"
 
     # TODO: Create separate entity...
     NOTEBOOK = "Notebook"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py
new file mode 100644
index 00000000000000..2c9107b967e4f8
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py
@@ -0,0 +1,331 @@
+import logging
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional, Type, Union
+
+import pandas as pd
+from neo4j import GraphDatabase
+from pydantic.fields import Field
+
+from datahub.configuration.source_common import EnvConfigMixin
+from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+    SupportStatus,
+    config_class,
+    platform_name,
+    support_status,
+)
+from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.common.subtypes import DatasetSubTypes
+from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaFieldDataType
+from datahub.metadata.schema_classes import (
+    AuditStampClass,
+    BooleanTypeClass,
+    DatasetPropertiesClass,
+    DateTypeClass,
+    NullTypeClass,
+    NumberTypeClass,
+    OtherSchemaClass,
+    SchemaFieldClass,
+    SchemaMetadataClass,
+    StringTypeClass,
+    SubTypesClass,
+    UnionTypeClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+_type_mapping: Dict[Union[Type, str], Type] = {  # keyed by lower-cased type name
+    "list": UnionTypeClass,
+    "boolean": BooleanTypeClass,
+    "integer": NumberTypeClass,
+    "local_date_time": DateTypeClass,
+    "float": NumberTypeClass,
+    "string": StringTypeClass,
+    "date": DateTypeClass,
+    "node": StringTypeClass,  # node/relationship columns are exposed as strings
+    "relationship": StringTypeClass,
+}
+
+
+class Neo4jConfig(EnvConfigMixin):  # connection settings for the Neo4j source
+    username: str = Field(description="Neo4j Username")
+    password: str = Field(description="Neo4j Password")  # NOTE(review): use SecretStr?
+    uri: str = Field(description="The URI for the Neo4j server")
+    env: str = Field(description="Neo4j env")  # required; may shadow EnvConfigMixin.env
+
+
+@dataclass
+class Neo4jSourceReport(SourceReport):  # SourceReport plus Neo4j object counters
+    obj_failures: int = 0  # schema MCPs that failed to build
+    obj_created: int = 0  # schema MCPs built successfully
+
+
+@platform_name("Neo4j", id="neo4j")
+@config_class(Neo4jConfig)
+@support_status(SupportStatus.CERTIFIED)
+class Neo4jSource(Source):
+    NODE = "node"
+    RELATIONSHIP = "relationship"
+    PLATFORM = "neo4j"
+
+    def __init__(self, ctx: PipelineContext, config: Neo4jConfig):
+        self.ctx = ctx  # NOTE(review): Source.__init__ is never called — intended?
+        self.config = config
+        self.report = Neo4jSourceReport()  # fresh report for this source instance
+
+    @classmethod
+    def create(cls, config_dict, ctx):
+        config = Neo4jConfig.parse_obj(config_dict)
+        return cls(ctx, config)
+
+    def get_field_type(self, attribute_type: Union[type, str]) -> SchemaFieldDataType:
+        type_class: type = _type_mapping.get(attribute_type, NullTypeClass)
+        return SchemaFieldDataType(type=type_class())  # NullTypeClass when unmapped
+
+    def get_schema_field_class(
+        self, col_name: str, col_type: str, **kwargs: Any
+    ) -> SchemaFieldClass:
+        if kwargs["obj_type"] == self.NODE and col_type == self.RELATIONSHIP:
+            col_type = self.NODE
+        else:
+            col_type = col_type
+        return SchemaFieldClass(
+            fieldPath=col_name,
+            type=self.get_field_type(col_type),
+            nativeDataType=col_type,
+            description=col_type.upper()
+            if col_type in (self.NODE, self.RELATIONSHIP)
+            else col_type,
+            lastModified=AuditStampClass(
+                time=round(time.time() * 1000), actor="urn:li:corpuser:ingestion"
+            ),
+        )
+
+    def add_properties(
+        self,
+        dataset: str,
+        description: Optional[str] = None,
+        custom_properties: Optional[Dict[str, str]] = None,
+    ) -> MetadataChangeProposalWrapper:
+        dataset_properties = DatasetPropertiesClass(
+            description=description,
+            customProperties=custom_properties,
+        )
+        return MetadataChangeProposalWrapper(
+            entityUrn=make_dataset_urn(
+                platform=self.PLATFORM, name=dataset, env=self.config.env
+            ),
+            aspect=dataset_properties,
+        )
+
+    def generate_neo4j_object(
+        self, dataset: str, columns: list, obj_type: Optional[str] = None
+    ) -> MetadataChangeProposalWrapper:
+        try:
+            fields = [
+                self.get_schema_field_class(key, value.lower(), obj_type=obj_type)
+                for d in columns
+                for key, value in d.items()
+            ]
+            mcp = MetadataChangeProposalWrapper(
+                entityUrn=make_dataset_urn(
+                    platform=self.PLATFORM, name=dataset, env=self.config.env
+                ),
+                aspect=SchemaMetadataClass(
+                    schemaName=dataset,
+                    platform=make_data_platform_urn(self.PLATFORM),
+                    version=0,
+                    hash="",
+                    platformSchema=OtherSchemaClass(rawSchema=""),
+                    lastModified=AuditStampClass(
+                        time=round(time.time() * 1000),
+                        actor="urn:li:corpuser:ingestion",
+                    ),
+                    fields=fields,
+                ),
+            )
+            self.report.obj_created += 1
+        except Exception as e:
+            log.error(e)
+            self.report.obj_failures += 1
+        return mcp
+
+    def get_neo4j_metadata(self, query: str) -> pd.DataFrame:
+        driver = GraphDatabase.driver(
+            self.config.uri, auth=(self.config.username, self.config.password)
+        )
+        """
+        This process retrieves the metadata for Neo4j objects using an APOC query, which returns a dictionary
+        with two columns: key and value. The key represents the Neo4j object, while the value contains the
+        corresponding metadata.
+
+        When data is returned from Neo4j, much of the relationship metadata is stored with the relevant node's
+        metadata. Consequently, the objects are organized into two separate dataframes: one for nodes and one for
+        relationships.
+
+        In the node dataframe, several fields are extracted and added as new columns. Similarly, in the relationship
+        dataframe, certain fields are parsed out, while others require metadata from the nodes dataframe.
+
+        Once the data is parsed and these two dataframes are created, we combine a subset of their columns into a
+        single dataframe, which will be used to create the DataHub objects.
+
+        See the docs for examples of metadata:  metadata-ingestion/docs/sources/neo4j/neo4j.md
+        """
+        try:
+            log.info(f"{query}")
+            with driver.session() as session:
+                result = session.run(query)
+                data = [record for record in result]
+            log.info("Closing Neo4j driver")
+            driver.close()
+
+            node_df = self.process_nodes(data)
+            rel_df = self.process_relationships(data, node_df)
+
+            union_cols = ["key", "obj_type", "property_data_types", "description"]
+            df = pd.concat([node_df[union_cols], rel_df[union_cols]])
+        except Exception as e:
+            self.report.failure(
+                message="Failed to get neo4j metadata",
+                exc=e,
+            )
+
+        return df
+
+    def process_nodes(self, data: list) -> pd.DataFrame:
+        nodes = [record for record in data if record["value"]["type"] == self.NODE]
+        node_df = pd.DataFrame(
+            nodes,
+            columns=["key", "value"],
+        )
+        node_df["obj_type"] = node_df["value"].apply(
+            lambda record: self.get_obj_type(record)
+        )
+        node_df["relationships"] = node_df["value"].apply(
+            lambda record: self.get_relationships(record)
+        )
+        node_df["properties"] = node_df["value"].apply(
+            lambda record: self.get_properties(record)
+        )
+        node_df["property_data_types"] = node_df["properties"].apply(
+            lambda record: self.get_property_data_types(record)
+        )
+        node_df["description"] = node_df.apply(
+            lambda record: self.get_node_description(record, node_df), axis=1
+        )
+        return node_df
+
+    def process_relationships(self, data: list, node_df: pd.DataFrame) -> pd.DataFrame:
+        rels = [
+            record for record in data if record["value"]["type"] == self.RELATIONSHIP
+        ]
+        rel_df = pd.DataFrame(rels, columns=["key", "value"])
+        rel_df["obj_type"] = rel_df["value"].apply(
+            lambda record: self.get_obj_type(record)
+        )
+        rel_df["properties"] = rel_df["value"].apply(
+            lambda record: self.get_properties(record)
+        )
+        rel_df["property_data_types"] = rel_df["properties"].apply(
+            lambda record: self.get_property_data_types(record)
+        )
+        rel_df["description"] = rel_df.apply(
+            lambda record: self.get_rel_descriptions(record, node_df), axis=1
+        )
+        return rel_df
+
+    def get_obj_type(self, record: dict) -> str:
+        return record["type"]  # the "type" entry of the schema record
+
+    def get_rel_descriptions(self, record: dict, df: pd.DataFrame) -> str:
+        descriptions = []
+        for _, row in df.iterrows():
+            relationships = row.get("relationships", {})
+            for relationship, props in relationships.items():
+                if record["key"] == relationship:
+                    if props["direction"] == "in":
+                        for prop in props["labels"]:
+                            descriptions.append(
+                                f"({row['key']})-[{record['key']}]->({prop})"
+                            )
+        return "\n".join(descriptions)
+
+    def get_node_description(self, record: dict, df: pd.DataFrame) -> str:
+        descriptions = []
+        for _, row in df.iterrows():
+            if record["key"] == row["key"]:
+                for relationship, props in row["relationships"].items():
+                    direction = props["direction"]
+                    for node in set(props["labels"]):
+                        if direction == "in":
+                            descriptions.append(
+                                f"({row['key']})<-[{relationship}]-({node})"
+                            )
+                        elif direction == "out":
+                            descriptions.append(
+                                f"({row['key']})-[{relationship}]->({node})"
+                            )
+
+        return "\n".join(descriptions)
+
+    def get_property_data_types(self, record: dict) -> List[dict]:
+        return [{k: v["type"]} for k, v in record.items()]  # one {name: type} per item
+
+    def get_properties(self, record: dict) -> str:
+        return record["properties"]
+
+    def get_relationships(self, record: dict) -> dict:
+        return record.get("relationships", None)
+
+    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+        df = self.get_neo4j_metadata(
+            "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
+        )
+        for index, row in df.iterrows():
+            try:
+                yield MetadataWorkUnit(
+                    id=row["key"],
+                    mcp=self.generate_neo4j_object(
+                        columns=row["property_data_types"],
+                        dataset=row["key"],
+                    ),
+                    is_primary_source=True,
+                )
+
+                yield MetadataWorkUnit(
+                    id=row["key"],
+                    mcp=MetadataChangeProposalWrapper(
+                        entityUrn=make_dataset_urn(
+                            platform=self.PLATFORM,
+                            name=row["key"],
+                            env=self.config.env,
+                        ),
+                        aspect=SubTypesClass(
+                            typeNames=[
+                                DatasetSubTypes.NEO4J_NODE
+                                if row["obj_type"] == self.NODE
+                                else DatasetSubTypes.NEO4J_RELATIONSHIP
+                            ]
+                        ),
+                    ),
+                )
+
+                yield MetadataWorkUnit(
+                    id=row["key"],
+                    mcp=self.add_properties(
+                        dataset=row["key"],
+                        custom_properties=None,
+                        description=row["description"],
+                    ),
+                )
+
+            except Exception as e:
+                raise e
+
+    def get_report(self):
+        return self.report  # the Neo4jSourceReport created in __init__
diff --git a/metadata-ingestion/tests/unit/test_neo4j_source.py b/metadata-ingestion/tests/unit/test_neo4j_source.py
new file mode 100644
index 00000000000000..62586718e86067
--- /dev/null
+++ b/metadata-ingestion/tests/unit/test_neo4j_source.py
@@ -0,0 +1,221 @@
+import unittest
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.source.neo4j.neo4j_source import Neo4jConfig, Neo4jSource
+
+
+@pytest.fixture
+def tracking_uri(tmp_path: Path) -> str:
+    # Fixed placeholder URI; tmp_path is requested but not used to build it.
+    return "neo4j+ssc://host:7687"
+
+
+@pytest.fixture
+def source(tracking_uri: str) -> Neo4jSource:
+    """Neo4jSource wired to the placeholder URI with dummy credentials."""
+    # Build the pieces separately (context first, then config) so the final
+    # constructor call reads as a single line.
+    run_ctx = PipelineContext(run_id="neo4j-test")
+    cfg = Neo4jConfig(uri=tracking_uri, env="Prod", username="test", password="test")
+    return Neo4jSource(ctx=run_ctx, config=cfg)
+
+
+def data():  # Sample rows of {"key": ..., "value": ...}: two nodes, then one relationship.
+    return [
+        {
+            "key": "Node_1",
+            "value": {
+                "count": 433026,
+                "relationships": {
+                    "RELATIONSHIP_1": {
+                        "count": 1,
+                        "properties": {
+                            "Relationship1_Property1": {
+                                "existence": False,
+                                "type": "STRING",
+                                "indexed": False,
+                                "array": False,
+                            }
+                        },
+                        "direction": "in",
+                        "labels": ["Node_2"],
+                    }
+                },
+                "RELATIONSHIP_2": {  # NOTE(review): sits beside "relationships", not inside it — confirm
+                    "count": 2,
+                    "properties": {
+                        "Relationship2_Property1": {
+                            "existence": False,
+                            "type": "STRING",
+                            "indexed": False,
+                            "array": False,
+                        }
+                    },
+                    "direction": "in",
+                    "labels": ["Node_3"],
+                },
+                "type": "node",
+                "properties": {
+                    "Node1_Property1": {
+                        "existence": False,
+                        "type": "DATE",
+                        "indexed": False,
+                        "unique": False,
+                    },
+                    "Node1_Property2": {
+                        "existence": False,
+                        "type": "STRING",
+                        "indexed": False,
+                        "unique": False,
+                    },
+                    "Node1_Property3": {
+                        "existence": False,
+                        "type": "STRING",
+                        "indexed": False,
+                        "unique": False,
+                    },
+                },
+                "labels": [],
+            },
+        },
+        {
+            "key": "Node_2",
+            "value": {
+                "count": 3,
+                "relationships": {
+                    "RELATIONSHIP_1": {
+                        "count": 1,
+                        "properties": {
+                            "Relationship1_Property1": {
+                                "existence": False,
+                                "type": "STRING",
+                                "indexed": False,
+                                "array": False,
+                            }
+                        },
+                        "direction": "out",
+                        "labels": ["Node_2"],
+                    }
+                },
+                "type": "node",
+                "properties": {
+                    "Node2_Property1": {
+                        "existence": False,
+                        "type": "DATE",
+                        "indexed": False,
+                        "unique": False,
+                    },
+                    "Node2_Property2": {
+                        "existence": False,
+                        "type": "STRING",
+                        "indexed": False,
+                        "unique": False,
+                    },
+                    "Node2_Property3": {
+                        "existence": False,
+                        "type": "STRING",
+                        "indexed": False,
+                        "unique": False,
+                    },
+                },
+                "labels": [],
+            },
+        },
+        {
+            "key": "RELATIONSHIP_1",
+            "value": {
+                "count": 4,
+                "type": "relationship",
+                "properties": {
+                    "Relationship1_Property1": {
+                        "existence": False,
+                        "type": "STRING",
+                        "indexed": False,
+                        "array": False,
+                    }
+                },
+            },
+        },
+    ]
+
+
+def test_process_nodes(source):
+    """process_nodes should hand back a pandas DataFrame for the sample rows."""
+    assert isinstance(source.process_nodes(data=data()), pd.DataFrame)
+
+
+def test_process_relationships(source):
+    """process_relationships should return a DataFrame when fed the node frame."""
+    node_df = source.process_nodes(data=data())
+    rel_df = source.process_relationships(data=data(), node_df=node_df)
+    assert isinstance(rel_df, pd.DataFrame)
+
+
+def test_get_obj_type(source):
+    """get_obj_type labels the sample rows node, node, relationship, in order."""
+    expected_kinds = ["node", "node", "relationship"]
+    observed = [source.get_obj_type(entry["value"]) for entry in data()]
+    assert observed == expected_kinds
+
+
+def test_get_node_description(source):
+    """Node descriptions render each relationship as an arrow in its direction."""
+    records = data()
+    node_df = source.process_nodes(data=data())
+    # "in" relationships point at the node; "out" relationships point away from it.
+    expected = {
+        0: "(Node_1)<-[RELATIONSHIP_1]-(Node_2)",
+        1: "(Node_2)-[RELATIONSHIP_1]->(Node_2)",
+    }
+    for position, description in expected.items():
+        assert source.get_node_description(records[position], node_df) == description
+
+
+def test_get_property_data_types(source):  # expects one {name: type} dict per property
+    node_1, node_2, relationship_1 = (row["value"]["properties"] for row in data())
+    assert source.get_property_data_types(node_1) == [
+        {"Node1_Property1": "DATE"},
+        {"Node1_Property2": "STRING"},
+        {"Node1_Property3": "STRING"},
+    ]
+    assert source.get_property_data_types(node_2) == [
+        {"Node2_Property1": "DATE"},
+        {"Node2_Property2": "STRING"},
+        {"Node2_Property3": "STRING"},
+    ]
+    assert source.get_property_data_types(relationship_1) == [
+        {"Relationship1_Property1": "STRING"}
+    ]
+
+
+def test_get_properties(source):  # property names must come back in sample order
+    node_1, node_2, relationship_1 = (row["value"] for row in data())
+    assert list(source.get_properties(node_1).keys()) == [
+        "Node1_Property1",
+        "Node1_Property2",
+        "Node1_Property3",
+    ]
+    assert list(source.get_properties(node_2).keys()) == [
+        "Node2_Property1",
+        "Node2_Property2",
+        "Node2_Property3",
+    ]
+    assert list(source.get_properties(relationship_1).keys()) == [
+        "Relationship1_Property1"
+    ]
+
+
+def test_get_relationships(source):
+    """Node_1's "relationships" mapping in the sample data lists only RELATIONSHIP_1."""
+    # NOTE(review): this only inspects the sample data; `source` is never called —
+    # confirm whether the source's relationship handling was meant to be exercised.
+    first_row = data()[0]
+    assert list(first_row["value"]["relationships"]) == ["RELATIONSHIP_1"]
+
+
+if __name__ == "__main__":
+    unittest.main()  # NOTE(review): tests above are pytest functions, not TestCase classes — confirm this collects any
diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml
index 1625df4a99540d..0b3d815c710980 100644
--- a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml
+++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml
@@ -727,3 +727,14 @@
     displayName: Cassandra
     type: KEY_VALUE_STORE
     logoUrl: "/assets/platforms/cassandralogo.png"
+- entityUrn: urn:li:dataPlatform:neo4j
+  entityType: dataPlatform
+  aspectName: dataPlatformInfo
+  changeType: UPSERT
+  aspect:
+    datasetNameDelimiter: "."
+    name: neo4j
+    displayName: Neo4j
+    type: OTHERS
+    logoUrl: "/assets/platforms/neo4j.png"
+