From 8396829dbc52e32bfc7a542b2a6ebf1cb6daaa6b Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Date: Wed, 27 Sep 2023 14:03:34 -0400
Subject: [PATCH 01/25] build(ingest/databricks): Relax databricks-sdk pin
 (#8855)

---
 metadata-ingestion/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 65deadf16a5b3..024950e3a6fd5 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -258,7 +258,7 @@ def get_long_description():
 
 databricks = {
     # 0.1.11 appears to have authentication issues with azure databricks
-    "databricks-sdk>=0.1.1, <0.1.11",
+    "databricks-sdk>=0.1.1, != 0.1.11",
     "pyspark",
     "requests",
 }

From 2e2cd87d99e66cadcbb9bf833fb83077350ba30d Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Date: Wed, 27 Sep 2023 17:00:02 -0400
Subject: [PATCH 02/25] test(ingest/delta-lake): Fix minio test for new version
 of delta-lake (#8914)

---
 .../src/datahub/ingestion/source/delta_lake/source.py          | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py
index 180ef00459214..c4d01be52ae7d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py
@@ -296,7 +296,8 @@ def get_storage_options(self) -> Dict[str, str]:
                 "AWS_SECRET_ACCESS_KEY": creds.get("aws_secret_access_key") or "",
                 "AWS_SESSION_TOKEN": creds.get("aws_session_token") or "",
                 # Allow http connections, this is required for minio
-                "AWS_STORAGE_ALLOW_HTTP": "true",
+                "AWS_STORAGE_ALLOW_HTTP": "true",  # for delta-lake < 0.11.0
+                "AWS_ALLOW_HTTP": "true",  # for delta-lake >= 0.11.0
             }
             if aws_config.aws_region:
                 opts["AWS_REGION"] = aws_config.aws_region

From 587a46ea1ec04f4144fa24c05c0346a218f7bacf Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Fri, 29 Sep 2023 01:06:23 +0900
Subject: [PATCH 03/25] docs(): fix title of the ui ingestion guide & remove
 browse.md (#8916)

---
 docs-website/sidebars.js             |  1 -
 docs-website/src/pages/docs/index.js |  2 +-
 docs/browse.md                       | 56 ----------------------------
 docs/ui-ingestion.md                 |  2 +-
 4 files changed, 2 insertions(+), 59 deletions(-)
 delete mode 100644 docs/browse.md

diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index b07cd0b03ce11..d8b85da79b31b 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -428,7 +428,6 @@ module.exports = {
         "docs/glossary/business-glossary",
         "docs/tags",
         "docs/ownership/ownership-types",
-        "docs/browse",
         "docs/authorization/access-policies-guide",
         "docs/features/dataset-usage-and-query-history",
         "docs/posts",
diff --git a/docs-website/src/pages/docs/index.js b/docs-website/src/pages/docs/index.js
index 0e8bfdcf3b9d7..0edd07267b27e 100644
--- a/docs-website/src/pages/docs/index.js
+++ b/docs-website/src/pages/docs/index.js
@@ -114,7 +114,7 @@ const featureGuideContent = [
   },
   { title: "Tags", icon: <TagsTwoTone />, to: "docs/tags" },
   {
-    title: "UI-Based Ingestion",
+    title: "Ingestion",
     icon: <ApiTwoTone />,
     to: "docs/ui-ingestion",
   },
diff --git a/docs/browse.md b/docs/browse.md
deleted file mode 100644
index 55a3b16a0a552..0000000000000
--- a/docs/browse.md
+++ /dev/null
@@ -1,56 +0,0 @@
-import FeatureAvailability from '@site/src/components/FeatureAvailability';
-
-# About DataHub Browse
-
-<FeatureAvailability/>
-
-Browse is one of the primary entrypoints for discovering different Datasets, Dashboards, Charts and other DataHub Entities.
-
-Browsing is useful for finding data entities based on a hierarchical structure set in the source system. Generally speaking, that hierarchy will contain the following levels:
-
-* Entity Type (Dataset, Dashboard, Chart, etc.)
-* Environment (prod vs. dev)
-* Platform Type (Snowflake, dbt, Looker, etc.)
-* Container (Warehouse, Schema, Folder, etc.)
-* Entity Name
-
-For example, a user can easily browse for Datasets within the PROD Snowflake environment, the long_tail_companions warehouse, and the analytics schema:
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/browseVid.gif"/>
-</p>
-
-## Using Browse
-
-Browse is accessible by clicking on an Entity Type on the front page of the DataHub UI.
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/browse1.png"/>
-</p>
-
-This will take you into the folder explorer view for browse in which you can drill down to your desired sub categories to find the data you are looking for.
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/browse2.png"/>
-</p>
-
-## Additional Resources
-
-### GraphQL
-
-* [browse](../graphql/queries.md#browse)
-* [browsePaths](../graphql/queries.md#browsePaths)
-
-## FAQ and Troubleshooting
-
-**How are BrowsePaths created?**
-
-BrowsePaths are automatically created for ingested entities based on separator characters that appear within an Urn.
-
-**How can I customize browse paths?**
-
-BrowsePaths are an Aspect similar to other components of an Entity. They can be customized by ingesting custom paths for specified Urns.
-
-*Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!*
-
-### Related Features
-
-* [Search](./how/search.md)
diff --git a/docs/ui-ingestion.md b/docs/ui-ingestion.md
index 2ecb1e634c79f..db2007e1e19a9 100644
--- a/docs/ui-ingestion.md
+++ b/docs/ui-ingestion.md
@@ -1,4 +1,4 @@
-# UI Ingestion Guide 
+# Ingestion
 
 ## Introduction 
 

From e738e16157e2377b0cbcb71c2f2915253f40462f Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Date: Thu, 28 Sep 2023 13:52:35 -0400
Subject: [PATCH 04/25] refactor(ingest/bigquery): Clarify table / view queries
 (#8913)

---
 .../ingestion/source/bigquery_v2/queries.py   | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py
index 5be7a0a7f6b2f..a87cb8c1cbfa5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py
@@ -43,14 +43,14 @@ class BigqueryQuery:
   t.creation_time as created,
   ts.last_modified_time as last_altered,
   tos.OPTION_VALUE as comment,
-  is_insertable_into,
-  ddl,
-  row_count,
-  size_bytes as bytes,
-  num_partitions,
-  max_partition_id,
-  active_billable_bytes,
-  long_term_billable_bytes,
+  t.is_insertable_into,
+  t.ddl,
+  ts.row_count,
+  ts.size_bytes as bytes,
+  p.num_partitions,
+  p.max_partition_id,
+  p.active_billable_bytes,
+  p.long_term_billable_bytes,
   REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix,
   REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base
 
@@ -90,8 +90,8 @@ class BigqueryQuery:
   t.table_type as table_type,
   t.creation_time as created,
   tos.OPTION_VALUE as comment,
-  is_insertable_into,
-  ddl,
+  t.is_insertable_into,
+  t.ddl,
   REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix,
   REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base
 
@@ -118,10 +118,10 @@ class BigqueryQuery:
   t.creation_time as created,
   ts.last_modified_time as last_altered,
   tos.OPTION_VALUE as comment,
-  is_insertable_into,
-  ddl as view_definition,
-  row_count,
-  size_bytes
+  t.is_insertable_into,
+  t.ddl as view_definition,
+  ts.row_count,
+  ts.size_bytes
 FROM
   `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t
   join `{{project_id}}`.`{{dataset_name}}`.__TABLES__ as ts on ts.table_id = t.TABLE_NAME
@@ -143,8 +143,8 @@ class BigqueryQuery:
   t.table_type as table_type,
   t.creation_time as created,
   tos.OPTION_VALUE as comment,
-  is_insertable_into,
-  ddl as view_definition
+  t.is_insertable_into,
+  t.ddl as view_definition
 FROM
   `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t
   left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos on t.table_schema = tos.table_schema

From 833daa8efda34ff53feec4b641f575c338990afd Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Date: Thu, 28 Sep 2023 13:52:50 -0400
Subject: [PATCH 05/25] refactor(ingest/graph): Factor out filter logic (#8888)

---
 .../src/datahub/cli/delete_cli.py             |   7 +-
 .../src/datahub/ingestion/graph/client.py     | 171 +-----------------
 .../src/datahub/ingestion/graph/filters.py    | 162 +++++++++++++++++
 3 files changed, 172 insertions(+), 168 deletions(-)
 create mode 100644 metadata-ingestion/src/datahub/ingestion/graph/filters.py

diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py
index 7ab7605ef6363..f9e0eb45692d4 100644
--- a/metadata-ingestion/src/datahub/cli/delete_cli.py
+++ b/metadata-ingestion/src/datahub/cli/delete_cli.py
@@ -13,11 +13,8 @@
 from datahub.cli import cli_utils
 from datahub.configuration.datetimes import ClickDatetime
 from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP
-from datahub.ingestion.graph.client import (
-    DataHubGraph,
-    RemovedStatusFilter,
-    get_default_graph,
-)
+from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.filters import RemovedStatusFilter
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.perf_timer import PerfTimer
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index 38e965f7f6587..e22d48d0af80a 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -16,15 +16,15 @@
 from datahub.cli.cli_utils import get_url_and_token
 from datahub.configuration.common import ConfigModel, GraphError, OperationalError
 from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
-from datahub.emitter.mce_builder import (
-    DEFAULT_ENV,
-    Aspect,
-    make_data_platform_urn,
-    make_dataplatform_instance_urn,
-)
+from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.emitter.serialization_helper import post_json_transform
+from datahub.ingestion.graph.filters import (
+    RemovedStatusFilter,
+    SearchFilterRule,
+    generate_filter,
+)
 from datahub.ingestion.source.state.checkpoint import Checkpoint
 from datahub.metadata.schema_classes import (
     ASPECT_NAME_MAP,
@@ -59,8 +59,6 @@
 
 logger = logging.getLogger(__name__)
 
-SearchFilterRule = Dict[str, Any]
-
 
 class DatahubClientConfig(ConfigModel):
     """Configuration class for holding connectivity to datahub gms"""
@@ -81,19 +79,6 @@ class DatahubClientConfig(ConfigModel):
 DataHubGraphConfig = DatahubClientConfig
 
 
-class RemovedStatusFilter(enum.Enum):
-    """Filter for the status of entities during search."""
-
-    NOT_SOFT_DELETED = "NOT_SOFT_DELETED"
-    """Search only entities that have not been marked as deleted."""
-
-    ALL = "ALL"
-    """Search all entities, including deleted entities."""
-
-    ONLY_SOFT_DELETED = "ONLY_SOFT_DELETED"
-    """Search only soft-deleted entities."""
-
-
 @dataclass
 class RelatedEntity:
     urn: str
@@ -567,7 +552,7 @@ def _bulk_fetch_schema_info_by_filter(
         # Add the query default of * if no query is specified.
         query = query or "*"
 
-        orFilters = self.generate_filter(
+        orFilters = generate_filter(
             platform, platform_instance, env, container, status, extraFilters
         )
 
@@ -621,54 +606,6 @@ def _bulk_fetch_schema_info_by_filter(
             if entity.get("schemaMetadata"):
                 yield entity["urn"], entity["schemaMetadata"]
 
-    def generate_filter(
-        self,
-        platform: Optional[str],
-        platform_instance: Optional[str],
-        env: Optional[str],
-        container: Optional[str],
-        status: RemovedStatusFilter,
-        extraFilters: Optional[List[SearchFilterRule]],
-    ) -> List[Dict[str, List[SearchFilterRule]]]:
-        andFilters: List[SearchFilterRule] = []
-
-        # Platform filter.
-        if platform:
-            andFilters.append(self._get_platform_filter(platform))
-
-        # Platform instance filter.
-        if platform_instance:
-            andFilters.append(
-                self._get_platform_instance_filter(platform, platform_instance)
-            )
-
-        # Browse path v2 filter.
-        if container:
-            andFilters.append(self._get_container_filter(container))
-
-        # Status filter.
-        status_filter = self._get_status_filer(status)
-        if status_filter:
-            andFilters.append(status_filter)
-
-        # Extra filters.
-        if extraFilters:
-            andFilters += extraFilters
-
-        orFilters: List[Dict[str, List[SearchFilterRule]]] = [{"and": andFilters}]
-
-        # Env filter
-        if env:
-            envOrConditions = self._get_env_or_conditions(env)
-            # This matches ALL of the andFilters and at least one of the envOrConditions.
-            orFilters = [
-                {"and": andFilters["and"] + [extraCondition]}
-                for extraCondition in envOrConditions
-                for andFilters in orFilters
-            ]
-
-        return orFilters
-
     def get_urns_by_filter(
         self,
         *,
@@ -709,7 +646,7 @@ def get_urns_by_filter(
         query = query or "*"
 
         # Env filter.
-        orFilters = self.generate_filter(
+        orFilters = generate_filter(
             platform, platform_instance, env, container, status, extraFilters
         )
 
@@ -778,98 +715,6 @@ def _scroll_across_entities(
                     f"Scrolling to next scrollAcrossEntities page: {scroll_id}"
                 )
 
-    def _get_env_or_conditions(self, env: str) -> List[SearchFilterRule]:
-        # The env filter is a bit more tricky since it's not always stored
-        # in the same place in ElasticSearch.
-        return [
-            # For most entity types, we look at the origin field.
-            {
-                "field": "origin",
-                "value": env,
-                "condition": "EQUAL",
-            },
-            # For containers, we look at the customProperties field.
-            # For any containers created after https://github.com/datahub-project/datahub/pull/8027,
-            # we look for the "env" property. Otherwise, we use the "instance" property.
-            {
-                "field": "customProperties",
-                "value": f"env={env}",
-            },
-            {
-                "field": "customProperties",
-                "value": f"instance={env}",
-            },
-            # Note that not all entity types have an env (e.g. dashboards / charts).
-            # If the env filter is specified, these will be excluded.
-        ]
-
-    def _get_status_filer(
-        self, status: RemovedStatusFilter
-    ) -> Optional[SearchFilterRule]:
-        if status == RemovedStatusFilter.NOT_SOFT_DELETED:
-            # Subtle: in some cases (e.g. when the dataset doesn't have a status aspect), the
-            # removed field is simply not present in the ElasticSearch document. Ideally this
-            # would be a "removed" : "false" filter, but that doesn't work. Instead, we need to
-            # use a negated filter.
-            return {
-                "field": "removed",
-                "values": ["true"],
-                "condition": "EQUAL",
-                "negated": True,
-            }
-
-        elif status == RemovedStatusFilter.ONLY_SOFT_DELETED:
-            return {
-                "field": "removed",
-                "values": ["true"],
-                "condition": "EQUAL",
-            }
-
-        elif status == RemovedStatusFilter.ALL:
-            # We don't need to add a filter for this case.
-            return None
-        else:
-            raise ValueError(f"Invalid status filter: {status}")
-
-    def _get_container_filter(self, container: str) -> SearchFilterRule:
-        # Warn if container is not a fully qualified urn.
-        # TODO: Change this once we have a first-class container urn type.
-        if guess_entity_type(container) != "container":
-            raise ValueError(f"Invalid container urn: {container}")
-
-        return {
-            "field": "browsePathV2",
-            "values": [container],
-            "condition": "CONTAIN",
-        }
-
-    def _get_platform_instance_filter(
-        self, platform: Optional[str], platform_instance: str
-    ) -> SearchFilterRule:
-        if platform:
-            # Massage the platform instance into a fully qualified urn, if necessary.
-            platform_instance = make_dataplatform_instance_urn(
-                platform, platform_instance
-            )
-
-        # Warn if platform_instance is not a fully qualified urn.
-        # TODO: Change this once we have a first-class data platform instance urn type.
-        if guess_entity_type(platform_instance) != "dataPlatformInstance":
-            raise ValueError(f"Invalid data platform instance urn: {platform_instance}")
-
-        return {
-            "field": "platformInstance",
-            "values": [platform_instance],
-            "condition": "EQUAL",
-        }
-
-    def _get_platform_filter(self, platform: str) -> SearchFilterRule:
-        return {
-            "field": "platform.keyword",
-            "values": [make_data_platform_urn(platform)],
-            "condition": "EQUAL",
-        }
-
     def _get_types(self, entity_types: Optional[List[str]]) -> Optional[List[str]]:
         types: Optional[List[str]] = None
         if entity_types is not None:
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/filters.py b/metadata-ingestion/src/datahub/ingestion/graph/filters.py
new file mode 100644
index 0000000000000..1a63aea835729
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/graph/filters.py
@@ -0,0 +1,162 @@
+import enum
+from typing import Any, Dict, List, Optional
+
+from datahub.emitter.mce_builder import (
+    make_data_platform_urn,
+    make_dataplatform_instance_urn,
+)
+from datahub.utilities.urns.urn import guess_entity_type
+
+SearchFilterRule = Dict[str, Any]
+
+
+class RemovedStatusFilter(enum.Enum):
+    """Filter for the status of entities during search."""
+
+    NOT_SOFT_DELETED = "NOT_SOFT_DELETED"
+    """Search only entities that have not been marked as deleted."""
+
+    ALL = "ALL"
+    """Search all entities, including deleted entities."""
+
+    ONLY_SOFT_DELETED = "ONLY_SOFT_DELETED"
+    """Search only soft-deleted entities."""
+
+
+def generate_filter(
+    platform: Optional[str],
+    platform_instance: Optional[str],
+    env: Optional[str],
+    container: Optional[str],
+    status: RemovedStatusFilter,
+    extra_filters: Optional[List[SearchFilterRule]],
+) -> List[Dict[str, List[SearchFilterRule]]]:
+    and_filters: List[SearchFilterRule] = []
+
+    # Platform filter.
+    if platform:
+        and_filters.append(_get_platform_filter(platform))
+
+    # Platform instance filter.
+    if platform_instance:
+        and_filters.append(_get_platform_instance_filter(platform, platform_instance))
+
+    # Browse path v2 filter.
+    if container:
+        and_filters.append(_get_container_filter(container))
+
+    # Status filter.
+    status_filter = _get_status_filter(status)
+    if status_filter:
+        and_filters.append(status_filter)
+
+    # Extra filters.
+    if extra_filters:
+        and_filters += extra_filters
+
+    or_filters: List[Dict[str, List[SearchFilterRule]]] = [{"and": and_filters}]
+
+    # Env filter
+    if env:
+        env_filters = _get_env_filters(env)
+        # This matches ALL the and_filters and at least one of the env_filters.
+        or_filters = [
+            {"and": and_filter["and"] + [extraCondition]}
+            for extraCondition in env_filters
+            for and_filter in or_filters
+        ]
+
+    return or_filters
+
+
+def _get_env_filters(env: str) -> List[SearchFilterRule]:
+    # The env filter is a bit more tricky since it's not always stored
+    # in the same place in ElasticSearch.
+    return [
+        # For most entity types, we look at the origin field.
+        {
+            "field": "origin",
+            "value": env,
+            "condition": "EQUAL",
+        },
+        # For containers, we look at the customProperties field.
+        # For any containers created after https://github.com/datahub-project/datahub/pull/8027,
+        # we look for the "env" property. Otherwise, we use the "instance" property.
+        {
+            "field": "customProperties",
+            "value": f"env={env}",
+        },
+        {
+            "field": "customProperties",
+            "value": f"instance={env}",
+        },
+        # Note that not all entity types have an env (e.g. dashboards / charts).
+        # If the env filter is specified, these will be excluded.
+    ]
+
+
+def _get_status_filter(status: RemovedStatusFilter) -> Optional[SearchFilterRule]:
+    if status == RemovedStatusFilter.NOT_SOFT_DELETED:
+        # Subtle: in some cases (e.g. when the dataset doesn't have a status aspect), the
+        # removed field is simply not present in the ElasticSearch document. Ideally this
+        # would be a "removed" : "false" filter, but that doesn't work. Instead, we need to
+        # use a negated filter.
+        return {
+            "field": "removed",
+            "values": ["true"],
+            "condition": "EQUAL",
+            "negated": True,
+        }
+
+    elif status == RemovedStatusFilter.ONLY_SOFT_DELETED:
+        return {
+            "field": "removed",
+            "values": ["true"],
+            "condition": "EQUAL",
+        }
+
+    elif status == RemovedStatusFilter.ALL:
+        # We don't need to add a filter for this case.
+        return None
+    else:
+        raise ValueError(f"Invalid status filter: {status}")
+
+
+def _get_container_filter(container: str) -> SearchFilterRule:
+    # Reject a container that is not a fully qualified urn.
+    # TODO: Change this once we have a first-class container urn type.
+    if guess_entity_type(container) != "container":
+        raise ValueError(f"Invalid container urn: {container}")
+
+    return {
+        "field": "browsePathV2",
+        "values": [container],
+        "condition": "CONTAIN",
+    }
+
+
+def _get_platform_instance_filter(
+    platform: Optional[str], platform_instance: str
+) -> SearchFilterRule:
+    if platform:
+        # Massage the platform instance into a fully qualified urn, if necessary.
+        platform_instance = make_dataplatform_instance_urn(platform, platform_instance)
+
+    # Reject a platform_instance that is not a fully qualified urn.
+    # TODO: Change this once we have a first-class data platform instance urn type.
+    if guess_entity_type(platform_instance) != "dataPlatformInstance":
+        raise ValueError(f"Invalid data platform instance urn: {platform_instance}")
+
+    return {
+        "field": "platformInstance",
+        "values": [platform_instance],
+        "condition": "EQUAL",
+    }
+
+
+def _get_platform_filter(platform: str) -> SearchFilterRule:
+    return {
+        "field": "platform.keyword",
+        "values": [make_data_platform_urn(platform)],
+        "condition": "EQUAL",
+    }

From d33a85314dc50ca22178ace1612c705b146936db Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Thu, 28 Sep 2023 21:21:04 -0500
Subject: [PATCH 06/25] fix(docker): move base image to `-base` tag, full image
 to head (#8919)

Co-authored-by: Pedro Silva <pedro@acryl.io>
Co-authored-by: Harshal Sheth <hsheth2@gmail.com>
---
 .github/workflows/docker-unified.yml       | 18 +++++++-----------
 docker/datahub-ingestion-base/build.gradle |  2 ++
 docker/datahub-ingestion/build.gradle      |  2 ++
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index de3e0ca93e6b7..44785419ea63b 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -40,10 +40,8 @@ jobs:
     outputs:
       tag: ${{ steps.tag.outputs.tag }}
       slim_tag: ${{ steps.tag.outputs.slim_tag }}
-      full_tag: ${{ steps.tag.outputs.full_tag }}
       unique_tag: ${{ steps.tag.outputs.unique_tag }}
       unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
-      unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
       publish: ${{ steps.publish.outputs.publish }}
       python_release_version: ${{ steps.tag.outputs.python_release_version }}
     steps:
@@ -55,10 +53,8 @@ jobs:
           source .github/scripts/docker_helpers.sh
           echo "tag=$(get_tag)" >> $GITHUB_OUTPUT
           echo "slim_tag=$(get_tag)-slim" >> $GITHUB_OUTPUT
-          echo "full_tag=$(get_tag)-full" >> $GITHUB_OUTPUT
           echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
           echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT
-          echo "unique_full_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
           echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
       - name: Check whether publishing enabled
         id: publish
@@ -459,7 +455,7 @@ jobs:
           platforms: linux/amd64,linux/arm64/v8
       - name: Compute DataHub Ingestion (Base) Tag
         id: tag
-        run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.tag || 'head' }}" >> $GITHUB_OUTPUT
+        run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
   datahub_ingestion_base_slim_build:
     name: Build and Push DataHub Ingestion (Base-Slim) Docker Image
     runs-on: ubuntu-latest
@@ -531,7 +527,7 @@ jobs:
           target: full-install
           images: |
             ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
-          tags: ${{ needs.setup.outputs.unique_full_tag }}
+          tags: ${{ needs.setup.outputs.tag }}
           username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
           password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
           build-args: |
@@ -543,7 +539,7 @@ jobs:
           platforms: linux/amd64,linux/arm64/v8
       - name: Compute DataHub Ingestion (Base-Full) Tag
         id: tag
-        run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
+        run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
 
 
   datahub_ingestion_slim_build:
@@ -656,7 +652,7 @@ jobs:
         uses: ishworkh/docker-image-artifact-download@v1
         if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }}
         with:
-          image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}
+          image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}
       - name: Build and push Full Image
         if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
         uses: ./.github/actions/docker-custom-build-and-push
@@ -666,9 +662,9 @@ jobs:
             ${{ env.DATAHUB_INGESTION_IMAGE }}
           build-args: |
             BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
-            DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}
+            DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}
             RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
-          tags: ${{ needs.setup.outputs.unique_full_tag }}
+          tags: ${{ needs.setup.outputs.tag }}
           username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
           password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
           publish: ${{ needs.setup.outputs.publish }}
@@ -677,7 +673,7 @@ jobs:
           platforms: linux/amd64,linux/arm64/v8
       - name: Compute Tag (Full)
         id: tag
-        run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
+        run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
   datahub_ingestion_full_scan:
     permissions:
       contents: read # for actions/checkout to fetch code
diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle
index 10cd2ee71cce3..fe65bccde4a07 100644
--- a/docker/datahub-ingestion-base/build.gradle
+++ b/docker/datahub-ingestion-base/build.gradle
@@ -9,6 +9,8 @@ ext {
     docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
     docker_repo = 'datahub-ingestion-base'
     docker_dir = 'datahub-ingestion-base'
+
+    revision = 0 // increment to trigger rebuild
 }
 
 docker {
diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle
index 307594018c92e..293437376ae71 100644
--- a/docker/datahub-ingestion/build.gradle
+++ b/docker/datahub-ingestion/build.gradle
@@ -9,6 +9,8 @@ ext {
     docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
     docker_repo = 'datahub-ingestion'
     docker_dir = 'datahub-ingestion'
+
+    revision = 0 // increment to trigger rebuild
 }
 
 dependencies {

From b45b7f7d2a369a5ecc3c9b0900acd98b0f0a2734 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 29 Sep 2023 17:49:36 -0500
Subject: [PATCH 07/25] fix(docker): slim tags (#8922)

---
 .github/scripts/docker_helpers.sh          | 16 ++++++++++++++++
 .github/workflows/docker-unified.yml       | 14 +++++++++-----
 docker/datahub-ingestion-base/build.gradle |  2 +-
 docker/datahub-ingestion/build.gradle      |  2 +-
 4 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/.github/scripts/docker_helpers.sh b/.github/scripts/docker_helpers.sh
index 63c53b2c3d02f..a74d90455acd6 100755
--- a/.github/scripts/docker_helpers.sh
+++ b/.github/scripts/docker_helpers.sh
@@ -15,10 +15,26 @@ function get_tag {
     echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
 }
 
+function get_tag_slim {
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
+}
+
+function get_tag_full {
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
+}
+
 function get_python_docker_release_v {
     echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,0.0.0+docker.pr\1,g')
 }
 
 function get_unique_tag {
     echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
+}
+
+function get_unique_tag_slim {
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
+}
+
+function get_unique_tag_full {
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
 }
\ No newline at end of file
diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index 44785419ea63b..2aae6bf51529d 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -40,8 +40,10 @@ jobs:
     outputs:
       tag: ${{ steps.tag.outputs.tag }}
       slim_tag: ${{ steps.tag.outputs.slim_tag }}
+      full_tag: ${{ steps.tag.outputs.full_tag }}
       unique_tag: ${{ steps.tag.outputs.unique_tag }}
       unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
+      unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
       publish: ${{ steps.publish.outputs.publish }}
       python_release_version: ${{ steps.tag.outputs.python_release_version }}
     steps:
@@ -52,9 +54,11 @@ jobs:
         run: |
           source .github/scripts/docker_helpers.sh
           echo "tag=$(get_tag)" >> $GITHUB_OUTPUT
-          echo "slim_tag=$(get_tag)-slim" >> $GITHUB_OUTPUT
+          echo "slim_tag=$(get_tag_slim)" >> $GITHUB_OUTPUT
+          echo "full_tag=$(get_tag_full)" >> $GITHUB_OUTPUT
           echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
-          echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT
+          echo "unique_slim_tag=$(get_unique_tag_slim)" >> $GITHUB_OUTPUT
+          echo "unique_full_tag=$(get_unique_tag_full)" >> $GITHUB_OUTPUT
           echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
       - name: Check whether publishing enabled
         id: publish
@@ -520,14 +524,14 @@ jobs:
         if: ${{ needs.setup.outputs.publish != 'true' &&  steps.filter.outputs.datahub-ingestion-base == 'true' }}
         with:
           image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}
-      - name: Build and push Base-Full Image
+      - name: Build and push (Base-Full) Image
         if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' }}
         uses: ./.github/actions/docker-custom-build-and-push
         with:
           target: full-install
           images: |
             ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
-          tags: ${{ needs.setup.outputs.tag }}
+          tags: ${{ needs.setup.outputs.full_tag }}
           username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
           password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
           build-args: |
@@ -539,7 +543,7 @@ jobs:
           platforms: linux/amd64,linux/arm64/v8
       - name: Compute DataHub Ingestion (Base-Full) Tag
         id: tag
-        run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> $GITHUB_OUTPUT
+        run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
 
 
   datahub_ingestion_slim_build:
diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle
index fe65bccde4a07..84dd7f557bf9b 100644
--- a/docker/datahub-ingestion-base/build.gradle
+++ b/docker/datahub-ingestion-base/build.gradle
@@ -10,7 +10,7 @@ ext {
     docker_repo = 'datahub-ingestion-base'
     docker_dir = 'datahub-ingestion-base'
 
-    revision = 0 // increment to trigger rebuild
+    revision = 1 // increment to trigger rebuild
 }
 
 docker {
diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle
index 293437376ae71..6dd20c15a055b 100644
--- a/docker/datahub-ingestion/build.gradle
+++ b/docker/datahub-ingestion/build.gradle
@@ -10,7 +10,7 @@ ext {
     docker_repo = 'datahub-ingestion'
     docker_dir = 'datahub-ingestion'
 
-    revision = 0 // increment to trigger rebuild
+    revision = 1 // increment to trigger rebuild
 }
 
 dependencies {

From 4d9a7ce7c94419307617bb8ad56477a301516de2 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 29 Sep 2023 23:07:45 -0500
Subject: [PATCH 08/25] ci: Docker slim tag fix (#8925)

---
 .github/scripts/docker_helpers.sh          | 8 ++++----
 docker/datahub-ingestion-base/build.gradle | 2 +-
 docker/datahub-ingestion/build.gradle      | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/scripts/docker_helpers.sh b/.github/scripts/docker_helpers.sh
index a74d90455acd6..f238c5c409184 100755
--- a/.github/scripts/docker_helpers.sh
+++ b/.github/scripts/docker_helpers.sh
@@ -16,11 +16,11 @@ function get_tag {
 }
 
 function get_tag_slim {
-    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim\,${SHORT_SHA}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
 }
 
 function get_tag_full {
-    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full\,${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
 }
 
 function get_python_docker_release_v {
@@ -32,9 +32,9 @@ function get_unique_tag {
 }
 
 function get_unique_tag_slim {
-    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
 }
 
 function get_unique_tag_full {
-    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
+    echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
 }
\ No newline at end of file
diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle
index 84dd7f557bf9b..bbd8242553cc5 100644
--- a/docker/datahub-ingestion-base/build.gradle
+++ b/docker/datahub-ingestion-base/build.gradle
@@ -10,7 +10,7 @@ ext {
     docker_repo = 'datahub-ingestion-base'
     docker_dir = 'datahub-ingestion-base'
 
-    revision = 1 // increment to trigger rebuild
+    revision = 2 // increment to trigger rebuild
 }
 
 docker {
diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle
index 6dd20c15a055b..fed33752a4b81 100644
--- a/docker/datahub-ingestion/build.gradle
+++ b/docker/datahub-ingestion/build.gradle
@@ -10,7 +10,7 @@ ext {
     docker_repo = 'datahub-ingestion'
     docker_dir = 'datahub-ingestion'
 
-    revision = 1 // increment to trigger rebuild
+    revision = 2 // increment to trigger rebuild
 }
 
 dependencies {

From b61c38ab0539f546b65f3122962a0f84d215e581 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Sat, 30 Sep 2023 22:47:59 -0500
Subject: [PATCH 09/25] =?UTF-8?q?refactor(misc):=20testngJava=20fix,=20sys?=
 =?UTF-8?q?temrestli=20client,=20cache=20key=20fix,=20e=E2=80=A6=20(#8926)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build.gradle                                  |  2 +-
 .../system/elasticsearch/util/IndexUtils.java |  2 +-
 ...pgradeCliApplicationTestConfiguration.java |  4 +
 .../linkedin/metadata/entity/AspectDao.java   |  4 +
 .../metadata/entity/EntityServiceImpl.java    |  9 +-
 .../entity/cassandra/CassandraAspectDao.java  |  7 ++
 .../metadata/entity/ebean/EbeanAspectDao.java | 13 +++
 .../elastic/ElasticSearchGraphService.java    |  4 +-
 .../elasticsearch/ElasticSearchService.java   |  4 +-
 .../indexbuilder/ESIndexBuilder.java          | 27 ++++--
 .../indexbuilder/EntityIndexBuilder.java      | 35 --------
 .../indexbuilder/EntityIndexBuilders.java     | 59 +++++++------
 .../indexbuilder/MappingsBuilder.java         | 86 +++++++++++--------
 .../indexbuilder/ReindexConfig.java           | 24 ++++--
 .../SearchDocumentTransformer.java            | 23 +++--
 .../service/UpdateIndicesService.java         | 50 ++++++++---
 .../metadata/shared/ElasticSearchIndexed.java |  2 +-
 .../ElasticSearchSystemMetadataService.java   |  4 +-
 .../ElasticSearchTimeseriesAspectService.java |  5 +-
 .../TimeseriesAspectIndexBuilders.java        |  5 +-
 .../entity/EbeanAspectMigrationsDaoTest.java  | 31 +++++--
 .../io/datahubproject/test/DataGenerator.java | 22 ++++-
 .../src/main/resources/application.properties |  2 +-
 ...eConsumerApplicationTestConfiguration.java |  4 +
 .../kafka/MetadataChangeLogProcessor.java     |  7 +-
 .../kafka/hook/MetadataChangeLogHook.java     |  8 ++
 .../kafka/hook/UpdateIndicesHook.java         |  2 +-
 .../kafka/hook/UpdateIndicesHookTest.java     | 15 +++-
 .../spring/MCLSpringTestConfiguration.java    |  4 +
 ...eConsumerApplicationTestConfiguration.java |  4 +
 .../src/main/resources/application.yml        |  2 +-
 .../factory/entity/EntityServiceFactory.java  | 18 ++--
 .../entity/JavaEntityClientFactory.java       |  9 +-
 .../indices/UpdateIndicesServiceFactory.java  | 28 +++++-
 .../search/ElasticSearchServiceFactory.java   |  7 +-
 .../search/EntityIndexBuildersFactory.java    | 35 ++++++++
 .../entity/client/EntityClientCache.java      | 12 ++-
 .../metadata/entity/EntityService.java        |  8 ++
 38 files changed, 404 insertions(+), 183 deletions(-)
 delete mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java
 create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/EntityIndexBuildersFactory.java

diff --git a/build.gradle b/build.gradle
index 0a94991b131aa..c8892045a6683 100644
--- a/build.gradle
+++ b/build.gradle
@@ -291,7 +291,7 @@ subprojects {
     maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
 
     if (project.configurations.getByName("testImplementation").getDependencies()
-            .any{ it.getName() == "testng" }) {
+            .any{ it.getName().contains("testng") }) {
       useTestNG()
     }
   }
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java
index 4b04feac62cbf..d9788448444ed 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java
@@ -31,7 +31,7 @@ public static List<ReindexConfig> getAllReindexConfigs(List<ElasticSearchIndexed
     List<ReindexConfig> reindexConfigs = new ArrayList<>(_reindexConfigs);
     if (reindexConfigs.isEmpty()) {
       for (ElasticSearchIndexed elasticSearchIndexed : elasticSearchIndexedList) {
-        reindexConfigs.addAll(elasticSearchIndexed.getReindexConfigs());
+        reindexConfigs.addAll(elasticSearchIndexed.buildReindexConfigs());
       }
       _reindexConfigs = new ArrayList<>(reindexConfigs);
     }
diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java
index b1bdead58a72b..6cc853b2c7c4d 100644
--- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java
+++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java
@@ -6,6 +6,7 @@
 import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.search.SearchService;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import io.ebean.Database;
 import org.springframework.boot.test.context.TestConfiguration;
 import org.springframework.boot.test.mock.mockito.MockBean;
@@ -35,4 +36,7 @@ public class UpgradeCliApplicationTestConfiguration {
 
     @MockBean
     ConfigEntityRegistry configEntityRegistry;
+
+    @MockBean
+    public EntityIndexBuilders entityIndexBuilders;
 }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java
index 2d5c5e23ae528..42dd3f0405a6a 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java
@@ -8,6 +8,7 @@
 import io.ebean.PagedList;
 import io.ebean.Transaction;
 
+import java.util.stream.Stream;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import java.sql.Timestamp;
@@ -103,6 +104,9 @@ Integer countAspect(
     @Nonnull
     PagedList<EbeanAspectV2> getPagedAspects(final RestoreIndicesArgs args);
 
+    @Nonnull
+    Stream<EntityAspect> streamAspects(String entityName, String aspectName);
+
     int deleteUrn(@Nullable Transaction tx, @Nonnull final String urn);
 
     @Nonnull
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
index 66188473b9d03..57f88e31deea5 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
@@ -3,6 +3,7 @@
 import com.codahale.metrics.Timer;
 import com.linkedin.data.template.GetMode;
 import com.linkedin.data.template.SetMode;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.metadata.config.PreProcessHooks;
 import com.datahub.util.RecordUtils;
 import com.datahub.util.exception.ModelConversionException;
@@ -93,6 +94,7 @@
 import javax.persistence.EntityNotFoundException;
 
 import io.ebean.Transaction;
+import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
 
 import static com.linkedin.metadata.Constants.*;
@@ -144,11 +146,11 @@ public class EntityServiceImpl implements EntityService {
   private final Map<String, Set<String>> _entityToValidAspects;
   private RetentionService _retentionService;
   private final Boolean _alwaysEmitChangeLog;
+  @Getter
   private final UpdateIndicesService _updateIndicesService;
   private final PreProcessHooks _preProcessHooks;
   protected static final int MAX_KEYS_PER_QUERY = 500;
 
-
   private final Integer ebeanMaxTransactionRetry;
 
   public EntityServiceImpl(
@@ -180,6 +182,11 @@ public EntityServiceImpl(
     ebeanMaxTransactionRetry = retry != null ? retry : DEFAULT_MAX_TRANSACTION_RETRY;
   }
 
+  @Override
+  public void setSystemEntityClient(SystemEntityClient systemEntityClient) {
+    this._updateIndicesService.setSystemEntityClient(systemEntityClient);
+  }
+
   /**
    * Retrieves the latest aspects corresponding to a batch of {@link Urn}s based on a provided
    * set of aspect names.
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java
index b215dd4a5d1ed..9f4a36efb4501 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java
@@ -41,6 +41,7 @@
 import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 
@@ -445,6 +446,12 @@ public PagedList<EbeanAspectV2> getPagedAspects(final RestoreIndicesArgs args) {
     return null;
   }
 
+  @Nonnull
+  @Override
+  public Stream<EntityAspect> streamAspects(String entityName, String aspectName) {
+    // Not implemented for Cassandra: return an empty stream so the @Nonnull contract holds
+    return Stream.empty();
+  }
 
   @Override
   @Nonnull
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
index 30886db264994..c16c98b34f3eb 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
@@ -42,6 +42,7 @@
 import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 
@@ -433,6 +434,18 @@ public PagedList<EbeanAspectV2> getPagedAspects(final RestoreIndicesArgs args) {
             .findPagedList();
   }
 
+  @Override
+  @Nonnull
+  public Stream<EntityAspect> streamAspects(String entityName, String aspectName) {
+    ExpressionList<EbeanAspectV2> exp = _server.find(EbeanAspectV2.class)
+        .select(EbeanAspectV2.ALL_COLUMNS)
+        .where()
+        .eq(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION)
+        .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName)
+        .like(EbeanAspectV2.URN_COLUMN, "urn:li:" + entityName + ":%");
+    return exp.query().findStream().map(EbeanAspectV2::toEntityAspect);
+  }
+
   @Override
   @Nonnull
   public Iterable<String> listAllUrns(int start, int pageSize) {
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java
index 02e36af343b07..5fdf4d45ffa3b 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java
@@ -318,7 +318,7 @@ public void removeEdgesFromNode(
   public void configure() {
     log.info("Setting up elastic graph index");
     try {
-      for (ReindexConfig config : getReindexConfigs()) {
+      for (ReindexConfig config : buildReindexConfigs()) {
         _indexBuilder.buildIndex(config);
       }
     } catch (IOException e) {
@@ -327,7 +327,7 @@ public void configure() {
   }
 
   @Override
-  public List<ReindexConfig> getReindexConfigs() throws IOException {
+  public List<ReindexConfig> buildReindexConfigs() throws IOException {
     return List.of(_indexBuilder.buildReindexState(_indexConvention.getIndexName(INDEX_NAME),
             GraphRelationshipMappingsBuilder.getMappings(), Collections.emptyMap()));
   }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
index bf4dffe9e5fb8..ef5a555e95ba8 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
@@ -46,8 +46,8 @@ public void configure() {
   }
 
   @Override
-  public List<ReindexConfig> getReindexConfigs() {
-    return indexBuilders.getReindexConfigs();
+  public List<ReindexConfig> buildReindexConfigs() {
+    return indexBuilders.buildReindexConfigs();
   }
 
   @Override
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java
index 10c2fd725dca9..43431e93622f7 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java
@@ -206,12 +206,7 @@ public void buildIndex(ReindexConfig indexState) throws IOException {
       // no need to reindex and only new mappings or dynamic settings
 
       // Just update the additional mappings
-      if (indexState.isPureMappingsAddition()) {
-        log.info("Updating index {} mappings in place.", indexState.name());
-        PutMappingRequest request = new PutMappingRequest(indexState.name()).source(indexState.targetMappings());
-        _searchClient.indices().putMapping(request, RequestOptions.DEFAULT);
-        log.info("Updated index {} with new mappings", indexState.name());
-      }
+      applyMappings(indexState, true);
 
       if (indexState.requiresApplySettings()) {
         UpdateSettingsRequest request = new UpdateSettingsRequest(indexState.name());
@@ -234,6 +229,26 @@ public void buildIndex(ReindexConfig indexState) throws IOException {
     }
   }
 
+  /**
+   * Apply mapping changes in place when indexState reports a pure mappings addition (no reindex)
+   * @param indexState the state of the current and target index settings/mappings
+   * @param suppressError if false, log an error when the mappings are not a pure addition; if true, skip silently
+   * @throws IOException communication issues with ES
+   */
+  public void applyMappings(ReindexConfig indexState, boolean suppressError) throws IOException {
+    if (indexState.isPureMappingsAddition()) {
+      log.info("Updating index {} mappings in place.", indexState.name());
+      PutMappingRequest request = new PutMappingRequest(indexState.name()).source(indexState.targetMappings());
+      _searchClient.indices().putMapping(request, RequestOptions.DEFAULT);
+      log.info("Updated index {} with new mappings", indexState.name());
+    } else {
+      if (!suppressError) {
+        log.error("Attempted to apply invalid mappings. Current: {} Target: {}", indexState.currentMappings(),
+                indexState.targetMappings());
+      }
+    }
+  }
+
   public String reindexInPlaceAsync(String indexAlias, @Nullable QueryBuilder filterQuery, BatchWriteOperationsOptions options, ReindexConfig config)
       throws Exception {
     GetAliasesResponse aliasesResponse = _searchClient.indices().getAlias(
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java
deleted file mode 100644
index 04c9f1993ff35..0000000000000
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilder.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package com.linkedin.metadata.search.elasticsearch.indexbuilder;
-
-import com.linkedin.metadata.models.EntitySpec;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import com.linkedin.metadata.shared.ElasticSearchIndexed;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-
-
-@Slf4j
-@RequiredArgsConstructor
-public class EntityIndexBuilder implements ElasticSearchIndexed {
-  private final ESIndexBuilder indexBuilder;
-  private final EntitySpec entitySpec;
-  private final SettingsBuilder settingsBuilder;
-  private final String indexName;
-
-  @Override
-  public void reindexAll() throws IOException {
-    log.info("Setting up index: {}", indexName);
-    for (ReindexConfig config : getReindexConfigs()) {
-      indexBuilder.buildIndex(config);
-    }
-  }
-
-  @Override
-  public List<ReindexConfig> getReindexConfigs() throws IOException {
-    Map<String, Object> mappings = MappingsBuilder.getMappings(entitySpec);
-    Map<String, Object> settings = settingsBuilder.getSettings();
-    return List.of(indexBuilder.buildReindexState(indexName, mappings, settings));
-  }
-}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java
index f38418058ca6d..56cb26b09dc33 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java
@@ -3,8 +3,10 @@
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.shared.ElasticSearchIndexed;
 import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
+
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;
 
 import lombok.RequiredArgsConstructor;
@@ -14,32 +16,37 @@
 @RequiredArgsConstructor
 @Slf4j
 public class EntityIndexBuilders implements ElasticSearchIndexed {
-  private final ESIndexBuilder indexBuilder;
-  private final EntityRegistry entityRegistry;
-  private final IndexConvention indexConvention;
-  private final SettingsBuilder settingsBuilder;
-
-  @Override
-  public void reindexAll() {
-      for (ReindexConfig config : getReindexConfigs()) {
-          try {
-              indexBuilder.buildIndex(config);
-          } catch (IOException e) {
-              throw new RuntimeException(e);
-          }
-      }
-  }
-
-  @Override
-  public List<ReindexConfig> getReindexConfigs() {
-    return entityRegistry.getEntitySpecs().values().stream().flatMap(entitySpec -> {
-                      try {
-                        return new EntityIndexBuilder(indexBuilder, entitySpec, settingsBuilder, indexConvention.getIndexName(entitySpec))
-                                .getReindexConfigs().stream();
-                      } catch (IOException e) {
+    private final ESIndexBuilder indexBuilder;
+    private final EntityRegistry entityRegistry;
+    private final IndexConvention indexConvention;
+    private final SettingsBuilder settingsBuilder;
+
+    public ESIndexBuilder getIndexBuilder() {
+        return indexBuilder;
+    }
+
+    @Override
+    public void reindexAll() {
+        for (ReindexConfig config : buildReindexConfigs()) {
+            try {
+                indexBuilder.buildIndex(config);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    @Override
+    public List<ReindexConfig> buildReindexConfigs() {
+        Map<String, Object> settings = settingsBuilder.getSettings();
+        return entityRegistry.getEntitySpecs().values().stream().map(entitySpec -> {
+                    try {
+                        Map<String, Object> mappings = MappingsBuilder.getMappings(entitySpec);
+                        return indexBuilder.buildReindexState(indexConvention.getIndexName(entitySpec), mappings, settings);
+                    } catch (IOException e) {
                         throw new RuntimeException(e);
-                      }
                     }
-            ).collect(Collectors.toList());
-  }
+                }
+        ).collect(Collectors.toList());
+    }
 }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java
index b3e05d966e36b..004b2e0a2adc4 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java
@@ -51,6 +51,8 @@ public static Map<String, String> getPartialNgramConfigWithOverrides(Map<String,
   public static final String ALIAS = "alias";
   public static final String PATH = "path";
 
+  public static final String PROPERTIES = "properties";
+
   private MappingsBuilder() {
   }
 
@@ -66,7 +68,7 @@ public static Map<String, Object> getMappings(@Nonnull final EntitySpec entitySp
     mappings.put("urn", getMappingsForUrn());
     mappings.put("runId", getMappingsForRunId());
 
-    return ImmutableMap.of("properties", mappings);
+    return ImmutableMap.of(PROPERTIES, mappings);
   }
 
   private static Map<String, Object> getMappingsForUrn() {
@@ -98,42 +100,9 @@ private static Map<String, Object> getMappingsForField(@Nonnull final Searchable
     Map<String, Object> mappings = new HashMap<>();
     Map<String, Object> mappingForField = new HashMap<>();
     if (fieldType == FieldType.KEYWORD) {
-      mappingForField.put(TYPE, KEYWORD);
-      mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER);
-      // Add keyword subfield without lowercase filter
-      mappingForField.put(FIELDS, ImmutableMap.of(KEYWORD, KEYWORD_TYPE_MAP));
+      mappingForField.putAll(getMappingsForKeyword());
     } else if (fieldType == FieldType.TEXT || fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) {
-      mappingForField.put(TYPE, KEYWORD);
-      mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER);
-      Map<String, Object> subFields = new HashMap<>();
-      if (fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) {
-        subFields.put(NGRAM, getPartialNgramConfigWithOverrides(
-                ImmutableMap.of(
-                        ANALYZER, PARTIAL_ANALYZER
-                )
-        ));
-        if (fieldType == FieldType.WORD_GRAM) {
-          for (Map.Entry<String, String> entry : Map.of(
-              WORD_GRAMS_LENGTH_2, WORD_GRAM_2_ANALYZER,
-              WORD_GRAMS_LENGTH_3, WORD_GRAM_3_ANALYZER,
-              WORD_GRAMS_LENGTH_4, WORD_GRAM_4_ANALYZER).entrySet()) {
-            String fieldName = entry.getKey();
-            String analyzerName = entry.getValue();
-            subFields.put(fieldName, ImmutableMap.of(
-                TYPE, TEXT,
-                ANALYZER, analyzerName
-            ));
-          }
-        }
-      }
-      subFields.put(DELIMITED, ImmutableMap.of(
-              TYPE, TEXT,
-              ANALYZER, TEXT_ANALYZER,
-              SEARCH_ANALYZER, TEXT_SEARCH_ANALYZER,
-              SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER));
-      // Add keyword subfield without lowercase filter
-      subFields.put(KEYWORD, KEYWORD_TYPE_MAP);
-      mappingForField.put(FIELDS, subFields);
+      mappingForField.putAll(getMappingsForSearchText(fieldType));
     } else if (fieldType == FieldType.BROWSE_PATH) {
       mappingForField.put(TYPE, TEXT);
       mappingForField.put(FIELDS,
@@ -189,6 +158,51 @@ private static Map<String, Object> getMappingsForField(@Nonnull final Searchable
     return mappings;
   }
 
+  /** Mapping for KEYWORD fields: a normalized keyword plus a keyword subfield without the lowercase filter. */
+  private static Map<String, Object> getMappingsForKeyword() {
+    final Map<String, Object> keywordMapping = new HashMap<>();
+    keywordMapping.put(TYPE, KEYWORD);
+    keywordMapping.put(NORMALIZER, KEYWORD_NORMALIZER);
+    keywordMapping.put(FIELDS, ImmutableMap.of(KEYWORD, KEYWORD_TYPE_MAP));
+    return keywordMapping;
+  }
+
+  /**
+   * Builds the mapping shared by TEXT, TEXT_PARTIAL and WORD_GRAM fields: a normalized keyword whose
+   * subfields carry the analyzed variants (delimited text, optional ngram and word-gram analyzers).
+   * @param fieldType searchable field type; decides which optional subfields are added
+   * @return mutable mapping holding the type, normalizer and subfield definitions
+   */
+  private static Map<String, Object> getMappingsForSearchText(FieldType fieldType) {
+    final boolean wordGram = fieldType == FieldType.WORD_GRAM;
+    final Map<String, Object> subFields = new HashMap<>();
+    if (wordGram || fieldType == FieldType.TEXT_PARTIAL) {
+      subFields.put(NGRAM, getPartialNgramConfigWithOverrides(ImmutableMap.of(ANALYZER, PARTIAL_ANALYZER)));
+    }
+    if (wordGram) {
+      // One text subfield per word-gram length, each with its matching analyzer
+      final Map<String, String> analyzersBySubField = Map.of(
+              WORD_GRAMS_LENGTH_2, WORD_GRAM_2_ANALYZER,
+              WORD_GRAMS_LENGTH_3, WORD_GRAM_3_ANALYZER,
+              WORD_GRAMS_LENGTH_4, WORD_GRAM_4_ANALYZER);
+      analyzersBySubField.forEach((subFieldName, analyzerName) ->
+              subFields.put(subFieldName, ImmutableMap.of(TYPE, TEXT, ANALYZER, analyzerName)));
+    }
+    final Map<String, Object> delimited = ImmutableMap.of(
+            TYPE, TEXT,
+            ANALYZER, TEXT_ANALYZER,
+            SEARCH_ANALYZER, TEXT_SEARCH_ANALYZER,
+            SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER);
+    subFields.put(DELIMITED, delimited);
+    // Add keyword subfield without lowercase filter
+    subFields.put(KEYWORD, KEYWORD_TYPE_MAP);
+    final Map<String, Object> searchTextMapping = new HashMap<>();
+    searchTextMapping.put(TYPE, KEYWORD);
+    searchTextMapping.put(NORMALIZER, KEYWORD_NORMALIZER);
+    searchTextMapping.put(FIELDS, subFields);
+    return searchTextMapping;
+  }
+
   private static Map<String, Object> getMappingsForSearchScoreField(
       @Nonnull final SearchScoreFieldSpec searchScoreFieldSpec) {
     return ImmutableMap.of(searchScoreFieldSpec.getSearchScoreAnnotation().getFieldName(),
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java
index 4f5f2926d3da0..8b8a48f5d9cda 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java
@@ -121,13 +121,14 @@ public ReindexConfig build() {
             if (super.exists) {
                 /* Consider mapping changes */
                 MapDifference<String, Object> mappingsDiff = Maps.difference(
-                        (TreeMap<String, Object>) super.currentMappings.getOrDefault("properties", new TreeMap()),
-                        (TreeMap<String, Object>) super.targetMappings.getOrDefault("properties", new TreeMap()));
+                        getOrDefault(super.currentMappings, List.of("properties")),
+                        getOrDefault(super.targetMappings, List.of("properties")));
                 super.requiresApplyMappings = !mappingsDiff.entriesDiffering().isEmpty()
                         || !mappingsDiff.entriesOnlyOnRight().isEmpty();
                 super.isPureMappingsAddition = super.requiresApplyMappings
                         && mappingsDiff.entriesDiffering().isEmpty()
                         && !mappingsDiff.entriesOnlyOnRight().isEmpty();
+
                 if (super.requiresApplyMappings && super.isPureMappingsAddition) {
                     log.info("Index: {} - New fields have been added to index. Adding: {}",
                             super.name, mappingsDiff.entriesOnlyOnRight());
@@ -171,8 +172,21 @@ public ReindexConfig build() {
             return super.build();
         }
 
+        /** Descends {@code path} through nested mappings; a null map, missing key or null entry yields an empty map. */
+        @SuppressWarnings("unchecked")
+        private static TreeMap<String, Object> getOrDefault(Map<String, Object> map, List<String> path) {
+            TreeMap<String, Object> node = null;
+            Map<String, Object> parent = map;
+            for (String key : path) {
+                // Every level must be stored as a TreeMap (ClassCastException otherwise); a missing level stays null
+                node = parent == null ? null : (TreeMap<String, Object>) parent.get(key);
+                parent = node;
+            }
+            return node == null ? new TreeMap<>() : node;
+        }
+
         private boolean isAnalysisEqual() {
-            if (!super.targetSettings.containsKey("index")) {
+            if (super.targetSettings == null || !super.targetSettings.containsKey("index")) {
                 return true;
             }
             Map<String, Object> indexSettings = (Map<String, Object>) super.targetSettings.get("index");
@@ -186,7 +200,7 @@ private boolean isAnalysisEqual() {
         }
 
         private boolean isSettingsEqual() {
-            if (!super.targetSettings.containsKey("index")) {
+            if (super.targetSettings == null || !super.targetSettings.containsKey("index")) {
                 return true;
             }
             Map<String, Object> indexSettings = (Map<String, Object>) super.targetSettings.get("index");
@@ -196,7 +210,7 @@ private boolean isSettingsEqual() {
         }
 
         private boolean isSettingsReindexRequired() {
-            if (!super.targetSettings.containsKey("index")) {
+            if (super.targetSettings == null || !super.targetSettings.containsKey("index")) {
                 return false;
             }
             Map<String, Object> indexSettings = (Map<String, Object>) super.targetSettings.get("index");
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
index 76f4736f2746e..49809cf933936 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java
@@ -7,6 +7,7 @@
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.schema.DataSchema;
 import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.metadata.models.AspectSpec;
 import com.linkedin.metadata.models.EntitySpec;
 import com.linkedin.metadata.models.SearchScoreFieldSpec;
@@ -21,6 +22,7 @@
 import java.util.stream.Collectors;
 
 import lombok.RequiredArgsConstructor;
+import lombok.Setter;
 import lombok.extern.slf4j.Slf4j;
 
 import javax.annotation.Nonnull;
@@ -30,6 +32,7 @@
  * Class that provides a utility function that transforms the snapshot object into a search document
  */
 @Slf4j
+@Setter
 @RequiredArgsConstructor
 public class SearchDocumentTransformer {
 
@@ -42,6 +45,8 @@ public class SearchDocumentTransformer {
   // Maximum customProperties value length
   private final int maxValueLength;
 
+  private SystemEntityClient entityClient;
+
    private static final String BROWSE_PATH_V2_DELIMITER = "␟";
 
   public Optional<String> transformSnapshot(final RecordTemplate snapshot, final EntitySpec entitySpec,
@@ -72,14 +77,18 @@ public Optional<String> transformAspect(
         FieldExtractor.extractFields(aspect, aspectSpec.getSearchableFieldSpecs(), maxValueLength);
     final Map<SearchScoreFieldSpec, List<Object>> extractedSearchScoreFields =
         FieldExtractor.extractFields(aspect, aspectSpec.getSearchScoreFieldSpecs(), maxValueLength);
-    if (extractedSearchableFields.isEmpty() && extractedSearchScoreFields.isEmpty()) {
-      return Optional.empty();
+
+    Optional<String> result = Optional.empty();
+
+    if (!extractedSearchableFields.isEmpty() || !extractedSearchScoreFields.isEmpty()) {
+      final ObjectNode searchDocument = JsonNodeFactory.instance.objectNode();
+      searchDocument.put("urn", urn.toString());
+      extractedSearchableFields.forEach((key, values) -> setSearchableValue(key, values, searchDocument, forDelete));
+      extractedSearchScoreFields.forEach((key, values) -> setSearchScoreValue(key, values, searchDocument, forDelete));
+      result = Optional.of(searchDocument.toString());
     }
-    final ObjectNode searchDocument = JsonNodeFactory.instance.objectNode();
-    searchDocument.put("urn", urn.toString());
-    extractedSearchableFields.forEach((key, values) -> setSearchableValue(key, values, searchDocument, forDelete));
-    extractedSearchScoreFields.forEach((key, values) -> setSearchScoreValue(key, values, searchDocument, forDelete));
-    return Optional.of(searchDocument.toString());
+
+    return result;
   }
 
   public void setSearchableValue(final SearchableFieldSpec fieldSpec, final List<Object> fieldValues,
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java
index 36b685f084d51..ea7286112f870 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java
@@ -12,6 +12,7 @@
 import com.linkedin.data.template.RecordTemplate;
 import com.linkedin.dataset.FineGrainedLineage;
 import com.linkedin.dataset.UpstreamLineage;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.events.metadata.ChangeType;
 import com.linkedin.metadata.Constants;
 import com.linkedin.metadata.graph.Edge;
@@ -28,6 +29,7 @@
 import com.linkedin.metadata.query.filter.Filter;
 import com.linkedin.metadata.query.filter.RelationshipDirection;
 import com.linkedin.metadata.search.EntitySearchService;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
 import com.linkedin.metadata.search.utils.SearchUtils;
 import com.linkedin.metadata.systemmetadata.SystemMetadataService;
@@ -39,6 +41,8 @@
 import com.linkedin.mxe.MetadataChangeLog;
 import com.linkedin.mxe.SystemMetadata;
 import com.linkedin.util.Pair;
+
+import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.URLEncoder;
 import java.util.ArrayList;
@@ -68,6 +72,7 @@ public class UpdateIndicesService {
   private final SystemMetadataService _systemMetadataService;
   private final EntityRegistry _entityRegistry;
   private final SearchDocumentTransformer _searchDocumentTransformer;
+  private final EntityIndexBuilders _entityIndexBuilders;
 
   @Value("${featureFlags.graphServiceDiffModeEnabled:true}")
   private boolean _graphDiffMode;
@@ -90,25 +95,31 @@ public void setSearchDiffMode(boolean searchDiffMode) {
   }
 
   public UpdateIndicesService(
-      GraphService graphService,
-      EntitySearchService entitySearchService,
-      TimeseriesAspectService timeseriesAspectService,
-      SystemMetadataService systemMetadataService,
-      EntityRegistry entityRegistry,
-      SearchDocumentTransformer searchDocumentTransformer) {
+          GraphService graphService,
+          EntitySearchService entitySearchService,
+          TimeseriesAspectService timeseriesAspectService,
+          SystemMetadataService systemMetadataService,
+          EntityRegistry entityRegistry,
+          SearchDocumentTransformer searchDocumentTransformer,
+          EntityIndexBuilders entityIndexBuilders) {
     _graphService = graphService;
     _entitySearchService = entitySearchService;
     _timeseriesAspectService = timeseriesAspectService;
     _systemMetadataService = systemMetadataService;
     _entityRegistry = entityRegistry;
     _searchDocumentTransformer = searchDocumentTransformer;
+    _entityIndexBuilders = entityIndexBuilders;
   }
 
   public void handleChangeEvent(@Nonnull final MetadataChangeLog event) {
-    if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) {
-      handleUpdateChangeEvent(event);
-    } else if (event.getChangeType() == ChangeType.DELETE) {
-      handleDeleteChangeEvent(event);
+    try {
+      if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) {
+        handleUpdateChangeEvent(event);
+      } else if (event.getChangeType() == ChangeType.DELETE) {
+        handleDeleteChangeEvent(event);
+      }
+    } catch (IOException e) {  // declared by handleUpdateChangeEvent; rethrown unchecked so callers need no changes
+      throw new RuntimeException(e);
     }
   }
 
@@ -123,7 +134,7 @@ public void handleChangeEvent(@Nonnull final MetadataChangeLog event) {
    *
    * @param event the change event to be processed.
    */
-  public void handleUpdateChangeEvent(@Nonnull final MetadataChangeLog event) {
+  public void handleUpdateChangeEvent(@Nonnull final MetadataChangeLog event) throws IOException {
 
     final EntitySpec entitySpec = getEventEntitySpec(event);
     final Urn urn = EntityKeyUtils.getUrnFromLog(event, entitySpec.getKeyAspectSpec());
@@ -212,7 +223,7 @@ public void handleDeleteChangeEvent(@Nonnull final MetadataChangeLog event) {
     if (!aspectSpec.isTimeseries()) {
       deleteSystemMetadata(urn, aspectSpec, isDeletingKey);
       deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event);
-      deleteSearchData(urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey);
+      deleteSearchData(_entitySearchService, urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey);
     }
   }
 
@@ -405,7 +416,8 @@ private static List<Edge> getMergedEdges(final Set<Edge> oldEdgeSet, final Set<E
   /**
    * Process snapshot and update search index
    */
-  private void updateSearchService(String entityName, Urn urn, AspectSpec aspectSpec, RecordTemplate aspect,
+  private void updateSearchService(String entityName, Urn urn,
+                                   AspectSpec aspectSpec, RecordTemplate aspect,
       @Nullable SystemMetadata systemMetadata, @Nullable RecordTemplate previousAspect) {
     Optional<String> searchDocument;
     Optional<String> previousSearchDocument = Optional.empty();
@@ -513,7 +525,8 @@ private void deleteGraphData(
     }
   }
 
-  private void deleteSearchData(Urn urn, String entityName, AspectSpec aspectSpec, RecordTemplate aspect, Boolean isKeyAspect) {
+  private void deleteSearchData(EntitySearchService entitySearchService, Urn urn, String entityName,
+                                AspectSpec aspectSpec, RecordTemplate aspect, Boolean isKeyAspect) {
     String docId;
     try {
       docId = URLEncoder.encode(urn.toString(), "UTF-8");
@@ -551,4 +564,13 @@ private EntitySpec getEventEntitySpec(@Nonnull final MetadataChangeLog event) {
               event.getEntityType()));
     }
   }
+
+  /**
+   * Hands the system entity client to the search document transformer after construction. Setter injection is
+   * used to break the circular dependency between the UpdateIndicesService and the SystemJavaEntityClient.
+   * @param systemEntityClient client forwarded to the transformer's {@code setEntityClient}
+   */
+  public void setSystemEntityClient(SystemEntityClient systemEntityClient) {
+    _searchDocumentTransformer.setEntityClient(systemEntityClient);
+  }
 }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java b/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java
index 1f13cb8321284..64ad88c08a741 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java
@@ -11,7 +11,7 @@ public interface ElasticSearchIndexed {
      * The index configurations for the given service.
      * @return List of reindex configurations
      */
-    List<ReindexConfig> getReindexConfigs() throws IOException;
+    List<ReindexConfig> buildReindexConfigs() throws IOException;
 
     /**
      * Mirrors the service's functions which
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java
index dd8e19861ccd2..e9ee1d6ee78d5 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java
@@ -205,7 +205,7 @@ public List<IngestionRunSummary> listRuns(Integer pageOffset, Integer pageSize,
   public void configure() {
     log.info("Setting up system metadata index");
     try {
-      for (ReindexConfig config : getReindexConfigs()) {
+      for (ReindexConfig config : buildReindexConfigs()) {
         _indexBuilder.buildIndex(config);
       }
     } catch (IOException ie) {
@@ -214,7 +214,7 @@ public void configure() {
   }
 
   @Override
-  public List<ReindexConfig> getReindexConfigs() throws IOException {
+  public List<ReindexConfig> buildReindexConfigs() throws IOException {
     return List.of(_indexBuilder.buildReindexState(_indexConvention.getIndexName(INDEX_NAME),
             SystemMetadataMappingsBuilder.getMappings(), Collections.emptyMap()));
   }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
index 43ba87f474d6a..a496fc427138e 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
@@ -137,9 +137,10 @@ public void configure() {
   }
 
   @Override
-  public List<ReindexConfig> getReindexConfigs() {
-    return _indexBuilders.getReindexConfigs();
+  public List<ReindexConfig> buildReindexConfigs() {
+    return _indexBuilders.buildReindexConfigs();
   }
+
   public String reindexAsync(String index, @Nullable QueryBuilder filterQuery, BatchWriteOperationsOptions options)
       throws Exception {
     return _indexBuilders.reindexAsync(index, filterQuery, options);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java
index b0751a9c6f9ea..e9518ed8c39fa 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java
@@ -29,7 +29,7 @@ public class TimeseriesAspectIndexBuilders implements ElasticSearchIndexed {
 
   @Override
   public void reindexAll() {
-    for (ReindexConfig config : getReindexConfigs()) {
+    for (ReindexConfig config : buildReindexConfigs()) {
       try {
         _indexBuilder.buildIndex(config);
       } catch (IOException e) {
@@ -63,7 +63,7 @@ public String reindexAsync(String index, @Nullable QueryBuilder filterQuery, Bat
   }
 
   @Override
-  public List<ReindexConfig> getReindexConfigs() {
+  public List<ReindexConfig> buildReindexConfigs() {
     return _entityRegistry.getEntitySpecs().values().stream()
             .flatMap(entitySpec -> entitySpec.getAspectSpecs().stream()
                     .map(aspectSpec -> Pair.of(entitySpec, aspectSpec)))
@@ -80,4 +80,5 @@ public List<ReindexConfig> getReindexConfigs() {
               }
             }).collect(Collectors.toList());
   }
+
 }
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java
index 38b2ed4ed199a..30d821662d377 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java
@@ -1,18 +1,27 @@
 package com.linkedin.metadata.entity;
 
+import com.linkedin.common.urn.Urn;
+import com.linkedin.metadata.AspectIngestionUtils;
 import com.linkedin.metadata.config.PreProcessHooks;
 import com.linkedin.metadata.EbeanTestUtils;
 import com.linkedin.metadata.entity.ebean.EbeanAspectDao;
 import com.linkedin.metadata.entity.ebean.EbeanRetentionService;
 import com.linkedin.metadata.event.EventProducer;
+import com.linkedin.metadata.key.CorpUserKey;
 import com.linkedin.metadata.models.registry.EntityRegistryException;
 import com.linkedin.metadata.service.UpdateIndicesService;
 import io.ebean.Database;
-import org.testng.Assert;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import static com.linkedin.metadata.Constants.*;
 import static org.mockito.Mockito.*;
+import static org.testng.Assert.*;
 
 
 public class EbeanAspectMigrationsDaoTest extends AspectMigrationsDaoTest<EbeanAspectDao> {
@@ -37,13 +46,19 @@ public void setupTest() {
     _migrationsDao = dao;
   }
 
-  /**
-   * Ideally, all tests would be in the base class, so they're reused between all implementations.
-   * When that's the case - test runner will ignore this class (and its base!) so we keep this dummy test
-   * to make sure this class will always be discovered.
-   */
   @Test
-  public void obligatoryTest() throws AssertionError {
-    Assert.assertTrue(true);
+  public void testStreamAspects() throws AssertionError {
+    final int totalAspects = 30;
+    Map<Urn, CorpUserKey> ingestedAspects =
+        AspectIngestionUtils.ingestCorpUserKeyAspects(_entityServiceImpl, totalAspects);
+    List<String> ingestedUrns = ingestedAspects.keySet().stream().map(Urn::toString).collect(Collectors.toList());
+    // Stream back the key aspects that were just ingested and check that none of them is missing
+    Stream<EntityAspect> aspectStream = _migrationsDao.streamAspects(CORP_USER_ENTITY_NAME, CORP_USER_KEY_ASPECT_NAME);
+    List<EntityAspect> aspectList = aspectStream.collect(Collectors.toList());
+    assertEquals(aspectList.size(), ingestedUrns.size());  // TestNG argument order is (actual, expected)
+    Set<String> urnsFetched = aspectList.stream().map(EntityAspect::getUrn).collect(Collectors.toSet());
+    for (String urn : ingestedUrns) {
+      assertTrue(urnsFetched.contains(urn), "Streamed aspects are missing urn " + urn);
+    }
   }
 }
diff --git a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
index cfa9c1258583d..12a02f954e1bc 100644
--- a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java
@@ -12,11 +12,16 @@
 import com.linkedin.events.metadata.ChangeType;
 import com.linkedin.glossary.GlossaryTermInfo;
 import com.linkedin.metadata.Constants;
+import com.linkedin.metadata.config.PreProcessHooks;
+import com.linkedin.metadata.entity.AspectDao;
 import com.linkedin.metadata.entity.AspectUtils;
 import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.entity.EntityServiceImpl;
+import com.linkedin.metadata.event.EventProducer;
 import com.linkedin.metadata.models.AspectSpec;
 import com.linkedin.metadata.models.EntitySpec;
 import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.service.UpdateIndicesService;
 import com.linkedin.metadata.utils.EntityKeyUtils;
 import com.linkedin.metadata.utils.GenericRecordUtils;
 import net.datafaker.Faker;
@@ -42,6 +47,8 @@
 import java.util.stream.LongStream;
 import java.util.stream.Stream;
 
+import static org.mockito.Mockito.mock;
+
 public class DataGenerator {
     private final static Faker FAKER = new Faker();
     private final EntityRegistry entityRegistry;
@@ -52,10 +59,21 @@ public DataGenerator(EntityService entityService) {
         this.entityRegistry = entityService.getEntityRegistry();
     }
 
+    /** Creates a generator backed by an EntityServiceImpl whose collaborators, except the registry, are mocks. */
+    public static DataGenerator build(EntityRegistry entityRegistry) {
+        return new DataGenerator(new EntityServiceImpl(
+                mock(AspectDao.class), mock(EventProducer.class), entityRegistry, false,
+                mock(UpdateIndicesService.class), mock(PreProcessHooks.class)));
+    }
+
     public Stream<List<MetadataChangeProposal>> generateDatasets() {
         return generateMCPs("dataset", 10, List.of());
     }
 
+    public List<MetadataChangeProposal> generateTags(long count) {  // first MCP of each of the `count` generated tags
+        return generateMCPs("tag", count, List.of()).map(mcps -> mcps.get(0)).collect(Collectors.toList());
+    }
+
     public Stream<List<MetadataChangeProposal>> generateMCPs(String entityName, long count, List<String> aspects) {
         EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName);
 
@@ -127,9 +145,7 @@ public Stream<List<MetadataChangeProposal>> generateMCPs(String entityName, long
     public Map<String, BiFunction<RecordTemplate, Integer, List<MetadataChangeProposal>>> nestedRandomAspectGenerators = Map.of(
             "globalTags", (aspect, count) -> {
                 try {
-                    List<MetadataChangeProposal> tags = generateMCPs("tag", count, List.of())
-                            .map(mcps -> mcps.get(0))
-                            .collect(Collectors.toList());
+                    List<MetadataChangeProposal> tags = generateTags(count);
                     Method setTagsMethod = aspect.getClass().getMethod("setTags", TagAssociationArray.class);
                     TagAssociationArray tagAssociations = new TagAssociationArray();
                     tagAssociations.addAll(tags.stream().map(
diff --git a/metadata-jobs/mae-consumer-job/src/main/resources/application.properties b/metadata-jobs/mae-consumer-job/src/main/resources/application.properties
index 6befa3e8789d8..7df61c93ab66d 100644
--- a/metadata-jobs/mae-consumer-job/src/main/resources/application.properties
+++ b/metadata-jobs/mae-consumer-job/src/main/resources/application.properties
@@ -3,4 +3,4 @@ management.endpoints.web.exposure.include=metrics, health, info
 spring.mvc.servlet.path=/
 management.health.elasticsearch.enabled=false
 management.health.neo4j.enabled=false
-
+entityClient.preferredImpl=restli
diff --git a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java
index a214117f4e1bc..aa097a52c8fc6 100644
--- a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java
+++ b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java
@@ -7,6 +7,7 @@
 import com.linkedin.metadata.graph.GraphService;
 import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
 import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService;
 import io.ebean.Database;
 import org.springframework.boot.test.context.TestConfiguration;
@@ -40,4 +41,7 @@ public class MaeConsumerApplicationTestConfiguration {
 
   @MockBean
   private ConfigEntityRegistry _configEntityRegistry;
+
+  @MockBean
+  public EntityIndexBuilders entityIndexBuilders;
 }
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
index 64f89c595163d..796f570a1732e 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeLogProcessor.java
@@ -14,6 +14,8 @@
 import com.linkedin.metadata.utils.metrics.MetricUtils;
 import com.linkedin.mxe.MetadataChangeLog;
 import com.linkedin.mxe.Topics;
+
+import java.util.Comparator;
 import java.util.List;
 import java.util.stream.Collectors;
 import lombok.Getter;
@@ -47,7 +49,10 @@ public class MetadataChangeLogProcessor {
 
   @Autowired
   public MetadataChangeLogProcessor(List<MetadataChangeLogHook> metadataChangeLogHooks) {
-    this.hooks = metadataChangeLogHooks.stream().filter(MetadataChangeLogHook::isEnabled).collect(Collectors.toList());
+    this.hooks = metadataChangeLogHooks.stream()
+            .filter(MetadataChangeLogHook::isEnabled)
+            .sorted(Comparator.comparingInt(MetadataChangeLogHook::executionOrder))  // stable: ties keep list order
+            .collect(Collectors.toList());
     this.hooks.forEach(MetadataChangeLogHook::init);
   }
 
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java
index c7857eb7baffc..39b47768a6dcf 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/MetadataChangeLogHook.java
@@ -29,4 +29,12 @@ default boolean isEnabled() {
    * Invoke the hook when a MetadataChangeLog is received
    */
   void invoke(@Nonnull MetadataChangeLog log) throws Exception;
+
+  /**
+   * Sort key used by MetadataChangeLogProcessor to order the enabled hooks (ascending, so lower values come first).
+   * @return this hook's position in that ordering; 100 unless overridden
+   */
+  default int executionOrder() {
+    return 100;
+  }
 }
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java
index fad7a34074964..78c87ec8f4b3b 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java
@@ -24,7 +24,7 @@
     EntityRegistryFactory.class, SystemMetadataServiceFactory.class, SearchDocumentTransformerFactory.class})
 public class UpdateIndicesHook implements MetadataChangeLogHook {
 
-  private final UpdateIndicesService _updateIndicesService;
+  protected final UpdateIndicesService _updateIndicesService;
   private final boolean _isEnabled;
 
   public UpdateIndicesHook(
diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java
index 030ca83131433..90f8f208c4cb6 100644
--- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java
+++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java
@@ -34,6 +34,7 @@
 import com.linkedin.metadata.query.filter.Filter;
 import com.linkedin.metadata.query.filter.RelationshipDirection;
 import com.linkedin.metadata.search.EntitySearchService;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
 import com.linkedin.metadata.service.UpdateIndicesService;
 import com.linkedin.metadata.systemmetadata.SystemMetadataService;
@@ -42,10 +43,12 @@
 import com.linkedin.mxe.MetadataChangeLog;
 import com.linkedin.mxe.SystemMetadata;
 import com.linkedin.schema.SchemaField;
+
 import java.net.URISyntaxException;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import org.mockito.Mockito;
+import org.springframework.beans.factory.annotation.Value;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
@@ -82,9 +85,13 @@ public class UpdateIndicesHookTest {
   private SearchDocumentTransformer _searchDocumentTransformer;
   private DataHubUpgradeKafkaListener _mockDataHubUpgradeKafkaListener;
   private ConfigurationProvider _mockConfigurationProvider;
+  private EntityIndexBuilders _mockEntityIndexBuilders;
   private Urn _actorUrn;
   private UpdateIndicesService _updateIndicesService;
 
+  @Value("${elasticsearch.index.maxArrayLength}")
+  private int maxArrayLength;
+
   @BeforeMethod
   public void setupTest() {
     _actorUrn = UrnUtils.getUrn(TEST_ACTOR_URN);
@@ -95,6 +102,8 @@ public void setupTest() {
     _searchDocumentTransformer = new SearchDocumentTransformer(1000, 1000, 1000);
     _mockDataHubUpgradeKafkaListener = Mockito.mock(DataHubUpgradeKafkaListener.class);
     _mockConfigurationProvider = Mockito.mock(ConfigurationProvider.class);
+    _mockEntityIndexBuilders = Mockito.mock(EntityIndexBuilders.class);
+
     ElasticSearchConfiguration elasticSearchConfiguration = new ElasticSearchConfiguration();
     SystemUpdateConfiguration systemUpdateConfiguration = new SystemUpdateConfiguration();
     systemUpdateConfiguration.setWaitForSystemUpdate(false);
@@ -105,7 +114,8 @@ public void setupTest() {
         _mockTimeseriesAspectService,
         _mockSystemMetadataService,
         ENTITY_REGISTRY,
-        _searchDocumentTransformer
+        _searchDocumentTransformer,
+        _mockEntityIndexBuilders
     );
     _updateIndicesHook = new UpdateIndicesHook(
         _updateIndicesService,
@@ -163,7 +173,8 @@ public void testInputFieldsEdgesAreAdded() throws Exception {
         _mockTimeseriesAspectService,
         _mockSystemMetadataService,
         mockEntityRegistry,
-        _searchDocumentTransformer
+        _searchDocumentTransformer,
+        _mockEntityIndexBuilders
     );
     _updateIndicesHook = new UpdateIndicesHook(_updateIndicesService, true);
 
diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java
index dc5a6cd23295b..1d9c17c676990 100644
--- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java
+++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java
@@ -9,6 +9,7 @@
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.registry.SchemaRegistryService;
 import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
 import com.linkedin.metadata.systemmetadata.SystemMetadataService;
 import com.linkedin.metadata.timeseries.TimeseriesAspectService;
@@ -64,4 +65,7 @@ public class MCLSpringTestConfiguration {
 
   @MockBean
   public SchemaRegistryService schemaRegistryService;
+
+  @MockBean
+  public EntityIndexBuilders entityIndexBuilders;
 }
diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java
index 558a7b9d90ccb..bee1441b5aaf6 100644
--- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java
+++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java
@@ -8,6 +8,7 @@
 import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.restli.DefaultRestliClientFactory;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.timeseries.TimeseriesAspectService;
 import com.linkedin.parseq.retry.backoff.ExponentialBackoff;
 import com.linkedin.restli.client.Client;
@@ -57,4 +58,7 @@ public RestliEntityClient restliEntityClient() {
 
     @MockBean
     protected SiblingGraphService siblingGraphService;
+
+    @MockBean
+    public EntityIndexBuilders entityIndexBuilders;
 }
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index 42749d8205d21..f180a3f42b730 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -339,7 +339,7 @@ cache:
       statsEnabled: ${CACHE_CLIENT_ENTITY_CLIENT_STATS_ENABLED:true}
       statsIntervalSeconds: ${CACHE_CLIENT_ENTITY_CLIENT_STATS_INTERVAL_SECONDS:120}
       defaultTTLSeconds: ${CACHE_CLIENT_ENTITY_CLIENT_TTL_SECONDS:0} # do not cache entity/aspects by default
-      maxBytes: ${CACHE_CLIENT_USAGE_ENTITY_MAX_BYTES:104857600} # 100MB
+      maxBytes: ${CACHE_CLIENT_ENTITY_CLIENT_MAX_BYTES:104857600} # 100MB
       entityAspectTTLSeconds:
         # cache user aspects for 20s
         corpuser:
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java
index 5122be69982f0..f1c1a7b743714 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java
@@ -33,17 +33,19 @@ public class EntityServiceFactory {
           TopicConventionFactory.TOPIC_CONVENTION_BEAN, "entityRegistry"})
   @Nonnull
   protected EntityService createInstance(
-      Producer<String, ? extends IndexedRecord> producer,
-      TopicConvention convention,
-      KafkaHealthChecker kafkaHealthChecker,
-      @Qualifier("entityAspectDao") AspectDao aspectDao,
-      EntityRegistry entityRegistry,
-      ConfigurationProvider configurationProvider,
-      UpdateIndicesService updateIndicesService) {
+          Producer<String, ? extends IndexedRecord> producer,
+          TopicConvention convention,
+          KafkaHealthChecker kafkaHealthChecker,
+          @Qualifier("entityAspectDao") AspectDao aspectDao,
+          EntityRegistry entityRegistry,
+          ConfigurationProvider configurationProvider,
+          UpdateIndicesService updateIndicesService) {
 
     final KafkaEventProducer eventProducer = new KafkaEventProducer(producer, convention, kafkaHealthChecker);
     FeatureFlags featureFlags = configurationProvider.getFeatureFlags();
-    return new EntityServiceImpl(aspectDao, eventProducer, entityRegistry,
+    EntityService entityService = new EntityServiceImpl(aspectDao, eventProducer, entityRegistry,
         featureFlags.isAlwaysEmitChangeLog(), updateIndicesService, featureFlags.getPreProcessHooks(), _ebeanMaxTransactionRetry);
+
+    return entityService;
   }
 }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java
index e1c24b805437b..3f2388f4829e3 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java
@@ -16,14 +16,17 @@
 import com.linkedin.metadata.timeseries.TimeseriesAspectService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnExpression;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.context.annotation.Import;
 
 
 @Configuration
+@ConditionalOnExpression("'${entityClient.preferredImpl:java}'.equals('java')")
 @Import({DataHubKafkaProducerFactory.class})
 public class JavaEntityClientFactory {
+
   @Autowired
   @Qualifier("entityService")
   private EntityService _entityService;
@@ -74,7 +77,7 @@ public JavaEntityClient getJavaEntityClient(@Qualifier("restliEntityClient") fin
   public SystemJavaEntityClient systemJavaEntityClient(@Qualifier("configurationProvider") final ConfigurationProvider configurationProvider,
                                                        @Qualifier("systemAuthentication") final Authentication systemAuthentication,
                                                        @Qualifier("systemRestliEntityClient") final RestliEntityClient restliEntityClient) {
-    return new SystemJavaEntityClient(
+    SystemJavaEntityClient systemJavaEntityClient = new SystemJavaEntityClient(
             _entityService,
             _deleteEntityService,
             _entitySearchService,
@@ -86,5 +89,9 @@ public SystemJavaEntityClient systemJavaEntityClient(@Qualifier("configurationPr
             restliEntityClient,
             systemAuthentication,
             configurationProvider.getCache().getClient().getEntityClient());
+
+    _entityService.setSystemEntityClient(systemJavaEntityClient);
+
+    return systemJavaEntityClient;
   }
 }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java
index f86f6bf7d0877..a4ea02af94bad 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/update/indices/UpdateIndicesServiceFactory.java
@@ -1,24 +1,44 @@
 package com.linkedin.gms.factory.entity.update.indices;
 
+import com.linkedin.entity.client.SystemRestliEntityClient;
+import com.linkedin.gms.factory.search.EntityIndexBuildersFactory;
 import com.linkedin.metadata.graph.GraphService;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.search.EntitySearchService;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
 import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
 import com.linkedin.metadata.service.UpdateIndicesService;
 import com.linkedin.metadata.systemmetadata.SystemMetadataService;
 import com.linkedin.metadata.timeseries.TimeseriesAspectService;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.ApplicationContext;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
 
 
 @Configuration
+@Import(EntityIndexBuildersFactory.class)
 public class UpdateIndicesServiceFactory {
+  @Autowired
+  private ApplicationContext context;
+  @Value("${entityClient.preferredImpl:java}")
+  private String entityClientImpl;
 
   @Bean
   public UpdateIndicesService updateIndicesService(GraphService graphService, EntitySearchService entitySearchService,
-      TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService,
-      EntityRegistry entityRegistry, SearchDocumentTransformer searchDocumentTransformer) {
-    return new UpdateIndicesService(graphService, entitySearchService, timeseriesAspectService,
-        systemMetadataService, entityRegistry, searchDocumentTransformer);
+                                                   TimeseriesAspectService timeseriesAspectService,
+                                                   SystemMetadataService systemMetadataService,
+                                                   EntityRegistry entityRegistry, SearchDocumentTransformer searchDocumentTransformer,
+                                                   EntityIndexBuilders entityIndexBuilders) {
+    UpdateIndicesService updateIndicesService = new UpdateIndicesService(graphService, entitySearchService, timeseriesAspectService,
+            systemMetadataService, entityRegistry, searchDocumentTransformer, entityIndexBuilders);
+
+    if ("restli".equals(entityClientImpl)) {
+      updateIndicesService.setSystemEntityClient(context.getBean(SystemRestliEntityClient.class));
+    }
+
+    return updateIndicesService;
   }
 }
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java
index a2a0dbaf89c79..6d8a62ac1fd18 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java
@@ -47,6 +47,9 @@ public class ElasticSearchServiceFactory {
   @Qualifier("settingsBuilder")
   private SettingsBuilder settingsBuilder;
 
+  @Autowired
+  private EntityIndexBuilders entityIndexBuilders;
+
   @Autowired
   private ConfigurationProvider configurationProvider;
 
@@ -64,9 +67,7 @@ protected ElasticSearchService getInstance(ConfigurationProvider configurationPr
         new ESSearchDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention(),
                 configurationProvider.getFeatureFlags().isPointInTimeCreationEnabled(),
                 elasticSearchConfiguration.getImplementation(), searchConfiguration, customSearchConfiguration);
-    return new ElasticSearchService(
-        new EntityIndexBuilders(components.getIndexBuilder(), entityRegistry, components.getIndexConvention(),
-            settingsBuilder), esSearchDAO,
+    return new ElasticSearchService(entityIndexBuilders, esSearchDAO,
         new ESBrowseDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention(),
             searchConfiguration, customSearchConfiguration),
         new ESWriteDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention(),
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/EntityIndexBuildersFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/EntityIndexBuildersFactory.java
new file mode 100644
index 0000000000000..6bb206ee3ad61
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/EntityIndexBuildersFactory.java
@@ -0,0 +1,35 @@
+package com.linkedin.gms.factory.search;
+
+import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder;
+import com.linkedin.metadata.spring.YamlPropertySourceFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.PropertySource;
+
+
+@Configuration
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class EntityIndexBuildersFactory {
+
+    @Autowired
+    @Qualifier("baseElasticSearchComponents")
+    private BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components;
+
+    @Autowired
+    @Qualifier("entityRegistry")
+    private EntityRegistry entityRegistry;
+
+    @Autowired
+    @Qualifier("settingsBuilder")
+    private SettingsBuilder settingsBuilder;
+
+
+    @Bean
+    protected EntityIndexBuilders entityIndexBuilders() {
+        return new EntityIndexBuilders(components.getIndexBuilder(), entityRegistry, components.getIndexConvention(), settingsBuilder);
+    }
+}
\ No newline at end of file
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java
index 3b35dc528915a..6006f3a9a87f6 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClientCache.java
@@ -21,7 +21,6 @@
 import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
 import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName;
@@ -44,8 +43,7 @@ public Map<Urn, EntityResponse> batchGetV2(@Nonnull final Set<Urn> urns, @Nonnul
 
         if (config.isEnabled()) {
             Set<Key> keys = urns.stream()
-                    .flatMap(urn -> aspectNames.stream()
-                            .map(a -> Key.builder().urn(urn).aspectName(a).build()))
+                    .flatMap(urn -> aspectNames.stream().map(a -> Key.builder().urn(urn).aspectName(a).build()))
                     .collect(Collectors.toSet());
             Map<Key, EnvelopedAspect> envelopedAspects = cache.getAll(keys);
 
@@ -92,13 +90,13 @@ public EntityClientCache build(Class<?> metricClazz) {
                 Map<String, Set<Key>> keysByEntity = StreamSupport.stream(keys.spliterator(), true)
                         .collect(Collectors.groupingBy(Key::getEntityName, Collectors.toSet()));
 
-                Stream<Map.Entry<Key, EnvelopedAspect>> results = keysByEntity.entrySet().parallelStream()
+                Map<Key, EnvelopedAspect> results = keysByEntity.entrySet().parallelStream()
                         .flatMap(entry -> {
                             Set<Urn> urns = entry.getValue().stream()
                                     .map(Key::getUrn)
                                     .collect(Collectors.toSet());
                             Set<String> aspects = entry.getValue().stream()
-                                    .map(Key::getEntityName)
+                                    .map(Key::getAspectName)
                                     .collect(Collectors.toSet());
                             return loadFunction.apply(urns, aspects).entrySet().stream();
                         })
@@ -106,9 +104,9 @@ public EntityClientCache build(Class<?> metricClazz) {
                                 .map(envAspect -> {
                                     Key key = Key.builder().urn(resp.getKey()).aspectName(envAspect.getName()).build();
                                     return Map.entry(key, envAspect);
-                                }));
+                                })).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
 
-                return results.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+                return results;
             };
 
             // ideally the cache time comes from caching headers from service, but configuration driven for now
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java
index 30cfc2e0288bd..b7607053df8e3 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java
@@ -9,6 +9,7 @@
 import com.linkedin.entity.Entity;
 import com.linkedin.entity.EntityResponse;
 import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.entity.client.SystemEntityClient;
 import com.linkedin.events.metadata.ChangeType;
 import com.linkedin.metadata.aspect.VersionedAspect;
 import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs;
@@ -297,4 +298,11 @@ Pair<Boolean, List<Pair<String, RecordTemplate>>> generateDefaultAspectsOnFirstW
    */
   @Nonnull
   BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContainerPaths) throws URISyntaxException;
+
+  /**
+   * Allow internal use of the system entity client. Solves recursive dependencies between the EntityService
+   * and the SystemJavaEntityClient
+   * @param systemEntityClient system entity client
+   */
+  void setSystemEntityClient(SystemEntityClient systemEntityClient);
 }

From b81e818e47d6d16330693265fd87a590446c7131 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Mon, 2 Oct 2023 12:08:37 -0500
Subject: [PATCH 10/25] feat(openapi): openapi v2 updates (#8927)

---
 build.gradle                                  |   4 +-
 .../io/datahubproject/OpenApiEntities.java    |  29 ++-
 .../src/main/resources/application.yml        |   2 +
 .../health/config/SpringWebConfig.java        |  35 ----
 .../delegates/EntityApiDelegateImpl.java      | 197 +++++++++++++++++-
 .../openapi/util/OpenApiEntitiesUtil.java     |   8 +-
 .../OpenAPIEntityTestConfiguration.java       |  19 +-
 .../delegates/EntityApiDelegateImplTest.java  |  54 ++++-
 .../0.0.0-dev/entity-registry.yaml            |   8 +
 .../0.0.0-dev/metadata-models-custom.jar      | Bin 0 -> 20878 bytes
 .../openapi/config/SpringWebConfig.java       |  25 +++
 .../{ => health}/HealthController.java        |   2 +-
 .../openapi/util/MappingUtil.java             | 119 ++++++++---
 .../webapp/WEB-INF/healthServlet-servlet.xml  |  14 --
 .../webapp/WEB-INF/openapiServlet-servlet.xml |   4 +-
 .../war/src/main/webapp/WEB-INF/web.xml       |   8 +-
 16 files changed, 428 insertions(+), 100 deletions(-)
 delete mode 100644 metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java
 create mode 100644 metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/entity-registry.yaml
 create mode 100644 metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/metadata-models-custom.jar
 rename metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/{ => health}/HealthController.java (94%)
 delete mode 100644 metadata-service/war/src/main/webapp/WEB-INF/healthServlet-servlet.xml

diff --git a/build.gradle b/build.gradle
index c8892045a6683..025c588da2b52 100644
--- a/build.gradle
+++ b/build.gradle
@@ -200,8 +200,8 @@ project.ext.externalDependency = [
     'springBootStarterValidation': "org.springframework.boot:spring-boot-starter-validation:$springBootVersion",
     'springKafka': 'org.springframework.kafka:spring-kafka:2.8.11',
     'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion",
-    'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.1.12',
-    'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.41',
+    'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.2.15',
+    'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.46',
     'testngJava8': 'org.testng:testng:7.5.1',
     'testng': 'org.testng:testng:7.8.0',
     'testContainers': 'org.testcontainers:testcontainers:' + testContainersVersion,
diff --git a/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java b/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java
index 7fbf013384b7d..888c4a0e99931 100644
--- a/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java
+++ b/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java
@@ -6,6 +6,7 @@
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
 import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
+import com.google.common.collect.ImmutableSet;
 import com.linkedin.metadata.models.registry.config.Entities;
 import com.linkedin.metadata.models.registry.config.Entity;
 import org.gradle.internal.Pair;
@@ -16,7 +17,12 @@
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
-import java.util.*;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.Spliterator;
+import java.util.Spliterators;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -37,10 +43,23 @@ public class OpenApiEntities {
     private String entityRegistryYaml;
     private Path combinedDirectory;
 
-    private final static Set<String> SUPPORTED_ASPECT_PATHS = Set.of(
-            "domains", "ownership", "deprecation", "status", "globalTags", "glossaryTerms", "dataContractInfo",
-            "browsePathsV2"
-    );
+    private final static ImmutableSet<Object> SUPPORTED_ASPECT_PATHS = ImmutableSet.builder()
+                .add("domains")
+                .add("ownership")
+                .add("deprecation")
+                .add("status")
+                .add("globalTags")
+                .add("glossaryTerms")
+                .add("dataContractInfo")
+                .add("browsePathsV2")
+                .add("datasetProperties").add("editableDatasetProperties")
+                .add("chartInfo").add("editableChartProperties")
+                .add("dashboardInfo").add("editableDashboardProperties")
+                .add("notebookInfo").add("editableNotebookProperties")
+                .add("dataProductProperties")
+                .add("institutionalMemory")
+                .build();
+
 
     public OpenApiEntities(JsonNodeFactory NODE_FACTORY) {
         this.NODE_FACTORY = NODE_FACTORY;
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index f180a3f42b730..4be31b2b6bb15 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -351,3 +351,5 @@ cache:
           status: 20
           corpUserCredentials: 20
           corpUserSettings: 20
+
+springdoc.api-docs.groups.enabled: true
\ No newline at end of file
diff --git a/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java b/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java
deleted file mode 100644
index 76d9a6744c4cf..0000000000000
--- a/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package com.datahub.health.config;
-
-import io.swagger.v3.oas.annotations.OpenAPIDefinition;
-import io.swagger.v3.oas.annotations.info.Info;
-import io.swagger.v3.oas.annotations.servers.Server;
-import java.util.List;
-import org.springframework.context.annotation.Configuration;
-import org.springframework.format.FormatterRegistry;
-import org.springframework.http.converter.ByteArrayHttpMessageConverter;
-import org.springframework.http.converter.FormHttpMessageConverter;
-import org.springframework.http.converter.HttpMessageConverter;
-import org.springframework.http.converter.StringHttpMessageConverter;
-import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter;
-import org.springframework.web.servlet.config.annotation.EnableWebMvc;
-import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
-
-
-@EnableWebMvc
-@OpenAPIDefinition(info = @Info(title = "DataHub OpenAPI", version = "1.0.0"),
-        servers = {@Server(url = "/health/", description = "Default Server URL")})
-@Configuration
-public class SpringWebConfig implements WebMvcConfigurer {
-
-  @Override
-  public void configureMessageConverters(List<HttpMessageConverter<?>> messageConverters) {
-    messageConverters.add(new StringHttpMessageConverter());
-    messageConverters.add(new ByteArrayHttpMessageConverter());
-    messageConverters.add(new FormHttpMessageConverter());
-    messageConverters.add(new MappingJackson2HttpMessageConverter());
-  }
-
-  @Override
-  public void addFormatters(FormatterRegistry registry) {
-  }
-}
diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java
index 5d1065e80d419..ade49c876f168 100644
--- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java
+++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java
@@ -14,22 +14,34 @@
 import io.datahubproject.openapi.dto.UrnResponseMap;
 import io.datahubproject.openapi.entities.EntitiesController;
 import com.datahub.authorization.AuthorizerChain;
+import io.datahubproject.openapi.exception.UnauthorizedException;
 import io.datahubproject.openapi.generated.BrowsePathsV2AspectRequestV2;
 import io.datahubproject.openapi.generated.BrowsePathsV2AspectResponseV2;
+import io.datahubproject.openapi.generated.ChartInfoAspectRequestV2;
+import io.datahubproject.openapi.generated.ChartInfoAspectResponseV2;
+import io.datahubproject.openapi.generated.DataProductPropertiesAspectRequestV2;
+import io.datahubproject.openapi.generated.DataProductPropertiesAspectResponseV2;
+import io.datahubproject.openapi.generated.DatasetPropertiesAspectRequestV2;
+import io.datahubproject.openapi.generated.DatasetPropertiesAspectResponseV2;
 import io.datahubproject.openapi.generated.DeprecationAspectRequestV2;
 import io.datahubproject.openapi.generated.DeprecationAspectResponseV2;
 import io.datahubproject.openapi.generated.DomainsAspectRequestV2;
 import io.datahubproject.openapi.generated.DomainsAspectResponseV2;
+import io.datahubproject.openapi.generated.EditableChartPropertiesAspectRequestV2;
+import io.datahubproject.openapi.generated.EditableChartPropertiesAspectResponseV2;
+import io.datahubproject.openapi.generated.EditableDatasetPropertiesAspectRequestV2;
+import io.datahubproject.openapi.generated.EditableDatasetPropertiesAspectResponseV2;
 import io.datahubproject.openapi.generated.GlobalTagsAspectRequestV2;
 import io.datahubproject.openapi.generated.GlobalTagsAspectResponseV2;
 import io.datahubproject.openapi.generated.GlossaryTermsAspectRequestV2;
 import io.datahubproject.openapi.generated.GlossaryTermsAspectResponseV2;
+import io.datahubproject.openapi.generated.InstitutionalMemoryAspectRequestV2;
+import io.datahubproject.openapi.generated.InstitutionalMemoryAspectResponseV2;
 import io.datahubproject.openapi.generated.OwnershipAspectRequestV2;
 import io.datahubproject.openapi.generated.OwnershipAspectResponseV2;
 import io.datahubproject.openapi.generated.SortOrder;
 import io.datahubproject.openapi.generated.StatusAspectRequestV2;
 import io.datahubproject.openapi.generated.StatusAspectResponseV2;
-import io.datahubproject.openapi.exception.UnauthorizedException;
 import io.datahubproject.openapi.util.OpenApiEntitiesUtil;
 import com.datahub.authorization.ConjunctivePrivilegeGroup;
 import com.datahub.authorization.DisjunctivePrivilegeGroup;
@@ -408,4 +420,187 @@ private void checkScrollAuthorized(Authentication authentication, EntitySpec ent
             throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities.");
         }
     }
+
+    public ResponseEntity<DatasetPropertiesAspectResponseV2> createDatasetProperties(@Valid DatasetPropertiesAspectRequestV2 body, String urn) {
+        // The aspect is identified by this method's own name, taken from the first frame the stack walker reports.
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return createAspect(urn, aspectName, body,
+                DatasetPropertiesAspectRequestV2.class, DatasetPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<EditableDatasetPropertiesAspectResponseV2> createEditableDatasetProperties(
+            @Valid EditableDatasetPropertiesAspectRequestV2 body, String urn) {
+        // The aspect is identified by this method's own name, taken from the first frame the stack walker reports.
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return createAspect(urn, aspectName, body,
+                EditableDatasetPropertiesAspectRequestV2.class, EditableDatasetPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<InstitutionalMemoryAspectResponseV2> createInstitutionalMemory(
+            @Valid InstitutionalMemoryAspectRequestV2 body, String urn) {
+        // The aspect is identified by this method's own name, taken from the first frame the stack walker reports.
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return createAspect(urn, aspectName, body,
+                InstitutionalMemoryAspectRequestV2.class, InstitutionalMemoryAspectResponseV2.class);
+    }
+
+    public ResponseEntity<ChartInfoAspectResponseV2> createChartInfo(@Valid ChartInfoAspectRequestV2 body, String urn) {
+        // The aspect is identified by this method's own name, taken from the first frame the stack walker reports.
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return createAspect(urn, aspectName, body,
+                ChartInfoAspectRequestV2.class, ChartInfoAspectResponseV2.class);
+    }
+
+    public ResponseEntity<EditableChartPropertiesAspectResponseV2> createEditableChartProperties(
+            @Valid EditableChartPropertiesAspectRequestV2 body, String urn) {
+        // The aspect is identified by this method's own name, taken from the first frame the stack walker reports.
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return createAspect(urn, aspectName, body,
+                EditableChartPropertiesAspectRequestV2.class, EditableChartPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<DataProductPropertiesAspectResponseV2> createDataProductProperties(
+            @Valid DataProductPropertiesAspectRequestV2 body, String urn) {
+        // The aspect is identified by this method's own name, taken from the first frame the stack walker reports.
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return createAspect(urn, aspectName, body,
+                DataProductPropertiesAspectRequestV2.class, DataProductPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<Void> deleteDatasetProperties(String urn) {
+        // The aspect to delete is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return deleteAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> deleteEditableDatasetProperties(String urn) {
+        // The aspect to delete is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return deleteAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> deleteInstitutionalMemory(String urn) {
+        // The aspect to delete is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return deleteAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> deleteChartInfo(String urn) {
+        // The aspect to delete is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return deleteAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<DatasetPropertiesAspectResponseV2> getDatasetProperties(String urn, Boolean systemMetadata) {
+        // The aspect to fetch is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return getAspect(urn, systemMetadata, aspectName, _respClazz,
+                DatasetPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<EditableDatasetPropertiesAspectResponseV2> getEditableDatasetProperties(String urn, Boolean systemMetadata) {
+        // The aspect to fetch is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return getAspect(urn, systemMetadata, aspectName, _respClazz,
+                EditableDatasetPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<InstitutionalMemoryAspectResponseV2> getInstitutionalMemory(String urn, Boolean systemMetadata) {
+        // The aspect to fetch is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return getAspect(urn, systemMetadata, aspectName, _respClazz,
+                InstitutionalMemoryAspectResponseV2.class);
+    }
+
+    public ResponseEntity<EditableChartPropertiesAspectResponseV2> getEditableChartProperties(String urn, Boolean systemMetadata) {
+        // The aspect to fetch is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return getAspect(urn, systemMetadata, aspectName, _respClazz, EditableChartPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<ChartInfoAspectResponseV2> getChartInfo(String urn, Boolean systemMetadata) {
+        // The aspect to fetch is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return getAspect(urn, systemMetadata, aspectName, _respClazz,
+                ChartInfoAspectResponseV2.class);
+    }
+
+    public ResponseEntity<DataProductPropertiesAspectResponseV2> getDataProductProperties(String urn, Boolean systemMetadata) {
+        // The aspect to fetch is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return getAspect(urn, systemMetadata, aspectName, _respClazz,
+                DataProductPropertiesAspectResponseV2.class);
+    }
+
+    public ResponseEntity<Void> headDatasetProperties(String urn) {
+        // The aspect to probe is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return headAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> headEditableDatasetProperties(String urn) {
+        // The aspect to probe is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return headAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> headInstitutionalMemory(String urn) {
+        // The aspect to probe is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return headAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> headDataProductProperties(String urn) {
+        // The aspect to probe is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return headAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> headEditableChartProperties(String urn) {
+        // The aspect to probe is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return headAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> headChartInfo(String urn) {
+        // The aspect to probe is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return headAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> deleteEditableChartProperties(String urn) {
+        // The aspect to delete is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return deleteAspect(urn, aspectName);
+    }
+
+    public ResponseEntity<Void> deleteDataProductProperties(String urn) {
+        // The aspect to delete is identified by this method's own name (first frame the stack walker reports).
+        final String ownName = walker.walk(stack -> stack.findFirst().map(StackWalker.StackFrame::getMethodName)).get();
+        final String aspectName = methodNameToAspectName(ownName);
+        return deleteAspect(urn, aspectName);
+    }
 }
diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java
index 13c2d83343aa0..205d401dd956d 100644
--- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java
+++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java
@@ -54,7 +54,7 @@ public static <T> UpsertAspectRequest convertAspectToUpsert(String entityUrn, Ob
             if (aspectRequest != null) {
                 // i.e. GlobalTags
                 Method valueMethod = REFLECT.lookupMethod(aspectRequestClazz, "getValue");
-                Object aspect = valueMethod.invoke(aspectRequest);
+                Object aspect = valueMethod == null ? null : valueMethod.invoke(aspectRequest);
 
                 if (aspect != null) {
                     builder.aspect((OneOfGenericAspectValue) aspect);
@@ -82,13 +82,13 @@ public static <T> List<UpsertAspectRequest> convertEntityToUpsert(Object openapi
                         Method aspectMethod = REFLECT.lookupMethod(fromClazz, "get" + upperAspectName);
 
                         // i.e. GlobalTagsAspectRequestV2
-                        Object aspectRequest = aspectMethod.invoke(openapiEntity);
+                        Object aspectRequest = aspectMethod == null ? null : aspectMethod.invoke(openapiEntity);
                         if (aspectRequest != null) {
                             Class<?> aspectRequestClazz = REFLECT.lookupClass(upperAspectName + ASPECT_REQUEST_SUFFIX);
 
                             // i.e. GlobalTags
                             Method valueMethod = REFLECT.lookupMethod(aspectRequestClazz, "getValue");
-                            Object aspect = valueMethod.invoke(aspectRequest);
+                            Object aspect = valueMethod == null ? null : valueMethod.invoke(aspectRequest);
 
                             if (aspect != null) {
                                 builder.aspect((OneOfGenericAspectValue) aspect);
@@ -109,7 +109,7 @@ public static <E, A> Optional<A> convertAspect(UrnResponseMap urnResponseMap, St
         return convertEntity(urnResponseMap, entityClazz, withSystemMetadata).map(entity -> {
             try {
                 Method aspectMethod = REFLECT.lookupMethod(entityClazz, "get" + toUpperFirst(aspectName));
-                return aspectClazz.cast(aspectMethod.invoke(entity));
+                return aspectMethod == null ? null : aspectClazz.cast(aspectMethod.invoke(entity));
             } catch (IllegalAccessException | InvocationTargetException e) {
                 throw new RuntimeException(e);
             }
diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java
index b7e255b8c270e..cabaa2cbd75e6 100644
--- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java
+++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java
@@ -13,6 +13,9 @@
 import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
 import com.linkedin.metadata.models.registry.EntityRegistry;
 import com.linkedin.metadata.models.registry.EntityRegistryException;
+import com.linkedin.metadata.models.registry.MergedEntityRegistry;
+import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader;
+import com.linkedin.metadata.models.registry.SnapshotEntityRegistry;
 import com.linkedin.metadata.search.ScrollResult;
 import com.linkedin.metadata.search.SearchEntityArray;
 import com.linkedin.metadata.search.SearchService;
@@ -87,9 +90,21 @@ public AuthorizerChain authorizerChain() {
 
     @Bean("entityRegistry")
     @Primary
-    public ConfigEntityRegistry configEntityRegistry() throws EntityRegistryException {
-        return new ConfigEntityRegistry(
+    public EntityRegistry entityRegistry() throws EntityRegistryException, InterruptedException {
+        /*
+           Several approaches to loading a custom model were considered. This one was chosen
+           because it most closely matches a production configuration, rather than relying on
+           a direct project-to-project dependency.
+         */
+        PluginEntityRegistryLoader custom = new PluginEntityRegistryLoader(
+                getClass().getResource("/custom-model").getFile());
+
+        ConfigEntityRegistry standard = new ConfigEntityRegistry(
                 OpenAPIEntityTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml"));
+        MergedEntityRegistry entityRegistry = new MergedEntityRegistry(SnapshotEntityRegistry.getInstance()).apply(standard);
+        custom.withBaseRegistry(entityRegistry).start(true);
+
+        return entityRegistry;
     }
 
     /* Controllers not under this module */
diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java
index fc2aae1a75ab8..57803ac904a93 100644
--- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java
+++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java
@@ -1,6 +1,7 @@
 package io.datahubproject.openapi.delegates;
 
 import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor;
+import com.linkedin.metadata.models.registry.EntityRegistry;
 import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration;
 import io.datahubproject.openapi.config.SpringWebConfig;
 import io.datahubproject.openapi.generated.BrowsePathEntry;
@@ -31,24 +32,30 @@
 import io.datahubproject.openapi.generated.controller.ChartApiController;
 import io.datahubproject.openapi.generated.controller.DatasetApiController;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.ComponentScan;
 import org.springframework.context.annotation.Import;
 import org.springframework.http.HttpStatus;
+import org.springframework.http.MediaType;
 import org.springframework.http.ResponseEntity;
 import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
+import org.springframework.test.web.servlet.MockMvc;
+import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
+import org.springframework.test.web.servlet.result.MockMvcResultMatchers;
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
 
 import java.util.List;
 
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertNotNull;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
+import static org.testng.Assert.*;
 
 
 @SpringBootTest(classes = {SpringWebConfig.class})
 @ComponentScan(basePackages = {"io.datahubproject.openapi.generated.controller"})
 @Import({OpenAPIEntityTestConfiguration.class})
+@AutoConfigureMockMvc
 public class EntityApiDelegateImplTest extends AbstractTestNGSpringContextTests {
     @BeforeTest
     public void disableAssert() {
@@ -60,11 +67,18 @@ public void disableAssert() {
     private ChartApiController chartApiController;
     @Autowired
     private DatasetApiController datasetApiController;
+    @Autowired
+    private EntityRegistry entityRegistry;
+    @Autowired
+    private MockMvc mockMvc;
 
     @Test
     public void initTest() {
         assertNotNull(chartApiController);
         assertNotNull(datasetApiController);
+        // The dataset entity spec must expose the customDataQualityRules aspect contributed by the custom model registry.
+        assertTrue(entityRegistry.getEntitySpec("dataset").getAspectSpecMap().containsKey("customDataQualityRules"),
+                "Failed to load custom model from custom registry");
     }
 
     @Test
@@ -200,4 +214,40 @@ public void glossaryTermsTest() {
         assertEquals(datasetApiController.getGlossaryTerms(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND);
         assertEquals(datasetApiController.headGlossaryTerms(testUrn).getStatusCode(), HttpStatus.NOT_FOUND);
     }
+
+
+    /**
+     * Ensures that a request containing a custom aspect does not cause an error; it does not verify
+     * that the custom aspect is actually processed. Full support for custom aspects still requires
+     * OpenAPI-generated classes for those aspects and their related request/response types.
+     */
+    @Test
+    public void customModelTest() throws Exception {
+        String expectedUrn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)";
+
+        //CHECKSTYLE:OFF
+        String body = "[\n" +
+                "    {\n" +
+                "        \"urn\": \"" + expectedUrn + "\",\n" +
+                "        \"customDataQualityRules\": [\n" +
+                "            {\n" +
+                "                \"field\": \"my_event_data\",\n" +
+                "                \"isFieldLevel\": false,\n" +
+                "                \"type\": \"isNull\",\n" +
+                "                \"checkDefinition\": \"n/a\",\n" +
+                "                \"url\": \"https://github.com/datahub-project/datahub/blob/master/checks/nonNull.sql\"\n" +
+                "            }\n" +
+                "        ]\n" +
+                "    }\n" +
+                "]";
+        //CHECKSTYLE:ON
+        // POST the payload and expect a 2xx status with the URN echoed back in the first array element.
+        mockMvc.perform(MockMvcRequestBuilders
+                        .post("/v2/entity/dataset")
+                        .content(body)
+                        .contentType(MediaType.APPLICATION_JSON)
+                        .accept(MediaType.APPLICATION_JSON))
+                .andExpect(status().is2xxSuccessful())
+                .andExpect(MockMvcResultMatchers.jsonPath("$.[0].urn").value(expectedUrn));
+    }
 }
diff --git a/metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/entity-registry.yaml b/metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/entity-registry.yaml
new file mode 100644
index 0000000000000..2b501946ca858
--- /dev/null
+++ b/metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/entity-registry.yaml
@@ -0,0 +1,8 @@
+id: mycompany-dq-model
+entities:
+  - name: dataset
+    aspects:
+      - customDataQualityRules
+  - name: container
+    aspects:
+      - customDataQualityRules
\ No newline at end of file
diff --git a/metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/metadata-models-custom.jar b/metadata-service/openapi-entity-servlet/src/test/resources/custom-model/mycompany-dq-model/0.0.0-dev/metadata-models-custom.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7a5cfb325987d51432b4d9ae6b922ecf49b80ef3
GIT binary patch
literal 20878
zcma%j1yE(%k|j{MyF=mb?(XjH?(VJ^cXxMphbr9N-CYWY0*c}HpPBCW`%lli5$DE@
z6Pfo&tlTSe?Y&b$`Wq-T5D+9J(0DzsI?&e#<R4#OAE>WGR#ZihR#Hxk{u_|OUkn@l
zL9TJW46wcq)c<NID<~%^CaSDLCo6U@J2@dEO-nZiCrwK=Jvq~)#IVS`d$fN9@-Kh(
z<^O-L3i|ci*xvTPUGTr!!2fR>ThA{`2SYp0|Ir%hzge3&{x=7ec7@;sa3G-X$Us2U
z|IR_g(8W;E)zHS$#Z$@E##GSB$<UL|*v8P=xkmj$9Yq}VGX@l$t;*n6U>tU(#<zk&
zt&4AqQr}c;5LuvAo4IVzH8mR@-HVj0wcq^uzMs56EY_*bAZgDu_+4;*6nS}>H<DnY
zLoOq%W#sfcyZb+HXLyr;zQ3Ii19jb21u3GznyW4zWT*F_4SVV!)^*3?8$-LGDVhvM
z78v1TpvjcahDf2721y%bqc)}nQKBJbtH!Gu5|<UCr-c?0<JWq4P~EjFMOC@B11t$2
zTZ1m3w+%H|4_DJ*x0tUkf!xWln*Z1=J0zmJ>kg<o1Q(RfuvAA&C%N27+z7A>YfH72
zA*W{KurQgiCG6qLxBm`d975TO9Vqj*Af(;t!o=1FO(v#k=PjKWp&=glF^#wl%xvuy
zhFNJ;GXPsqoWqEDu57l<k(!6ioSh_;PMFx}Y6s^$f_n4EXf%r<VgX*#)G9NVGdD-h
zgpNGq@xaqAWBVbZ+f+hqkQns>L+w0NHc4AD_kb$`F~tu6*RV+H7fODYYe|~eYKkT_
zPs)-$T~GJY^zsX|?QD~#Z(1PcoAmq@mq@v*<-Jj=w`W)YJh*Zdq~vVeFi0b*h;r$L
zWJJ7%(Q0=(?m)uoJi2%qIudx*i0sHCW-A@7+$FB?s9hCMmf#d>8@{l2wJRr%>JnbC
zx7Z%$<-Kd#rY5aCK0$xQGA*}sgmn#SkW2FYi7#!-bEe(lp{3$4x{kUH&h=te5(4r}
zJDgZM*yA3Z-2TRq9X#|IJsH?oC^B8?EYc{}R#fydYkZ)A@#c}oG2q;!F3q3bCB7Ux
z^GUal(G}UEj>&BDzv)3cxEXLn&yd1SoXnmIqa}{4wv1~}9JZ*{qv&hZhHc>_Aw&wE
z0_<K%tvuE?sd>(=)~A*E-eeS6*g$RL;D1IN9`u(f3&YmzH|X;qLvOD*pnSJ@LHurM
z@zV`(Trvtd702omB#-7(y65a8*x;Xul3##f3AjQ#v*L(s24K_&7gQt%Ua!T@U2wDt
zJ3>9>Je38}v-*VAfqmGs6f+PIyn#mWf$dnm!0#}>4~I6)U1;)|wyQkDA;rcLb;V8|
zxsA-)k}v3p-TiJq_04JJiQlv;of7l3W7l0nQ65GcILB?Wo!DJ-i;MIxqZeQm&AVhA
zHf?HXx(B+8Rqb^<80G}yw|GI~w|v3sV;H2O)q~|u>*B89QcK-jJKj+mms3)yG;U3s
zK5=S2z{)ThEA(oHOwim2T~9fzWfympRY)qjJIS=ixdRC3b^^?qwo+Rpx=OW&GDP2W
zkKhPdR}4G*N2gUqVMUB9+FK#T;7Dud!`r1C)n^Ngk#XRtsz~Ab-iE1JJR)5%`?<i}
zXE{0dR@Jedz*Vkax53bU(83HQ%H>9j`(nZBhBE=Ixx`96LMoCxJk?9%jpjNH-eC-H
znsO+wBfL>9VIPY}??aW<YK5W)IIr7>FB?WDA9-lw)`++b@HdH8!Z>ljW){%6rP$xl
zRw*KBXJ<*MuxnW6w{oSJc{fgLP*hpIHMfQtl$qBXfvfgO_i*s4Vh74X8Z)nlstBUf
z7Y2(3CjD~sLP~6;CY>(D3IPcyEMBJxTt*A=C1Bi$NQ0CTxGuB@aoP;CNsPbcSC~@x
zVH>q0J{2xdmv~xb0orQF+R>StO{-}^rDf4S@xUIomr{shWQs8LB$fJBeD%@GExF^?
zPV0vuRqO`tdNe&~VUuG_rNe?&TJDB4E1Sr<`z^CPm857s<4L>#hE#nqq4fy@a^|-R
z12Q2v!HZXruJjLWip72nXW&u`n#f=dl8=aeRa~+wE|&w=olCP<FvtW9caw2Yw-}u*
z(L~9Im&B=gsQf2FmURf~i4xJ`a{t#3q!%GQ+pM3JBM+Yxeq}T~5NGr|1)O_|zQZjK
zPh3-{cXS?etMYwCfunGNqdg&WZ9sQ8rlW*&zP^VOesYvWRrUpZf7tsGkO=j0J_|eg
z?TT&*t>j0Dt?>=6jH@$#c5)BVsxj~iSaPzBxOArjIBF0Q#|R~hsS07KSf-IVw`YY#
z<X%MF<|&%3MebJ8HmN~79j7I$Zf~VIw)UO&4@eFJfh#wzi!2$_&zaKwg3jPV*6MZA
zi#T3wRzZLAfMqGq0Avv-8`mC;Ui=4U$*?KWFZe#A#fTrD8*b5)XKh0&uMJ&I7Iba=
z0#5LaCwM82QYw+)R4xG|@6cBatc-ofJrN5|Ji=E(D%og*g}GFMc&+p8aIT-ge`RWv
zYkkVGFHRo=0|KJ^8_ecRB4%l7W8(Zza!XOxmPZjp{ZrazE8T%6C?H6s;1w(?I3yqe
z3kcD;RT4Jx$!tw(tlwI`S}f)t@CTufm)I{t_yZ%$aw`}!EN;xxL|f7-`@@&R`FN6Z
z=XJV``R23R6UG3GTF@e#I%zf@x)9>tPX>HXOlKhIJCbYh8)MBLc(DCcJ#5D`1CI2~
zt-4N~)WG6`m}u?tGg%w7m)JT$RU_tx!Glf8^XF_H8%={Qty$<tw7G0#$U+G&yL-EB
zxi9f)LYG)*rww-hWD?4HBaNYbdbbi%vuv?*NugeK0<CjZ>52aNbH|~4<tB^CmE{s@
zN`}`Nm0?Q9rYWYyi6l8XJr^t&CUf+=bd?%k3EKjoX<4&J?lCZ0vA(@}UB#(Qx2TqN
z%hh=#`ElKf3^1hezBzZZ##WaT^GUD_(rqfKi1=icrhvD~Qtg|-PQxpo??u{19jW6l
z17bE#ByO=CdDv=#c^)KC)}gdixS&_dk)6sYqYv4|PV8y-Nl4G03~wf-%1zD&*MepD
z`4c8m0Fv;5in(BqO!(Zt8Oji90ye?9BGdt<jdjy7T=!vpuB#mp4e&E4zH$znG?m6s
zvJ!4V|GK@pATs`$L^s+-H4)#rVft+z*)w?yh4XAPf3%|*sbHm6&=;S9lv3xkXbZ;c
z!%nM3gvk}wO`;a(!z4Hv$R_~dJEOq13NKu6`{54@%wb4D${QvzO7G}*1!AUX$Gh72
zn|{@@Jy6XhhX}Ql16kYw*He=0mN$`0KJKe?fo(BvfTWH!P&Ee%kT1CdzdY_wlqdWF
z_gRrU&N6*1h2L5VrPvg8P(#Aj51?8op|<2ZgdE?W$m0+tIPDljuipigQyu#udXb0O
zLvC{be*}v@24Vd;@laeBOJx+5{&WryMOxk^pBE+bN_!|qg75y0VT8*>HkigD0uX8q
z96lN(ECZF0E&ay$Yfw}X+*zl81x3wQ%E$FLg5p2YJrV^cdn;387fX9PSwm;*e+Ed8
zs;>N^04nc-E`|1ZSUCj|G<3UmawI_^LB!8^o5q*W{UWSR;^eE=67HqTqP}5u8Pp+&
zgRBUCsqTduGEf~pd>@OGr|F*OY-fA!kJpO}b|B?;`+PigX9flv1JI(Y_7H2XJ8(~E
z+j`jMd;<g671!vCdaP*mTGXX=<T_PMt~`;S0Ec3n7q~FZ2VW#wk8U*-RzO4|iprH$
z&Nbolg`4Ms-zcd+w)0HNqj>{4;8`u*)FQ{(SACk<<SrBqa4DpK4y#MLf)TfWUKyTy
zuNue0Tk>OX8#+$sF`KK&X^V98OxW?e2vXe*#7vLtk(VNriAMsx?JxGt*piF2I%EEb
zCJbfcRtF{60Ss2k@fZLc%aFn8R#hYZAI0Cbn){m#Z+@2{o<u7RG#$#C`-c6JBeGVz
zja|piSmGj%WtQJQVMdY=EIGr8#c4V#S9~FkqN#1kIvEZ7Mbcum$;cQ@_t{FZE{&a7
z9U+)u*Xv6TY9qYUml#JFT#<jn22)Kre?(Gr_3D<Qb{$toQME1c!9^GJjai&>k7F#2
zuzO(gu#QK7rWFQqXYhCLy?Jl-56o6EN0;72jeM}a-Bv;u0$)2L9cRW@S`nEL3~f@g
zZ<X%$`rFMTp$sP)9s_L>GhuPg`!ImIPJ^%*Jl4jS%q_(zAD&6V8^zG%FU|7@`${~?
zFKkKdQ@KETa|oxdh4*VdkS$H`@raf~YmbsNpe(09BCR7F_7dFVcY3V~stvd|`vjTU
z7PXK&+7;R2bfhh#lbWK`%TWP?K0rVk8Z!f5+M>z=r!0lRfNp-KMc?U=SO;7(T67i5
zEf}7vkm<UkwJfGsR&!ByW|!j@TT?XDA_B*w$2^J60zWEu7A)`l74Mo-JWV41DD1vS
zfc9_T{VyC4lPGP!Ab>dX>4xtnbV?j@XJx6Rlv!ND_X9Y&GEF5Vo(L4KGK9l6$F^}%
z6M~=WC@KIH83G=cgTSi*S;7a157lTH3JjT=G;Hkha?<_}^Tl`mK0gQtge*;x`pQY$
zZ`PV7ii`~=Mr$H-B4dM*fti4Z)Oe!R<$5j4UW1WI=x=)(vU?DZduxV`#w{T7qjEi+
zb%bjU8#)6v$`XGlF7X_0D7hUd674(LoS!nt7d=sBz;*YO({z94uyJ;TDLK{cG3=bU
zRI}N(nLmP?yve{<KS^7Mfn7?D;v&I535BJFrML<`YR)b(u+t^kkz9y^kbzojyr?hb
ze`;%bMmMo)_Q*bFq@GHkhv291VCpN{$iA;5qnRp)Sz)(a?j6R)i+9r&FO^o%?p)vD
zH{ma2*YLGfc+0)az03Md;<d=m%XgLWyjg^OuPj5UCGk3)b8S5W<W9MVX4WyZj>?i!
zhrx_eHkB8+)CS-#HyG~50$M(O$l&hr@XLRLmvkmh1Q>Z{y6w+Jo)9Vq{q|a#2`CK_
z9uei@Eh~@2X?cuMwje(wV-RMOhMLtUtmzx3$%z_ZncSO)RQ_QqL}?GKPVq+d+M!q+
zV11}du0Ik}zDK0F^o#DJDqH0W^|xg5ud*#1lE*g1E<y@ct1?3<cJY8W2{Cu(op=wm
zw;&Tf(SH8Cg!ywd|7Q%-<LE}7(8Sn+?zj@vGKFZ&@h}Z3OX#8K0-xxTYMBBPBZ@pD
z3NAa*)@Yvx<cHg$NKX^Lf5}O}7`ax#zAAl9NFX5AzXL~M3qw0|QyEKV7gIY^r+<#T
zIjU3kIHIVZQ(hlEveD8)l+ADz(s*6d!;<GhMj$c57C~8TjvEpV6j_fX+h$E!s}}*P
zfdT}G$jA_Kn^bh74wWiODo~3s5QzJL!IXClf<TX^?Pgc2F*=;7w@miF=i?ssY5LFY
z52PO)sl?&NEJ4ATFi9-<g0N-6qfmZiAs9j)j3E(-WW=Kfq=KcOo|^IMpqw~^%|9qW
z8BqqS1iOb3d%TwPI)+`%BkGu7(OtcTw=5917OMeT+e+pv;@Z{LrF$%?=VW;%kcV&+
z4yBlKB-~vHx`y#EZW+uE=x|Z=i|lG1kBO~DiKKeXu2bEGs&EC~Vrvzctbkl0(WYgO
zxpL0UeN}MNX(By_7Oj{CzgFW*5o!=q{MtKJy`xk)!{4?SjjG40O@<oQCoNN@&KY|q
zjUKWa%PL7OEr%+W-gkr(U~O90Jw6xRgmkyz+`<0Z%op9R3*2K4`G*$2xrQF>Hj(G<
ztuPqFonf7IQG1*%8?V8N-Niu1m`FM~g}`XAtW%E{tCAnpvWmgNEj2E>Z8WShfK;~F
zWY?K&s!GwRbjoNuUwW4hyDDB#hvJeiy;M({02CWZZ%nx8;;1~K!a<Xir^0<fp51aq
zWl^)tKB_WwM}_&}_lqT}!C9t^PBF3|D$(oJ45+Vc>=iwal~{v8K``y*28!{df>Q0p
zyG(RUgHL5OyPU90<#C*uv=UPn;DR5V_xu-<2p!JcXF-yTpaqd?RV?}xSjVTn_q2CK
z<jX5&bYa^Dn5Up}G4n1tD^?MgqZ9)jc4AKnP?%QFwNDP>VAr`!PxVZ6$*&_(9JU)|
zE7hs--mMU$WnfN?8>t2XewGM9!^vtolSCik6Y+ySiAR?6W|B$a;!5rWCG$H!-J@8b
z!Xlo2h#TQGHpkRIu;mhJ`!C?k>|^dqeKRqB5LaFqG%>xqUD&qs5*#1Pz#HA^>|L>Z
z?dyCC6@I6!&$HQQbIOxP)PMMNJFwqhn0qb!g)=eu!42sXVW}_I8{tES@3O?GS)L)o
zb65%0rx0hhZ>W9+1KGlk64!1;ke>8*XfJZfKKE{-72FK4lx=^8NL6avGu|QY^g$uc
zt&Y$Bb@+|s``saaBAWcQE_%ydM+>%CvXv{IzVyw0>I1%=^vwap*|*IkzQLJ!@fX$K
zn0?ZDy9j5>@07mB%T5A|7+ufS9Z?4o3UHw=AT!En@||eve+2o$F)s?}y?CT{^dX0)
zm4=e^Fh-!pPTUxM5gL@aJ@y4LzJVw<ZiPoiUAxkQX9g#SF$iyi5XMXG09*TaX4vog
z^#T`fZNr?QGST(ddn*lvPc(5zy)&#GJoJ4+isB<f7!ziWYT`nLe17{Y%?%xFA8miB
znNGMsK;(a8T>0NcPK~Cg2kH^lr)=Vs4FGE2$x=WgRdv8_6g*Nr$Ur7Ygbhgo^jnK#
zBX>Mu6Z6ER4S|>yz4nclimmmHFi-hWJPm72e(UP`!unh9&HWCauhq^?*4mVrW}>Wg
zF8R)+&%NKxt9P8F-p9;6J&;DQ|FQw=HtjyI>O~zeiOQ~At8-e2YrFO!FS5wygxhtJ
zMMs`(Um7nT!M-3=i$^)Y4gcxD_wtn3#<Lmx;x;{m*Mpz?4!HGM5GgL|B|Ze<fEXEl
zzleyBZ2tfm{eY04?`LhE$wb-eoh4M%uaNLy<kSPG;VVff`Hsz8^xNp~VI+%RCr+7>
zbBD~=vIdA^UmR70h`pQOM?`o=YDd;47+Xz5v*R5&lU_QodEy*Pr%yzE1chMq48(F!
z9i;}NKn~>w6l&OX8*G=*8N8cJuN6^Z&9HSNc}eoUf=kaOpGs{N7~QHIr`%My2n8;8
zj(Q>oQOu}Pu?tvhFo_!uw>7%<DYTg)YdMLvnLDk*blGSJ>&tA7#nr0<DELdPhvPFB
zqcsxc1}_KmGnI_AmqIK_vEuN11xL&twWATtiFfBMNn5p2nS@S`AD+o0QT25Jok=mK
zckC{1gBx$J+DP<JzdS3esdivjpe7-#S%saKp~|cE8)OAkvoch`vN2X*Tey?36>9WM
z1<G;+Hf1P&vklF!HnnLU!CD<Y94%-ICZ;Z?Da6gWQl6pHWnDpPuz+EmWktCNe?(>s
zYDsBS?w8XgE+@OXG5x`eGuieBQS;&%7me{(XyDYpKF0&G^2Q})6jAe>&Ni)}bk=yZ
zc<)jj>AIpi;t-cDab==#%Q<&ZoYX39qX-kO=-W|xvQ>V7jk~}%)rdF?X;_8(HmO%w
zF6D?CGLb;NZ;GUkT3-m8cg)i6U_I>6oZKnf#M;=i#2_PX7)xe-(Ju66Eak_%Y>O=>
zwvvHz)vZ4NXd0O{9-iBnGs19)<L1j&qVGflHaM|V&vRBVCbv<oAtmLiVx9Qx1GU8>
zF%&f+=mAre{jm*IVeBs?>&!;fD}pJJeugy8PPEHja+}M(A?j22ERkKLyuhprU4DY(
z3Pm6#MO3iv=ApV^1^o2zr&)VCW5QXZW_4#URTZ?i1!{{rRVgl`0r?y}hV=T~h!i`6
zl!>h{Dc^6|h|HK-ZelgyVs(oeB-3miX0?NUq8&fW#0N~@G)_NhE;jayqv-4}f1zXP
zi+>Lce`d$p`N3}L15-El!lsvYi|3;*hTIu_v&R~wp^0A{LU#ph77~6l@dE5O8uBUe
zV;&0B<-D^~d#*x+%L+do1YJ(yfg{!IvX`ysOmbP$+MWdf>zs#VQbJ8}F!BPW?MHQB
z{=#|i2?I>4yzg!=*0e}skGWOaw5DXc<Z#z$`i@)|^^yXMl5AeUtYX{ZX08-U4r*g_
zAWCzJ{$wD<C{bz|1L}zY<3tZJr7NgZ#dtG1e7HTluQApR)rWqIgVi^5V?t6Nx_%Y{
zzNLc8o|P#$lrP|wMV=#d91RAJ$2=-k8x3oRQg5ThGS-gP)?k{BhlQ@7Z)cAlA`6bj
z#9c$O$c)-mx+I4pR)-Hqi8ZoWiH&-p&}3}72tedUvw|mOYBes6%m~94LNB{6xGy-K
zQ&4K6oF(46E_kB-iJS3U=uR@F$1yG-rcSf|T}k{jql?C6H(+Q&yIy0G!hfflq4<VI
zmU-NF$L6k_!fW=p-F9~Km9ZAS%IiMB^1RZBg`-%cNnF5L*r;50YlsRyhbLHYcs}YG
ze$z)_@W%8V*gyG}{5>PnX$>VZx^K6RX}`VTu%c<vGxMV~jyYv|6&6BSs{TwIA+tP6
z8~pJ&qK0jGl_JHP>8#n3)2fkx)8{b<XLZu^Lq+%u_P9%K*k*y#QsIz+4?eGsBq`~l
zC%g|vLtE+~L{c<-#ff`)IG~7ZEMQ+AVz}iV7i$vY=}&TE=}%Te?ASuAC$g0BkN4lq
zIGi}{Ra;aPyNODmC7GY&4l{gHlCVtD5)BS=4GhXO*}}pcae&C2MSD>t<)Y+WWj0>*
zd0g26iHxMtnAlS0VAPF{pXYeZlfEFe6b1I(h^7xcTI&HN=J%{okk1A!iE_#rMhmLl
z?JctVoD~}_sB>u!MvOf>j9WGPiW5nysBPKc8{@G;@^sM3q;|O$l4E-eH<^i~<)Fh`
z4q)?G=3U3InOI>r&eRa&@=2+b2i#~vGkSVkG;0Q0ltYF!p#kk3ynILc*pYGd^dHSe
zYgX4$%$h_(%m(a3C+CL<s^ge-knYJMM>3zI1pD8UML&(_?#LpX<czS?jPZcJpRc0I
zxVzePms*4248NuTOnc>HVr%aS>c7g(nuq2rIcOJMEG%)+JbHx;(Iv$fA59hUkOM>u
z?aL@g&6l_$GGtG9R^#|shgRNZy8MG5q>%AMFi1j}X@$U#rzN^;rqMVJF|{(MPap+o
zF1q*;iPbb_DfCp({1KI;imoISBA4E;+|$!2L)Ugf-=V60!e2gw&>f!#WRQP9hGwi?
z@@b2p*on^K9Va*ee;HBSNE^B|I%gI6K!3;^TTz$sB&}%^xTWZdpx?-mZiOn_tKt%2
zKLI<A-g_5r)*VBbE~TD@Mp{Wse*%%8ew@VFZ5D#FCA)L%9zWov9$~8Y?RQ0HFORC-
z0(&<*B2Ef9Z$vV2VoKuoLUk@OPG5OpX^Yu}@+(cmfrSSC5F2LrthbKJ(}h1Vp+Aje
z=>}2c2=6%1SY1JE-vy=P3597aH+D2DKG0ZXj~zPPjslS$u1^jD8>y#>njPz=7DBa-
zk2FN?3)){5VeE|N>C5%$OM3T$S=*U7P?hwasE-O5g2kIUFhC8Sm2f<6R4s>BpYjUM
z*P=9P3FjpO5X@fTaMU?e1?|wiV~7TLijk*I{?tTh!Xt%?71tNM;z)f4J2pY$!-_yp
z|4wV@w>?Zs0{-AjN+fI806E4+xIvf|<_Pz6G}i;fX!jjPNE{e3s66pACbalPYz9^R
zvl~ys(}KkVQa14b;KgV8qu=vJQD>6@AnjD(7lv_8HJwAsULFJ_j1)On5~eGB@diTO
z7@qBdCqQ3FK3mU=O!@|27-~TRGBO@)K_#hx!5afcx-CNX?nHY$``e9*^hSjE+TfG>
zht{TK=BB9XC4S>B-`&rT<R*mpFXdbnA?XC4FpgJhF>cR}CwByVEnR%DXH{BNao0Wg
z@CN#N2s`|-cl~w#9c)vy72}=-?>KabwX=a9aiXvyc@8YI%k^CHZbvzJaz>#Om&k_C
zJC*Cm>Y(Jo2AT(w<JDr$yrfL352CLmfkrK-|J);4b)sCIwn(u}OAO(}neWFpk8Gvz
z#^hOVBstXb`bFiR@<40i?jTS1@i3SLJKFa~5WEeB$ZuqHsjBB^CS7zKlAH316@Cdo
zAIPNl6JU1(p081qlH_A~3*)_2yswOqJQ{pK3z<;BcCIzl6lQ#JhJ;x%l5A;H$x}5P
z4tC9yLuSM4Ccl<H^J2cY9oXufSlkNGi4}6kc9m4b&lLuhM<mD7IfzoBz_$BI&FH{b
zJ+GU2;HD|-_U&UH{zwsbT!LcB6PjRrDHm)s>UhGe<qOg&a(UZQI+qpH5r9$*ba#Zi
z&cB0@db6UsdJv{snOCMRIX`kS?MXPtSwGdm$e5L#soS@=sH%OX|7(g|U+eHY@k<FB
z{wlvI|4xcb!uZc}J0{6menA0q<a29yTr3cZD4d<{s8mQ4jGmzLQV6?CwvEwfHePt8
zUTDQ=MUbKQcHu=!29gngup7q2S9Xo;vnR2B;_p{BbAQ7rFYo{H#}R&)abIzBI0y1K
zBYMFc#|RWs60Qk;sYiOO5qJ}iHJwmV7cTkBDL%rp=wy@v=;TBw@xlT%mK(u^oNB9#
z_YOr?tBx6d5^s^00pm>k3LO1GG8}Lq?c=;?sF~t(r5t5x8yNuyw>mPS)XV)xrX|jr
zm+Oc_l+8$C2JqBehN@BRSO?>p_OJ9_d3tMy=G{fzEr~2#K;px6o2yyIwqvgyR!b<a
z+vq5_=|!qC^VL@Lo@#buI0O+oDbFA=R4`_Id6G18>aEBR3VKBvvl}YSGKCdt%Z2)J
z$`w}~D0$CyX8c@Z$wBBgjTDDa?d~6$A!$6_AhvdinFT6wl=uv98kGJje{{3EDFl>k
zHylvVtCpfO12o=_I&yf0pFyMDACMS+I%meHdmN;g9i*ro41`Tq-d|>{b<oy04?(kX
zSD^1_rfzIb+gW4+y*il|9?Tn?z3QjOjymwAjHM`Tbg+xEro~uqSalk%u6WF6o+_?1
zb@)^Yer~8=pt`M3ds*x52{G1@MdCfpO=BcgxcCSc1tIFo+eG{2V}7^=u0nnu0-X`4
z&o&IJ!Y1euLlQ|4@7`mD+~>{RKxY4<_J~xVct$H&;R8T*ty3TaHdbXRetQ7I?U8x+
z)MqG*OQ8(u4b^epAU`vGg2N$eEHQO4f0C3+nx8h)CD0Mal31eO{K|d!Mzu=b;t(QV
zELaIr2&wQMger9ilvlgJ8`>+I(8z=#d`6TwR<7^zja^c>UDWYsTghhvYQr`EUxDqY
z(&xkR1?-bAVE-*m>EEgOIf?%<-8J&rM7wp>C`1`vSXf?etEvWyi~>uKm`DcJO~8k+
zQGnOERpb5hLHi7bDuU-Uc=$BfkKiDqg;A%ItaEHSn>TIe&rHVa-Tf}&4<2+G6GlrT
z#56daxi2A`sy3rN*76%W>?VuDgn*=YDB-h$7n7FJX!5u=?3!sG9VWnf9_7N(TN^aX
zXM<F};h1}KYd=tBhu56VYCPWXRB`tPnmn<$ZFm4ZSoM(yiKdGO7Hr7eI(DaZLsyG?
z^J4uG;|V(M<siL_>6|Y@m(e%7el*^nvgo)H)h@@dR^<<ZkRVc}VSjl9?;aE8c+1*V
z{DYz?9ee147817=sLE(Q^-WWN(o~rrE{Bn7CEtkLyNy}ejUx=YopyV07wk9gz7#pi
zjM@jSLOR|qIaRk)7SXH?sYUFY)AR;|gWOn6m}f@uro<5vvm%l0dS@jqGI$-ld)O!b
z*r4>1N-pgtLZTEYCQLGKW?s5hjMB51a&+4?Z@8{=(LzFy%xQdg<A#O-2D!oY3RtQv
z2~#DA91bH71umubhMEw%+^1yuF0X|T?Gu+Nh_mx5a`C!Q=KKxGWe}D|pdO{!oq!%P
z<pG8ova8u`!ZN)4_+%1G%%7N7u;z^0!Zd_QgU~6y7DF;mwBSz(-g=&{L67*t>w8FQ
zdk7ag(9-gzz68r7z18=WG{h;75gK`Kjbut1uPV#xM!pvlo$>ul%9=Y7k3|U514AZX
zv&tlCb91SRB7gp6cIl@rzVPEWAfOx=ARxBCHOBrsEKa3vIc<obj(k%4Qz$MLH#BS{
zlF4edakMJ4+MYr{Q4*38?y|@ZG^5z1$JB6Hp{|W8ar=Si22tBhz!rm<1yHB&@p{0G
zuccTWa<x~Ok24rs&*buYPp|FZ*F44b`MqQPz`eGbW#A51)LC`_a~2-m(?wHfo!7zn
z;R*+(v%nC9L$$r2(BFMZ1H1bHOjFrT>D5G)irGxBv%{8k(cLk^;4@Og&qAH6>JAdx
zd(2VVWpi(~V8f(CtA74A>=%^_cikL)o;FN-eJh){k-E|;9h<6?*5;IBcZ`(?lWOc4
z1>5lczzcYB>ak*qPNp5{zPh<dzc%+7A(O4tsdIS4cc-00NO81ZDcnud$Le0g^Z3<p
zbT!ThnDTig^dE%VX|g#8ypiBGCPh6id%LaUG+SV<0<O8u{e#t*Q)SJnSN*F>4+Z0Q
z@b*r%sMP&x>Ik*>m(JOwd`oD!*=Pm-?30%GQ9?fa$+6MPW=prT(mc|0L5|tnq0GFD
zJ5!x;ZCKe$M4pN7&I@=Tgtu%~dd8C<Vj9T%EbH+M7T}xd6Fga0+j6HqjwIEanw)sT
z5Jkn)+?-Dk5*rfdQmd~#Fs@d#d>oJ(A%97JQp_FX8rLvSM~$_aSYxhAHC7pmn5(|b
zeA`51K0R7=#KAE-#^cg+HVh<jZbZkFbCt)4nB_ssf=$!IfEmz;>o>x@GLk=W^OMDW
zI*AJjsaMxY@-{=f`i-PF=l{A);!$Ln)CV(%y|#)Zrh=eL1+Bx}g9x9qXSgf8mf%>u
z=S|dOCoUdc%D3NYZ^x5A1ANQ*5Xx4#-Nv)xWOd)ul<9TkXB1frj;)K2iK^H}LVB7v
z1T~+XoCY0ALZB(Z1NN3m0#<ffp-2sm9KC0Iei4W^?4ad^GXkm!i6D=N(wa_-3bP~l
ziWfYC_y%5i`X108q@VDDFSQ3WmaKwrunQFY3?px$qyvsahGF0+Qx6K8nhe|&xWJGf
zKW&f*BHxmPl-Wd*L)-y*#HnkCidICW8`=NVAwjD$6r@)2n3q=7O`v`&H)@z76S^Q1
z*pd{(KcK*^Oqxi!)wGH@FB6wPCNg{Yjl~3`2n}STE~4}SXFl)_$m?7~^3;sWWq_HU
zFgrjBI1Z&>Q7FA1Ewz+h7`%!m>g}>vrWuiBL97#x2yLDvhDwPl5jDzSJJz5#u>A+F
z|IbE|K8nlfK>v80GsNG~ht7;4e^_n7ZwHk?f6HU8$?*+2HuM^ihZ(VF?I~pm)AaAL
z*v@ju)k8%$FjF*A8abh}YsxvL!v?i-(u^%JQ=*ZEIVV;r$t2l~4$|+@S}A|M>Ej$g
zdx$^5df&gyldP^=n8kf@+Q1j5QT&a8<UeyY+pk>hYv|ZgUZY?D2D^nSfGk%~yp15o
zSe0nmaKR%6hwE^GqjsTn!wbKCd(o1GWDNO7rk1PyjOYtYa>e=V?)~Q^JA3=>;(10H
zC@w*2m&|TxE_jcl#?BcRiE&);*o-ES8}kGh#ZgPv33somYjgA9xHp^<DKdaklfT}l
zDAClraZk;3!^v%`@o3can0bx8%A4(8R+IgsTAK|lgl!QfN-e7})-za$#$Qpu!>Eyj
zpb_`-*&@~W_<V_Wfx3}4%gu{*#lS7X6a8e^EbW@vI4zQuqoB)mhJhJ9><?PyYwNVl
z1V7E9t+Q9y6|+>18|+4n=Cs@L1GSyz;5Q^@Wc>Yv5WWW~mQhV8`ClqO5Ys5woLf_}
zt5#y9hMBIM*6(%X`<d~daGQwq93Hbnq;+`Xz@VZwX&mp7VL-$z(4`*ZFY%q>Q|`o%
z`nC_TfD_L-O51Fjw5F<|BwDRc+T3IYFc^IFGQ~=c7t#1!OE_IiSRYsUnl8GBUj`2W
zZoNjaBwh{qbQoZx7*QCyaeU0&FLf>qgDEa#Ev1S<5_Yl+({|0SrPCMk_DdI9ijT>q
zM=R`~rTk4(tu|j_aV&i{^`2FjIaAUs%eo*jyY(`^eyXDG{3^tif5@O)K(f`L;1vJq
z8sPK&IiESkPlwDulpva+CV+kfQBEG2AMy8e-Y+Dggm-ljJqS$V=vGCNXfTiHSOK)p
z>AXQCt}H5{2!{1xx_%9uE~p`(K0%ReFKhwSV9XM3{2>m!Rn0qTk5HOd-0eNySA=qd
zwC<R1K4jm?(BcKcFQK*~<dcDcbxYh~w=Rf8O|?rO=)nPruwaGO?Q(oUI3{Jm%`P}&
z#A~+@e@)arv!u-7zu58QYwg9~W5<6t`&XkX`?W9$)321a%eF!*YRDc8je@ow42fR=
zwJK(?a*m2jZ?qoow7)TQi?#v6Kb-w0R|Yi(;$SRT5n*^R&xpsu$9z3s6uGitYWn1!
z?}mA1^0N2s_MA09Q74ip(gpQuRHU30Q4}-S5O!=moG8i=j~LlmX{0<>nM;njd1hMe
z7(MiC2mKy)1jfvCO`}$^qdzB8QHzz%Id7-ZY2(hit*Z6cFjh0vM8mrzU|R*|<qsA>
zp;ieQwhKCy*iI$wB*Z&xW{oA0LPf@I#$hG#W7uGnUK@X4S|@`WrqIB%*CAvAG0Uxd
zdTsL_bSlm!?_Go$%rdUsNUy!0ky74#sN*6z#CnCse2+`NB3K&Mx1yg&{{SHCTXfpR
z!uw}qRMs?IOT$5O@sR3I0CRJVV;5B>IZgsBcO)Ns8il#>m@F1fc@5J!$ACi(+J-iR
zxdtuaF9PZ)LqKk-(da;7%dSJ;dE&1nS{G^b4X0F*F{fx`MohDzR?G29j@$%ln50sR
zt1LJ3LbL^PO8;h!iq6te?fH&Fvj|}(GJtOQ#J&wIcRe$}IbM5TN&psxqt2KK0MkB-
zxRcEa_~7^v|FN;KB%+#DWgNNp!2ertr6KeqpE*5o1C!3VPN&a|g+aDWr5ZW@TGA=1
zX~{pYz+2m`F*B?57lo`_$j>+W6T5L&PuuQ3QC#6l)wfx&*Sn%;P+q8xs|PJuLV`Gu
zn9EYVTrc3tqB9K0V&)knAE_Ro0>M*)lpD(ZZ$vNQ@aL|}bF0BU0ajPUxJCSU3E(Hj
z{mqislQBOuEFAC($f3QH@k@ETf2W+s{uJ5ZY8IajGA=N|b5NKr`XgPOM>wh{)JOgZ
z>c4Ow0e@kx^An?_OgiR>{oyd;DBfu7fFU+Mv51Q40WZuYMALhOJ>)>SxV5UlEg3Ut
zJa7^x1Y#NK^(de;eY;YIamlWl2mbDtM(`d}0cmjRbT`ljS7)vA#%#r2`<-m7T)sHt
zL**!6vC5XbUN8@wkY7cjird5wkM!hd!U|D8kiSOQDNy6w0Rj+E!`FI0vcEwa|19up
zG+{hYS6u(hNK~!q#sJY1+&0Dw0G7T%MT7wb0!#9kz#yXULhg@FSjD5Obs4#XQ8X-&
zut+L%M`Ur-=%%$>UhYG*qCtiO7CB-MSS*uDUH1ngw%Ba9SY)kMBC=RwuAZ_p*4Dzr
zs!}d*gx#+<zt%i`X20b*&*1gGE~n*!)@Ry17?9^C99z%xP*1Ityi%j>P;7RC{VIgq
zNlxf{+Sh%-LEE`2#N0_u=zDa!$2<GTiR^neLeIVbm3`BB%d7d=&9alm$sfNBqxX);
zf0qwI?{$m(N6_y@-15&@)c1ot)%Pfz&vQ{uJ-34$lXdy-7kB4%dDAoh@|TRrJnshF
znU34MJ0H<Z^^#Y5`qVvBv^r14xHQu#dX@M2n3Wz2mYvUy7=6!M>-RXEzJ%w`l;bwL
zZo(P%Esu$q&jUI^>_jm~<-jcHl8eMr3G5|VJ2y~ImAmmK4RBUVDV5AUQS#uT4E#*d
z6$XpD3YLwi9+t2k5$6)hlv0^Y&-qDp%Rng@C@pc4!jbj<Ko6Zb6f>8hLh(lfBGi~m
z=)pLo$kwn2hrIWBHpK9Ui<|-vp%XOFmgn$CWmj_su2lw({woCnrukX-?gHL;$V)u<
z5aEb~EWm(Q3S~*?k!#!4W~()I!LkM_ylCv1Z-IpHcY2F(2#AND0t2#M4A>VaNL#}|
z^wexASVoY&(?x+OfdGbYR;O}<LJ5Ak`xkISF+1FkC|;|XoZPl%)-2e|-;i?8uVkZP
z&z6%rda6l0s&)C1BSQuqtTz|9ait?OmL`&zrtGsfPi5jF&^EYRU0a{Ag;cdf95nB6
z?%e@rQVlDp%L!&I+Gg<A#`mjC_a(FoJgt+sJKqn$L~%@Cyo3V#eghTM)2@$Do4Wys
zeqZnX9PqNy-*wQyND~hZ{1qrbLvpihU}!`z_||8lq|UJE)E6AXWUFpaKBPcP91_Y|
z%D72#V)dbHn8S{_@C|CK_@UA#)oeI0{IsbppuQhDf!=rPTVQ+3(&b5TNaD2!fk(7$
zBabHT0z?scVzSHg1yohn^q4J_uLH&86U*f+N|(ogR%H%Ip~LLzM5PZX;_(Y!sE4<=
zabOYz$etlD^Ys%h`sc6cS9UmY<O0DD0SbI0?N%|OIS~O|j4y={tPC6E$zb4m&q2cm
zy>8H!8R7Q`^-O5j1{)8;d52v3FK4g9ji*5^2pZ2b&BU{aS^*k{a`FcafHXQsI|+6~
zy}Cxf%(0}Iz?+Pb3H&sq>68|~X=-ueo=)FFyVMF3?smS6XxKZv^G-vIm0M!&%J{`Q
z#C@hXt~V9;noPXjLdS8`xS;vt{#b5YS*&!P^OWkrurH0lyTPVmefSXe+KO=`6Eh<{
zJGW!i0W_cpY0nlPs!A08P|rHheWN0u&jeAf#AzDz=jkjY%rTVM2&eCww;5Tz@3OTt
z)tVfGetz-P^dyN}?l15lL0<eiE1flw)w8@!qB07XQHAk}lzEm?7M2LW;Y8&_+R~lV
zz@kf;$Ek*tM&e4i$@8$mDwkZ(*4&;(JW8j@GpFYMg=iuG7?kxvaK%ByE3D*9JYM-c
zL_8nBLA&fg4KiOvz#`KConV@REyPhdhg>=T3;*GmnDm~<gi^x71<%en6<fSfY{H^L
z%&&6!m*QrDBAjZKN|hpB&1`utwu)ATA~CCFJB+*a3nVgC&AbI`&E7h}ktQd8XJG+k
znQYcz$TAzTu9}013g&q=n^$oV<I2Y9#arx(IbofI2sKGp<eqiMwk$m|s};Jz1<PjC
z8;h1a4rG+F23Nmf4?p5@5m!N3Ams#^;e`XmCONC&*q_k`i3gKnA-Qen7%ht;mMoS|
ze0M}Gr{T@pJp~zd>dmb=5p|iki-eFGwE?Wn8SzOD7j5H@f#5S{s4iOjjIk0U21G;M
zxz+HaW*3CR13|cF0>M+vHV9?3<W1-lHtXilRw>{yOSOS-GMGH!T~Hg&!f9u(f`7UN
zwz=S)dO;EuUSTIY@hm(fX8CAH_^740MkvR$7+HP6d@g1k<Xm{jxbV^Nw1>}jHI<D&
z585Tl(`lwzb$*Ci=EkGRBc`5cnxIUJw68}XfCc^LP;q5E5UCi|`pq|x45t{}8t5Rp
z8D84yqGiirW46w<)(*EELh~7m@}qYQze~47EoQx3x5&0Rr>@#l{U&?bA`D=>n-eF+
zOfuzYX%x;OmAOlBbegl65u-+z+pijLU$$08n+97^z*YgpQRISv04Hh;)k9CK-2RTY
zadf?uaN)!jnkAQ6y*YAm7|URyi*kA~!PBR5PGI8_gcI!~#3EPj5v=HV<{QoThHSUV
z;tlal*yjXVj5tDo0=ZA|djgY(&m0kZ3lTFKQYxpc1C<MOd;CEr<xG&JKM{$kA48>c
zhe0m6HvmO=AYzPlopd&`@;a$N#6jTFv_)02MYXV%s-ktga;QU#Ytb-)(S2!}mGs6w
zd|<S_&5fgR>*r0Xtj*w5!JvT4YIOh>^`65Y?G>i&-#qRHwF_|90^5G;3C(<IQJ0cU
zQ#DOe{fFDMc_ueXXeNpV77}p}lB&7#L)K_Gw3_$i#o|`I+(@ip9JRy1do^r!W8Sfd
zQb$ZjpsxGXmcyS<7B&n9x;fCD>)g(MM-zHPJ0k#)hW6a|(fPC;_P~h2qZgs1TU)b@
zkd==LpE%_+@ZLT-W@@DO*IE%$w|!k8sd@n`_iNht3m|^94na)$US-S~;$`o?N8I{c
zoe!Lg7X{{A#S2ao-Z<@<^jjNH$&Q&ThQ2LejOTzFR&|9%X6eTYV~4yjbJ%U{&Tt`1
zR7>b$M@eCxmi98haK5gHr0f)iq7xKzuYmyP&nqU6c3t4_R0_l3bp6P7X?GrIR*rbf
z1^C*t0ZBa4%qa(`VlR|GwzI%x_RO7SuegEBpzHw3{Q^F)Wzl;yMjR=yW%yO~=v8nW
zC8bg@u@Bo}8<_DBZ3iFd^`hV=`M-F~VzKi@r4ty}Cz<^rzSo<nVa@|RqfMS;4X~~?
zX?r|D+!owA@vbqmJ9}gISx;?FIhne!ZD_sF=!BlMAKXv%hH##EwExiH2+lKbsyH=Z
zWiR#gZZ+Kwi|IJr`x&?DqhUWrCl{IR?L3~MH<0i9zL`f>x4PXExBqOzXfo-#q$6}z
zPWz+zIF&`e%SONJ7vnqMC!Nvt?vBVLBERXf(t=VOk66;^IG42*y>V{65WsdqS`(Fn
zLIoZM&d~3>2&XcCMRT&H1390afM(_y3|wBlg5NyDDcyUZ>Fo21x#Q{HeY(Co6wLY8
zB7K|&>LBQt5A@ay|0~v(w0ei;rwf@)X$=>2Rx%qgpQ|a>7TDi3<5qdBPYlNGWTq0l
zUFfj0GlGgUur@~UwdQUu%+_4#2syA_QP0<i-j`}mYm(oB1LvR44Sgb;K(Tt__7-e<
z(<{H;#NBT*9I|a&PVe}%$R=t`moc>D=NI(2f_*FafYP!X!-E^s-mS3L7nG+5^M%&7
zesCp2;07MvZ+S?#ja&W}?Z4u1&uWTKb<YHDnry~|Wp2cs7T$89zD|hTz`VZ~68GE;
zeb+YX9U_bR^XFcp|E-&44B!?82PVc7d=K+^`$s({vlTKhWv=+TQJ2*<%`wM>1*<p6
zlvlkzn>lnx-4{xT;Ey^x08QPtDa`@<TkmAWg`~;>wMIl)1Ne+X&<nQPu4d^VyA8HY
zs|TIf<Hp;pedt;kTp@-U8hRRb>6UrJu3QvYOl&x}tli@C3)RT1gDS}EuzfJiiEcu;
z;w{hHF9?}XjF*R3!eBonR}V^BM2z*o)$~?te2R4(3gryhF$We-CqRd(4G=^nR%}^Q
zzvCbwuKTY%x@{d!t3vzBTh<uEl(9>xthI5?>V|z+S0+7XKiMJcbq6huW+U`A{ymQm
z%8`p14n6<a7>`ik?s1%Z?5Ewd)8Pw_aQ)keKh|>ceS7(Dw0Sf3zbNm?_#~3i2g<H!
zxu*(>aLjV#q6`sK`l<%a#cr`S`3+I)8j4o2T9Jlu<3m|cm`|}M$-j?VPKPK~pM>u-
zzbI3SJZW+qXJlgN4QmN^xFr5gQK499sqVrpY37i<;nBX~S$^X@aCl)J;ZckqN0~af
z2YtmdX5~@I;+FUE3*wo(j3>8X?QR}sl;<({9;`ftav>(GO>zz`mee`L!zdq#k2TFL
zKTD=EXQrY|t)_&tJj-TNdbKLZJuAsBL;Vk@sWVR_o_&>E3%O$`9DlRi{BPat?7~I(
z+wZ{|0%4dBwB<$5GW_y9={&7vY2?dgcZ1;E@+=z-%3t?GJI0f6YfU9z3}=1<g&mL=
zTbSE#Q7C9rOkqEXXfFfnO%Z5^JuJutl_R%D(Ss4}f)l%N)gH4w<GC-4avWiB&2wuI
zyWm%pL!NDz<+-1fO@6(v@qv00A@7{~GTAKm#_t|&q)rvd&M9(huG#;AJCrwpH)Kv8
zzAr1M_zo$4TNJDK9&gO(?W@KpjX_3f_T7obOxJ9%{VTXHjh0Pe%{f1)wEv)vj4t33
z>zkJGGv^pR?X{^-+R33eX(tx%m@EvY$o+2KyEIDeohtr>Px`}2_Tk=;n=8)k`yKBA
z1m#owv=GBFT(7_8;ouCd3E?FFSuJaO8c0^Yum8a><?I7{;SJ;RY^6-}MjZD{meQ+@
zYxYex^%pu%H6?k^_nd(38u$k_o?ldY;$C^q#qQeZJ!m^54}2#Q_DpB8MUp0LJ!6>D
zE}i`lfsL4%N)bw?P~#~j3kA={_Ho-+l3w=G^Ic1&P$di8wj5d8<IpcD;EU9<7D{S{
zJ&oX3<615h7qb;M0%`L>!sB9_l{OjpJ>Mq;Z?4#Sa)2{)w?l<Y+07_))9Wrx<=y3@
zw&M<<V8rVlQ_LKvFx_}Ub8=el7)zIbV=8M~`$bpCHjI;RtHbx#;P&erZMCaOeqz_J
z2JMPriFJs-1U2Y~+`qg3RP!z?al@;+VptqsA+Urlp^O(x0FVtB``{`4#*B+aK7#a-
z;vY{2rcn|qS9$SKi#4o4mK$iF9&S&(iNL}%w@vx9Z(?A>yfp+Y!L8*99G4B5FjdbF
z@my0J4_8EX1W4b&Vv{o}i`{b!Z}(!d(;d)P3)nXl3e;5add%^#E`ZTWY_Ou~H;&fq
zN5*G?`Fer~hAN{yq@h4krB~*YJ1!~4C-xC+AwDdEhFeI5ryuAy$Y&vF=recK<|p0Y
zchMP?+CO_<c0|{f)jxyqoa`mcII-&rw7F2nT=Si=;cR`(Z<)9tNVF}TVvD1f__9-+
z6j?LszkKb7(x3KRnt@PZ-AT97_*?-0U3Ukk)q9FwAP&^f(NvET#*xVV`fBHget(ai
ziZZMxmx8Y)^{#jL>i<`n;Lj`rGxnFd%>AV<GyM&j;GdhI{<1qz$<`iO5YaES?Hf;t
zsJ5*yB}Gg3+FQOpb|x8Iv1C%z+<bFpW;(5n!K2+zW5zvwP5xC86#oGJaSrc%RRYT%
zgeGS08J;ic?c?S3kvqWLBT)?5diaorM`obE2o)@prBI{k*J?kwu#q-Zg*)bQ)HU~?
z1a4tk-jS4Vx;P{F>Ud=p-zvCT=8=S}x2<6S(bOp(d=K)X4x{qConxIn=+Chp1hPCG
z(Py=|@#DFudRe&M`jkkEynL9D(Ck}kGw$z5Ykca&WgkB+9j5f*JIEhT4NfDr2zHD5
z+q=bld~3U4VHSFn#2ooAjY_QnNB-J4kN$1y>HBNBHQNQhRg~ROR|W#4T1%f<+RXl3
z7m6AmD8b|b>QJTAV*MFCJJ)c)X&cC8cc|}Mn=M&vi{shMmV^=v4<bR>q$oDZ4aS60
z3V!p?^vAP1$X7lW(xxnsl`~tohR)<*Jm^gIS}-2c6`5#ULfS%~hn!Isuqc{aN;2E$
z@NB!|JZ=Spf3&Kfqk8^+OJdMRovemL(kVI(*{8@+>rmo_;D!Na)BFY$oA85Lhn$`r
zum_GVj18l?NkVn7)sI||Hr7VYuDMBe1OJLt9THS!60@t!1xKdGCw@o+sxz8H0AMfJ
zOsC*WFm8|XMhT+rPuDrdDXg2&M|_KZ;Y7~+qd?mKu+eA1gpvt)9rgr_ema?GZkW?Z
znMrFut^Ze~eSz!$HhB2j@&V*vYHsN4>ipljF#K0L*#Bbp|7!?={x7zFuQ9|wdRP3P
zJNd^I{?qH<G>C}1%I);{a%lKEX#OY1e`yp!=U`%E8_f+5!UzlYmfcUj`mI<`aqb74
z%sv>*2>dI7Xn0pr4Mo1^(BuLIStZst!TRQ6M)smuQod|z)M^ophP{d+2b>c+P<n`Z
zM#%?*js{BTMKz5&We48Na|wB51Po@v#j%ai!@f3$A_sj-<D;Hmb&FB&`)zOk(q)65
z+`78%>&`KN|G5Y3|M}?u@&wNRcm&<p2IzhUM8Gr8P(nwux1ho)O><|&K*pwmOqPcv
zfWX&%K?SvQ^Vg31&IhyHAs!WoX_=RlCs(#|jlK}B{B{DgLqHo)yt9}ojFRa0%SFQ<
ze1%xT*oB1jRVhC)F9n*&rcHJwNRJYA8fnU^sLj0|rVTpD)Qf??&k;;OXM)oN8()ME
zNcaupC96gI*ZqmSZ&P0xSSGQ@Xn-U$)g0)xpj6<YC15Vyh;2XFK=^;d5AEv>*nGjy
z*i+%3AXGNCFts)OpRxE4r2Jc_{~s`k|0l=)#M{4iSNUH9>VoGN&;Tw@yAeiUNwQCE
z4#n&T>YfjbaBHGX!4mt$$+;5@`41@ww0{4`b?D9)_8l`VW^dUg*)Du;iC|PqvV!mb
z-50VP9~T{dy!&{yd1y}|7n|j6!@R(ruuoQ<%r8Q17#v?aeigA=f8l9*y2F|tiPFW+
zbJ*H0oOpifPIbs7)dz=`X-rvHwpoj5O~6c!HLV|xExIS1^<7^yzQgj|%-HGYzn@#M
z;LWLnmv1~&xTrR9&GBFAlRxyi{oStbecW^R@)y5EL%*HV;5hy^Mpyf~QQ^N5r5%;5
zX3YFNM}3PXXQA&tn|k28!%ARy>;;AkX!9>|p^FwSz)Vq`ytaQY-ys8@mhX11Q+B>$
zHVF68*`UNWc~`<jHAVZWi&nk&yUZ1+>8hh6_wL;M^Uen)zF$;Gw?3xJl`9>m7U;OW
z!?ePJZNihsnjU|4oNI_saw-oMU!Li3bXBF^%xM8;daUPiX0(`=e0z4|*S^owuDsRF
ze(b3|;p8W?2TS@=-ITWbuVMbCVf-L8yK}*hZ?9y2UrMZYz7uGZmE#%s>f3bornA-m
zpKb6q+y^WP7@0(vai8-83<V$%V0h~YqT#3hpld@v^#`OF1eP>{mJ-6XA)Nt)ZURU(
zu*m@iz-9*&fGjQpvO(DipAOK$KM(-YF$>5<Ew}OM1swwf0U*66fJ{`qzzl@09sN)s
zm;p-~e`3>)mO_w@2tqd){Y(ag!I#;wxC_l-)Ds%e%|_qajxhTLF#a+8jAl0Ojqd37
zpl>Ng*mHxIustXnjM0ro-yVuEI$eMWqp?jFqg#Q#Qw(9ne&BIdghC0rZw%c~^tH7J
zLm$Z#VJK|%2D-86D;p8U?p7jfEY?#C(5*mUv4*h1O_i_}#YEYGzL*7JM?bLiAQa@d
z0tbCH0m9T2U^9)7sl_V9Bo_4fRD>PJ^a<MmnO#K>arDj?!pyh!gw4e4ilLhiG7nbJ
zf|}eA04n;zoe<4le7ZmlaD=XUBwe6T#itY097pI}jiwXPX2E9$sL6#e;~`EnuoT<q
zO?8+-OB$t|5nd;hG(fFhbO)k0p%EtEA;DyDgBsmvkWoxf^FVD>2mr;`H5VLB6nsXY
z1|PR84kHM~B&hw0aES{(V~C4ZP-_-pU^{UJVu@q)wk^!mC5>l^HWRJt14T=5GJ2q*
lHx>~VJ$1tqsEE)2Iv&j;s6DJ~AhYCva1tj2!!vgf4*(!CVJ83p

literal 0
HcmV?d00001

diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java
index 9feb9c8e5640f..71e8c79a2275a 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java
@@ -5,6 +5,9 @@
 import io.swagger.v3.oas.annotations.info.Info;
 import io.swagger.v3.oas.annotations.servers.Server;
 import java.util.List;
+
+import org.springdoc.core.GroupedOpenApi;
+import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.format.FormatterRegistry;
 import org.springframework.http.converter.ByteArrayHttpMessageConverter;
@@ -34,4 +37,26 @@ public void configureMessageConverters(List<HttpMessageConverter<?>> messageConv
   public void addFormatters(FormatterRegistry registry) {
     registry.addConverter(new StringToChangeCategoryConverter());
   }
+
+  @Bean
+  public GroupedOpenApi defaultOpenApiGroup() {
+    return GroupedOpenApi.builder()
+            .group("default")
+            .packagesToExclude(
+                    "io.datahubproject.openapi.operations",
+                    "com.datahub.health",
+                    "io.datahubproject.openapi.health"
+            ).build();
+  }
+
+  @Bean
+  public GroupedOpenApi operationsOpenApiGroup() {
+    return GroupedOpenApi.builder()
+            .group("operations")
+            .packagesToScan(
+                    "io.datahubproject.openapi.operations",
+                    "com.datahub.health",
+                    "io.datahubproject.openapi.health"
+            ).build();
+  }
 }
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/HealthController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/health/HealthController.java
similarity index 94%
rename from metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/HealthController.java
rename to metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/health/HealthController.java
index 250e9f6f71242..2e243f4c8df9e 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/HealthController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/health/HealthController.java
@@ -1,4 +1,4 @@
-package io.datahubproject.openapi;
+package io.datahubproject.openapi.health;
 
 import io.swagger.v3.oas.annotations.tags.Tag;
 import lombok.RequiredArgsConstructor;
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
index 68a8c8ca49235..2b3e84e2df20f 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
@@ -48,6 +48,7 @@
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import java.util.stream.Stream;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -91,10 +92,11 @@ private MappingUtil() {
 
   private static final String DISCRIMINATOR = "__type";
   private static final String PEGASUS_PACKAGE = "com.linkedin";
+  private static final String OPENAPI_PACKAGE = "io.datahubproject.openapi.generated";
   private static final ReflectionCache REFLECT_AVRO = ReflectionCache.builder()
           .basePackage("com.linkedin.pegasus2avro").build();
   private static final ReflectionCache REFLECT_OPENAPI = ReflectionCache.builder()
-          .basePackage("io.datahubproject.openapi.generated").build();
+          .basePackage(OPENAPI_PACKAGE).build();
 
   static {
     // Build a map from __type name to generated class
@@ -143,49 +145,108 @@ public static EnvelopedAspect mapEnvelopedAspect(com.linkedin.entity.EnvelopedAs
   }
 
   private static DataMap insertDiscriminator(@Nullable Class<?> parentClazz, DataMap dataMap) {
-    if (REFLECT_OPENAPI.lookupMethod(parentClazz, "get__type") != null) {
+    if (parentClazz != null && REFLECT_OPENAPI.lookupMethod(parentClazz, "get__type") != null) {
       dataMap.put(DISCRIMINATOR, parentClazz.getSimpleName());
     }
 
     Set<Map.Entry<String, DataMap>> requiresDiscriminator = dataMap.entrySet().stream()
             .filter(e -> e.getValue() instanceof DataMap)
-            .filter(e -> e.getKey().startsWith(PEGASUS_PACKAGE + "."))
+            .filter(e -> shouldCollapseClassToDiscriminator(e.getKey()))
             .map(e -> Map.entry(e.getKey(), (DataMap) e.getValue()))
             .collect(Collectors.toSet());
+    // DataMap doesn't support concurrent access
     requiresDiscriminator.forEach(e -> {
       dataMap.remove(e.getKey());
-      dataMap.put(DISCRIMINATOR, e.getKey().substring(e.getKey().lastIndexOf('.') + 1));
+      dataMap.put(DISCRIMINATOR, e.getKey().substring(e.getKey().lastIndexOf(".") + 1));
       dataMap.putAll(e.getValue());
     });
 
-    Set<Pair<String, DataMap>> recurse = dataMap.entrySet().stream()
-            .filter(e -> e.getValue() instanceof DataMap || e.getValue() instanceof DataList)
-            .flatMap(e -> {
-              if (e.getValue() instanceof DataList) {
-                return ((DataList) e.getValue()).stream()
-                        .filter(item -> item instanceof DataMap)
-                        .map(item -> Pair.of((String) null, (DataMap) item));
-              } else {
-                return Stream.of(Pair.of(e.getKey(), (DataMap) e.getValue()));
+    // Look through all the nested classes for possible discriminator requirements
+    Set<Pair<List<String>, DataMap>> nestedDataMaps = getDataMapPaths(new LinkedList<>(), dataMap).collect(Collectors.toSet());
+    // DataMap doesn't support concurrent access
+    for (Pair<List<String>, DataMap> nestedDataMapPath : nestedDataMaps) {
+      List<String> nestedPath = nestedDataMapPath.getFirst();
+      DataMap nested = nestedDataMapPath.getSecond();
+      Class<?> nextClazz = parentClazz;
+
+      if (nextClazz != null) {
+        // reconstruct type path from method path
+        for (String pathElem : nestedPath) {
+          // if not list element
+          if (!pathElem.startsWith("[") && !pathElem.contains(".")) {
+            String methodName = "get" + toUpperFirst(pathElem);
+            Method getMethod = REFLECT_OPENAPI.lookupMethod(nextClazz, methodName);
+            nextClazz = getMethod != null ? getMethod.getReturnType() : null;
+
+            if (nextClazz != null && "List".equals(nextClazz.getSimpleName())) {
+              String listElemClassName = getMethod.getGenericReturnType().getTypeName()
+                      .replace("java.util.List<", "")
+                      .replace(">", "");
+              try {
+                nextClazz = Class.forName(listElemClassName);
+              } catch (ClassNotFoundException ex) {
+                log.warn("Class lookup failed for {}", listElemClassName);
+                nextClazz = null;
               }
-            }).collect(Collectors.toSet());
-
-    recurse.forEach(e -> {
-      if (e.getKey() != null) {
-        Class<?> getterClazz = null;
-        if (parentClazz != null) {
-          Method getMethod = REFLECT_OPENAPI.lookupMethod(parentClazz, "get" + toUpperFirst(e.getKey()));
-          getterClazz = getMethod.getReturnType();
+            }
+          }
+        }
+
+        if ((nextClazz != parentClazz && shouldCheckTypeMethod(nextClazz))
+                || nested.keySet().stream().anyMatch(MappingUtil::shouldCollapseClassToDiscriminator)) {
+          insertDiscriminator(nextClazz, nested);
         }
-        insertDiscriminator(getterClazz, e.getValue());
-      } else {
-        insertDiscriminator(null, e.getValue());
       }
-    });
+    }
 
     return dataMap;
   }
 
+
+  /**
+   * Stream paths to DataMaps
+   * @param paths current path
+   * @param data current DataMap or DataList
+   * @return path to all nested DataMaps
+   */
+  private static Stream<Pair<List<String>, DataMap>> getDataMapPaths(List<String> paths, Object data) {
+    if (data instanceof DataMap) {
+      return ((DataMap) data).entrySet().stream()
+              .filter(e -> e.getValue() instanceof DataMap || e.getValue() instanceof DataList)
+              .flatMap(entry -> {
+                List<String> thisPath = new LinkedList<>(paths);
+                thisPath.add(entry.getKey());
+                if (entry.getValue() instanceof DataMap) {
+                  return Stream.concat(
+                          Stream.of(Pair.of(thisPath, (DataMap) entry.getValue())),
+                          getDataMapPaths(thisPath, entry.getValue())
+                  );
+                } else {
+                  // DataList
+                  return getDataMapPaths(thisPath, entry.getValue());
+                }
+              });
+    } else if (data instanceof DataList) {
+      DataList dataList = (DataList) data;
+      return IntStream.range(0, dataList.size())
+              .mapToObj(idx -> Pair.of(idx, dataList.get(idx)))
+              .filter(idxObject -> idxObject.getValue() instanceof DataMap || idxObject.getValue() instanceof DataList)
+              .flatMap(idxObject -> {
+                Object item = idxObject.getValue();
+                List<String> thisPath = new LinkedList<>(paths);
+                thisPath.add("[" + idxObject.getKey() + "]");
+                if (item instanceof DataMap) {
+                  return Stream.concat(Stream.of(Pair.of(thisPath, (DataMap) item)),
+                          getDataMapPaths(thisPath, item));
+                } else {
+                  // DataList
+                  return getDataMapPaths(thisPath, item);
+                }
+              });
+    }
+    return Stream.empty();
+  }
+
   public static OneOfEnvelopedAspectValue mapAspectValue(String aspectName, Aspect aspect, ObjectMapper objectMapper) {
     Class<? extends OneOfEnvelopedAspectValue> aspectClass = ENVELOPED_ASPECT_TYPE_MAP.get(aspectName);
     DataMap wrapper = insertDiscriminator(aspectClass, aspect.data());
@@ -227,6 +288,14 @@ private static String getAspectName(Class<?> cls) {
     return new String(c);
   }
 
+  private static boolean shouldCheckTypeMethod(@Nullable Class<?> parentClazz) {
+    return Optional.ofNullable(parentClazz).map(cls -> cls.getName().startsWith(OPENAPI_PACKAGE + ".")).orElse(false);
+  }
+
+  private static boolean shouldCollapseClassToDiscriminator(String className) {
+    return className.startsWith(PEGASUS_PACKAGE + ".");
+  }
+
   private static Optional<String> shouldDiscriminate(String parentShortClass, String fieldName, ObjectNode node) {
     try {
       if (parentShortClass != null) {
diff --git a/metadata-service/war/src/main/webapp/WEB-INF/healthServlet-servlet.xml b/metadata-service/war/src/main/webapp/WEB-INF/healthServlet-servlet.xml
deleted file mode 100644
index 11af7d000bddf..0000000000000
--- a/metadata-service/war/src/main/webapp/WEB-INF/healthServlet-servlet.xml
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-       xmlns:context="http://www.springframework.org/schema/context"
-       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd">
-  <context:component-scan base-package="com.datahub.health" />
-  <context:component-scan base-package="org.springdoc.webmvc.ui,org.springdoc.core,org.springdoc.webmvc.core,org.springframework.boot.autoconfigure.jackson"/>
-
-
-  <bean id="yamlProperties" class="org.springframework.beans.factory.config.YamlPropertiesFactoryBean">
-    <property name="resources" value="classpath:/application.yml"/>
-  </bean>
-
-  <context:property-placeholder properties-ref="yamlProperties"/>
-</beans>
diff --git a/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml b/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml
index 7c990cee8f65b..3077cfb062638 100644
--- a/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml
+++ b/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml
@@ -2,9 +2,9 @@
 <beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:context="http://www.springframework.org/schema/context"
        xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd">
-<context:component-scan base-package="io.datahubproject.openapi" />
-<context:component-scan base-package="org.springdoc.webmvc.ui,org.springdoc.core,org.springdoc.webmvc.core,org.springframework.boot.autoconfigure.jackson"/>
 
+  <context:component-scan base-package="io.datahubproject.openapi,com.datahub.health"/>
+  <context:component-scan base-package="org.springdoc.webmvc.ui,org.springdoc.core,org.springdoc.webmvc.core,org.springframework.boot.autoconfigure.jackson"/>
 
 <bean id="yamlProperties" class="org.springframework.beans.factory.config.YamlPropertiesFactoryBean">
   <property name="resources" value="classpath:/application.yml"/>
diff --git a/metadata-service/war/src/main/webapp/WEB-INF/web.xml b/metadata-service/war/src/main/webapp/WEB-INF/web.xml
index f210061a0bb27..c1239ed4b7ed4 100644
--- a/metadata-service/war/src/main/webapp/WEB-INF/web.xml
+++ b/metadata-service/war/src/main/webapp/WEB-INF/web.xml
@@ -54,12 +54,6 @@
     <load-on-startup>1</load-on-startup>
     <async-supported>true</async-supported>
   </servlet>
-  <servlet>
-    <servlet-name>healthServlet</servlet-name>
-    <servlet-class>org.springframework.web.servlet.DispatcherServlet</servlet-class>
-    <load-on-startup>1</load-on-startup>
-    <async-supported>true</async-supported>
-  </servlet>
   <servlet>
     <servlet-name>openapiServlet</servlet-name>
     <servlet-class>org.springframework.web.servlet.DispatcherServlet</servlet-class>
@@ -95,7 +89,7 @@
     <url-pattern>/health</url-pattern>
   </servlet-mapping>
   <servlet-mapping>
-    <servlet-name>healthServlet</servlet-name>
+    <servlet-name>openapiServlet</servlet-name>
     <url-pattern>/health/*</url-pattern>
   </servlet-mapping>
   <servlet-mapping>

From c779b92bd0ca0be3d2a0984ecd035d9bf2ea5d92 Mon Sep 17 00:00:00 2001
From: Lucas Phan <lucas123phan@gmail.com>
Date: Mon, 2 Oct 2023 11:58:58 -0700
Subject: [PATCH 11/25] fix(data-product): show data product card on home page
 (#8924)

---
 .../src/app/entity/dataProduct/DataProductEntity.tsx            | 2 +-
 datahub-web-react/src/app/home/HomePageRecommendations.tsx      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/datahub-web-react/src/app/entity/dataProduct/DataProductEntity.tsx b/datahub-web-react/src/app/entity/dataProduct/DataProductEntity.tsx
index c3f1273681c19..620d42943a74a 100644
--- a/datahub-web-react/src/app/entity/dataProduct/DataProductEntity.tsx
+++ b/datahub-web-react/src/app/entity/dataProduct/DataProductEntity.tsx
@@ -51,7 +51,7 @@ export class DataProductEntity implements Entity<DataProduct> {
 
     isSearchEnabled = () => true;
 
-    isBrowseEnabled = () => false;
+    isBrowseEnabled = () => true;
 
     isLineageEnabled = () => false;
 
diff --git a/datahub-web-react/src/app/home/HomePageRecommendations.tsx b/datahub-web-react/src/app/home/HomePageRecommendations.tsx
index 39d76bf98f28a..6ce7735c4a7c8 100644
--- a/datahub-web-react/src/app/home/HomePageRecommendations.tsx
+++ b/datahub-web-react/src/app/home/HomePageRecommendations.tsx
@@ -95,6 +95,7 @@ const simpleViewEntityTypes = [
     EntityType.Dashboard,
     EntityType.GlossaryNode,
     EntityType.GlossaryTerm,
+    EntityType.DataProduct,
 ];
 
 export const HomePageRecommendations = ({ user }: Props) => {

From 6fe9d6faa55c4fe770b00390e6b64bf7ccb10fd7 Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Mon, 2 Oct 2023 16:58:31 -0400
Subject: [PATCH 12/25] fix(graphql): support additional types in
 scrollAcrossEntities (#8891)

---
 .../linkedin/datahub/graphql/resolvers/EntityTypeMapper.java    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/EntityTypeMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/EntityTypeMapper.java
index 3682b2282544e..b0f23e63177e6 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/EntityTypeMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/EntityTypeMapper.java
@@ -17,6 +17,7 @@ public class EntityTypeMapper {
       ImmutableMap.<EntityType, String>builder()
           .put(EntityType.DATASET, "dataset")
           .put(EntityType.ROLE, "role")
+          .put(EntityType.ASSERTION, Constants.ASSERTION_ENTITY_NAME)
           .put(EntityType.CORP_USER, "corpuser")
           .put(EntityType.CORP_GROUP, "corpGroup")
           .put(EntityType.DATA_PLATFORM, "dataPlatform")
@@ -25,6 +26,7 @@ public class EntityTypeMapper {
           .put(EntityType.TAG, "tag")
           .put(EntityType.DATA_FLOW, "dataFlow")
           .put(EntityType.DATA_JOB, "dataJob")
+          .put(EntityType.DATA_PROCESS_INSTANCE, Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME)
           .put(EntityType.GLOSSARY_TERM, "glossaryTerm")
           .put(EntityType.GLOSSARY_NODE, "glossaryNode")
           .put(EntityType.MLMODEL, "mlModel")

From acaf950b9e5e2118310a35940e98b0799a8a9bcd Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Mon, 2 Oct 2023 16:59:18 -0400
Subject: [PATCH 13/25] docs: update cta links for acryl (#8908)

---
 .../src/app/entity/shared/components/styled/DemoButton.tsx  | 2 +-
 datahub-web-react/src/app/home/AcrylDemoBanner.tsx          | 2 +-
 docs-website/docusaurus.config.js                           | 2 +-
 docs-website/src/pages/_components/CardCTAs/index.js        | 6 +++---
 docs-website/src/pages/_components/Section/index.js         | 2 +-
 docs/saas.md                                                | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/datahub-web-react/src/app/entity/shared/components/styled/DemoButton.tsx b/datahub-web-react/src/app/entity/shared/components/styled/DemoButton.tsx
index 1ed182fa01975..b7b974ef6e2ea 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/DemoButton.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/DemoButton.tsx
@@ -12,7 +12,7 @@ export default function DemoButton() {
     return (
         <StyledButton
             type="primary"
-            href="https://www.acryldata.io/datahub-sign-up"
+            href="https://www.acryldata.io/datahub-sign-up?utm_source=datahub&utm_medium=referral&utm_campaign=acryl_signup"
             target="_blank"
             rel="noopener noreferrer"
         >
diff --git a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx
index 0a6316a71db16..0a85c0c3d7f6c 100644
--- a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx
+++ b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx
@@ -46,7 +46,7 @@ export default function AcrylDemoBanner() {
                 <TextContent>
                     DataHub is already the industry&apos;s #1 Open Source Data Catalog.{' '}
                     <StyledLink
-                        href="https://www.acryldata.io/datahub-sign-up"
+                        href="https://www.acryldata.io/datahub-sign-up?utm_source=datahub&utm_medium=referral&utm_campaign=acryl_signup"
                         target="_blank"
                         rel="noopener noreferrer"
                     >
diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js
index c1ecf0283cf63..68ea1ebffa6c9 100644
--- a/docs-website/docusaurus.config.js
+++ b/docs-website/docusaurus.config.js
@@ -23,7 +23,7 @@ module.exports = {
       announcementBar: {
         id: "announcement",
         content:
-          '<div><img src="/img/acryl-logo-white-mark.svg" /><p><strong>Managed DataHub</strong><span> &nbsp;Acryl Data delivers an easy to consume DataHub platform for the enterprise</span></p></div> <a href="https://www.acryldata.io/datahub-sign-up" target="_blank" class="button button--primary">Sign up for Managed DataHub&nbsp;→</a>',
+          '<div><img src="/img/acryl-logo-white-mark.svg" /><p><strong>Managed DataHub</strong><span> &nbsp;Acryl Data delivers an easy to consume DataHub platform for the enterprise</span></p></div> <a href="https://www.acryldata.io/datahub-sign-up?utm_source=datahub&utm_medium=referral&utm_campaign=acryl_signup" target="_blank" class="button button--primary">Sign up for Managed DataHub&nbsp;→</a>',
         backgroundColor: "#070707",
         textColor: "#ffffff",
         isCloseable: false,
diff --git a/docs-website/src/pages/_components/CardCTAs/index.js b/docs-website/src/pages/_components/CardCTAs/index.js
index d87c803b42818..b173101de66f5 100644
--- a/docs-website/src/pages/_components/CardCTAs/index.js
+++ b/docs-website/src/pages/_components/CardCTAs/index.js
@@ -8,17 +8,17 @@ const cardsContent = [
   {
     label: "Data Mesh",
     title: "Data Products, Delivered",
-    url: "https://www.acryldata.io/blog/data-products-in-datahub-everything-you-need-to-know",
+    url: "https://www.acryldata.io/blog/data-products-in-datahub-everything-you-need-to-know?utm_source=datahub&utm_medium=referral&utm_content=blog",
   },
   {
     label: "Data Contracts",
     title: "End-to-end Reliability in Data",
-    url: "https://www.acryldata.io/blog/data-contracts-in-datahub-combining-verifiability-with-holistic-data-management",
+    url: "https://www.acryldata.io/blog/data-contracts-in-datahub-combining-verifiability-with-holistic-data-management?utm_source=datahub&utm_medium=referral&utm_content=blog",
   },
   {
     label: "Shift Left",
     title: "Developer-friendly Data Governance",
-    url: "https://www.acryldata.io/blog/the-3-must-haves-of-metadata-management-part-2",
+    url: "https://www.acryldata.io/blog/the-3-must-haves-of-metadata-management-part-2?utm_source=datahub&utm_medium=referral&utm_content=blog",
   },
 ];
 
diff --git a/docs-website/src/pages/_components/Section/index.js b/docs-website/src/pages/_components/Section/index.js
index b7e33bad162f9..8fb8dc06937cc 100644
--- a/docs-website/src/pages/_components/Section/index.js
+++ b/docs-website/src/pages/_components/Section/index.js
@@ -18,7 +18,7 @@ const PromoSection = () => (
       <img src={useBaseUrl("/img/acryl-logo-white-mark.svg")} />
       <h2>Managed DataHub</h2>
       <p>Acryl Data delivers an easy to consume DataHub platform for the enterprise</p>
-      <a href="https://www.acryldata.io/datahub-beta" target="_blank" className="button button--primary button--lg">
+      <a href="https://www.acryldata.io/datahub-sign-up?utm_source=datahub&utm_medium=referral&utm_campaign=acryl_signup" target="_blank" className="button button--primary button--lg">
         Sign up for Managed DataHub →
       </a>
     </div>
diff --git a/docs/saas.md b/docs/saas.md
index 35dde5b1ca9a9..de57b5617e062 100644
--- a/docs/saas.md
+++ b/docs/saas.md
@@ -5,10 +5,10 @@ Sign up for fully managed, hassle-free and secure SaaS service for DataHub, prov
 <p>
 <a
     className="button button--primary button--lg"
-    href="https://www.acryldata.io/datahub-beta" 
+    href="https://www.acryldata.io/datahub-sign-up?utm_source=datahub&utm_medium=referral&utm_campaign=acryl_signup"
     target="_blank" >
     Sign up
 </a>
 </p>
 
-Refer to [Managed Datahub Exclusives](/docs/managed-datahub/managed-datahub-overview.md) for more information. 
\ No newline at end of file
+Refer to [Managed Datahub Exclusives](/docs/managed-datahub/managed-datahub-overview.md) for more information.

From 790011d40b9fe97373730e875e00237bd2d97904 Mon Sep 17 00:00:00 2001
From: Pedro Silva <pedro@acryl.io>
Date: Tue, 3 Oct 2023 04:04:55 +0100
Subject: [PATCH 14/25] feat(docs): Corrects release version for custom
 ownership types. (#8847)

---
 docs/ownership/ownership-types.md             |  2 +-
 .../examples/ownership/ownership_type.json    | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/docs/ownership/ownership-types.md b/docs/ownership/ownership-types.md
index f1b951871a5a2..243f638a324ad 100644
--- a/docs/ownership/ownership-types.md
+++ b/docs/ownership/ownership-types.md
@@ -7,7 +7,7 @@ import TabItem from '@theme/TabItem';
 <FeatureAvailability/>
 
 **🤝 Version compatibility**
-> Open Source DataHub: **0.10.3** | Acryl: **0.2.8**
+> Open Source DataHub: **0.10.4** | Acryl: **0.2.8**
 
 ## What are Custom Ownership Types?
 Custom Ownership Types are an improvement on the way to establish ownership relationships between users and the data assets they manage within DataHub.
diff --git a/metadata-ingestion/examples/ownership/ownership_type.json b/metadata-ingestion/examples/ownership/ownership_type.json
index 5f1d3019d2a77..4a194c78a3b72 100644
--- a/metadata-ingestion/examples/ownership/ownership_type.json
+++ b/metadata-ingestion/examples/ownership/ownership_type.json
@@ -1,7 +1,14 @@
-{
-  "urn": "urn:li:ownershipType:architect",
-  "info": {
-    "name": "Architect",
-    "description": "Technical person responsible for the asset"
+[
+  {
+    "auditHeader":null,
+    "entityType":"ownershipType",
+    "entityUrn": "urn:li:ownershipType:architect",
+    "changeType":"UPSERT",
+    "aspectName":"ownershipTypeInfo",
+    "aspect":{
+      "value":"{\"name\": \"Architect\", \"description\": \"Technical person responsible for the asset\", \"created\": {\"time\": 1674291843000,  \"actor\": \"urn:li:corpuser:jdoe\",  \"impersonator\": null},\n\"lastModified\": {\"time\": 1674291843000,  \"actor\": \"urn:li:corpuser:jdoe\",  \"impersonator\": null}}",
+      "contentType":"application/json"
+    },
+    "systemMetadata":null
   }
-}
\ No newline at end of file
+]
\ No newline at end of file

From 2f0616ea5b2c1927107a4726773c907a59a0483f Mon Sep 17 00:00:00 2001
From: Erik McKelvey <Erik.McKelvey.is@gmail.com>
Date: Mon, 2 Oct 2023 20:05:29 -0700
Subject: [PATCH 15/25] docs: fix typo in impact-analysis.md (#8915)

---
 docs/act-on-metadata/impact-analysis.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/act-on-metadata/impact-analysis.md b/docs/act-on-metadata/impact-analysis.md
index 2c10e571cf911..9728a480efe32 100644
--- a/docs/act-on-metadata/impact-analysis.md
+++ b/docs/act-on-metadata/impact-analysis.md
@@ -38,7 +38,7 @@ Follow these simple steps to understand the full dependency chain of your data e
   <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/impact-analysis-filter-dependencies.png"/>
 </p>
 
-4. Slice and dice the result list by Entity Type, Platfrom, Owner, and more to isolate the relevant dependencies
+4. Slice and dice the result list by Entity Type, Platform, Owner, and more to isolate the relevant dependencies
 
 <p align="center">
   <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/impact-analysis-apply-filters.png"/>

From 83a7dad20e7420b7283db22a2964d05ee3c42a7d Mon Sep 17 00:00:00 2001
From: Lucas Phan <lucas123phan@gmail.com>
Date: Tue, 3 Oct 2023 10:05:11 -0700
Subject: [PATCH 16/25] =?UTF-8?q?feat(chrom-ext-editable):=20set=20readOnl?=
 =?UTF-8?q?y=20to=20false=20so=20that=20side=20navigati=E2=80=A6=20(#8930)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/app/entity/shared/embed/EmbeddedProfile.tsx  | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx b/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx
index 31a736e30bdc0..df928fc408de6 100644
--- a/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx
+++ b/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx
@@ -55,6 +55,8 @@ export default function EmbeddedProfile<T>({ urn, entityType, getOverridePropert
         return <NonExistentEntityPage />;
     }
 
+    const readOnly = false;
+
     return (
         <EntityContext.Provider
             value={{
@@ -80,15 +82,15 @@ export default function EmbeddedProfile<T>({ urn, entityType, getOverridePropert
                     <StyledDivider />
                     <UpstreamHealth />
                     <StyledDivider />
-                    <SidebarAboutSection readOnly />
+                    <SidebarAboutSection readOnly={readOnly} />
                     <StyledDivider />
-                    <SidebarOwnerSection readOnly />
+                    <SidebarOwnerSection readOnly={readOnly} />
                     <StyledDivider />
-                    <SidebarTagsSection readOnly properties={{ hasTags: true, hasTerms: true }} />
+                    <SidebarTagsSection readOnly={readOnly} properties={{ hasTags: true, hasTerms: true }} />
                     <StyledDivider />
-                    <SidebarDomainSection readOnly />
+                    <SidebarDomainSection readOnly={readOnly} />
                     <StyledDivider />
-                    <DataProductSection readOnly />
+                    <DataProductSection readOnly={readOnly} />
                 </>
             )}
         </EntityContext.Provider>

From 0a5e7d176e103c14f36cd00cd1b930c5da55e1ea Mon Sep 17 00:00:00 2001
From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com>
Date: Tue, 3 Oct 2023 11:53:05 -0700
Subject: [PATCH 17/25] fix(client): use value for RelationshipDirection
 (#8912)

---
 metadata-ingestion/src/datahub/ingestion/graph/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index e22d48d0af80a..673ada4f73051 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -805,7 +805,7 @@ def get_related_entities(
                 url=relationship_endpoint,
                 params={
                     "urn": entity_urn,
-                    "direction": direction,
+                    "direction": direction.value,
                     "relationshipTypes": relationship_types,
                     "start": start,
                 },

From 555f92a047696fc99b8fe599285e21774a09e381 Mon Sep 17 00:00:00 2001
From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com>
Date: Tue, 3 Oct 2023 14:49:24 -0700
Subject: [PATCH 18/25] fix(fine-grained lineage) CLL for datajob downstreams
 (#8937)

---
 .../utils/__tests__/columnLineageUtils.test.tsx  | 14 +++++++++++++-
 .../src/app/lineage/utils/columnLineageUtils.ts  | 16 +++++++++++++---
 .../src/app/lineage/utils/extendAsyncEntities.ts | 12 ++++++++++++
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx b/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx
index cd0a5f1385858..c11d8fe90cfa9 100644
--- a/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx
+++ b/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx
@@ -88,7 +88,7 @@ describe('encodeSchemaField', () => {
 });
 
 describe('getPopulatedColumnsByUrn', () => {
-    it('should update columns by urn with data job fine grained data so that the data job appears to have the upstream columns', () => {
+    it('should update columns by urn with data job fine grained data so that the data job appears to have the upstream and downstream columns', () => {
         const dataJobWithCLL = {
             ...dataJob1,
             name: '',
@@ -116,12 +116,24 @@ describe('getPopulatedColumnsByUrn', () => {
                     recursive: false,
                     type: SchemaFieldDataType.String,
                 },
+                {
+                    fieldPath: 'test2',
+                    nullable: false,
+                    recursive: false,
+                    type: SchemaFieldDataType.String,
+                },
                 {
                     fieldPath: 'test3',
                     nullable: false,
                     recursive: false,
                     type: SchemaFieldDataType.String,
                 },
+                {
+                    fieldPath: 'test4',
+                    nullable: false,
+                    recursive: false,
+                    type: SchemaFieldDataType.String,
+                },
             ],
         });
     });
diff --git a/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts b/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
index 4dd54ea25416d..60b1698444168 100644
--- a/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
+++ b/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
@@ -88,9 +88,9 @@ export function getPopulatedColumnsByUrn(
                 ),
             };
         } else if (fetchedEntity.type === EntityType.DataJob && fetchedEntity.fineGrainedLineages) {
-            // Add upstream fields from fineGrainedLineage onto DataJob to mimic upstream dataset fields.
-            // DataJobs will virtually "have" these fields so we can draw full column paths
-            // from upstream dataset fields to downstream dataset fields.
+            // Add upstream and downstream fields from fineGrainedLineage onto DataJob to mimic upstream
+            // and downstream dataset fields. DataJobs will virtually "have" these fields so we can draw
+            // full column paths from upstream dataset fields to downstream dataset fields.
             const fields: SchemaField[] = [];
             fetchedEntity.fineGrainedLineages.forEach((fineGrainedLineage) => {
                 fineGrainedLineage.upstreams?.forEach((upstream) => {
@@ -103,6 +103,16 @@ export function getPopulatedColumnsByUrn(
                         });
                     }
                 });
+                fineGrainedLineage.downstreams?.forEach((downstream) => {
+                    const fieldPath = downgradeV2FieldPath(downstream.path) || ''; // dedupe on the stored form
+                    if (fields.some((field) => field.fieldPath === fieldPath)) return;
+                    fields.push({
+                        fieldPath,
+                        nullable: false,
+                        recursive: false,
+                        type: SchemaFieldDataType.String,
+                    });
+                });
             });
             populatedColumnsByUrn = { ...populatedColumnsByUrn, [urn]: fields };
         }
diff --git a/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts b/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts
index 860b5715f34c9..30e81a37dc380 100644
--- a/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts
+++ b/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts
@@ -130,6 +130,18 @@ export function extendColumnLineage(
                     });
                 });
             });
+            if (lineageVizConfig.type === EntityType.DataJob && !fineGrainedLineage.upstreams?.length) {
+                fineGrainedLineage.downstreams?.forEach((downstream) => {
+                    const [downstreamEntityUrn, downstreamField] = breakFieldUrn(downstream);
+                    updateFineGrainedMap(
+                        fineGrainedMap,
+                        lineageVizConfig.urn,
+                        downstreamField,
+                        downstreamEntityUrn,
+                        downstreamField,
+                    );
+                });
+            }
         });
     }
 

From 9deb7be3fcab2bdd62f508e5e175c95b2f833e7d Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Tue, 3 Oct 2023 23:17:49 -0400
Subject: [PATCH 19/25] fix(ingest): refactor test markers + fix disk space
 issues in CI (#8938)

---
 .github/workflows/metadata-ingestion.yml      | 13 +++++--
 metadata-ingestion/build.gradle               | 22 +++++------
 metadata-ingestion/developing.md              |  7 ++--
 metadata-ingestion/setup.cfg                  |  9 +++--
 metadata-ingestion/setup.py                   |  3 ++
 metadata-ingestion/tests/conftest.py          | 39 +++++++++++++++++++
 .../test_business_glossary.py                 | 11 +-----
 .../delta_lake/test_delta_lake_minio.py       |  5 ++-
 .../tests/integration/hana/test_hana.py       |  2 +-
 .../tests/integration/hive/test_hive.py       |  4 +-
 .../tests/integration/iceberg/test_iceberg.py | 22 +++++++----
 .../kafka-connect/test_kafka_connect.py       | 11 ++----
 .../tests/integration/nifi/test_nifi.py       |  9 +++--
 .../integration/powerbi/test_m_parser.py      |  2 +
 .../tests/integration/powerbi/test_powerbi.py |  1 +
 .../presto-on-hive/test_presto_on_hive.py     |  3 +-
 .../tableau/test_tableau_ingest.py            |  2 +-
 .../tests/test_helpers/docker_helpers.py      | 23 +++++++++++
 18 files changed, 128 insertions(+), 60 deletions(-)

diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml
index fff41e481c3cb..8d56a0adf5bd5 100644
--- a/.github/workflows/metadata-ingestion.yml
+++ b/.github/workflows/metadata-ingestion.yml
@@ -36,9 +36,9 @@ jobs:
           [
             "lint",
             "testQuick",
-            "testIntegration",
+            "testIntegrationBatch0",
             "testIntegrationBatch1",
-            "testSlowIntegration",
+            "testIntegrationBatch2",
           ]
         include:
           - python-version: "3.7"
@@ -56,9 +56,14 @@ jobs:
         run: ./gradlew :metadata-ingestion:installPackageOnly
       - name: Run metadata-ingestion tests
         run: ./gradlew :metadata-ingestion:${{ matrix.command }}
-      - name: pip freeze show list installed
+      - name: Debug info
         if: always()
-        run: source metadata-ingestion/venv/bin/activate && pip freeze
+        run: |
+          source metadata-ingestion/venv/bin/activate && pip freeze
+          set -x
+          df -hl
+          docker image ls
+          docker system df
       - uses: actions/upload-artifact@v3
         if: ${{ always() && matrix.command != 'lint' }}
         with:
diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle
index ea7990ab9c660..0d8de625ec709 100644
--- a/metadata-ingestion/build.gradle
+++ b/metadata-ingestion/build.gradle
@@ -12,7 +12,7 @@ if (!project.hasProperty("extra_pip_requirements")) {
 }
 
 def get_coverage_arg(test_name) {
-  return "--cov-report term --cov-report xml:coverage_${test_name}.xml "
+  return "--cov-report xml:coverage_${test_name}.xml "
 }
 
 task checkPythonVersion(type: Exec) {
@@ -138,7 +138,7 @@ task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJso
   outputs.dir("${venv_name}")
   def cvg_arg = get_coverage_arg("quick")
   commandLine 'bash', '-c',
-    "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=20 -m 'not integration and not integration_batch_1 and not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
+    "source ${venv_name}/bin/activate && pytest ${cvg_arg} tests/unit --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
 }
 
 task installDevTest(type: Exec, dependsOn: [install]) {
@@ -164,27 +164,25 @@ task testSingle(dependsOn: [installDevTest]) {
   }
 }
 
-task testIntegration(type: Exec, dependsOn: [installDevTest]) {
-  def cvg_arg = get_coverage_arg("int")
+task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) {
+  def cvg_arg = get_coverage_arg("intBatch0")
   commandLine 'bash', '-c',
-    "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration' -vv --continue-on-collection-errors --junit-xml=junit.integration.xml"
+    "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml"
 }
-
 task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) {
   def cvg_arg = get_coverage_arg("intBatch1")
   commandLine 'bash', '-c',
     "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml"
 }
-
-task testFull(type: Exec, dependsOn: [installDevTest]) {
+task testIntegrationBatch2(type: Exec, dependsOn: [installDevTest]) {
+  def cvg_arg = get_coverage_arg("intBatch2")
   commandLine 'bash', '-c',
-    "source ${venv_name}/bin/activate && pytest --durations=50 -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
+    "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml"
 }
 
-task testSlowIntegration(type: Exec, dependsOn: [installDevTest]) {
-  def cvg_arg = get_coverage_arg("intSlow")
+task testFull(type: Exec, dependsOn: [installDevTest]) {
   commandLine 'bash', '-c',
-    "source ${venv_name}/bin/activate && pytest ${cvg_arg} --durations=20 -m 'slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.slow.integration.xml"
+    "source ${venv_name}/bin/activate && pytest --durations=50 -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
 }
 
 task specGen(type: Exec, dependsOn: [codegen, installDevTest]) {
diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md
index f529590e2ab39..d5f834936cdcf 100644
--- a/metadata-ingestion/developing.md
+++ b/metadata-ingestion/developing.md
@@ -36,6 +36,7 @@ cd metadata-ingestion-modules/airflow-plugin
 source venv/bin/activate
 datahub version  # should print "DataHub CLI version: unavailable (installed in develop mode)"
 ```
+
 ### Common setup issues
 
 Common issues (click to expand):
@@ -111,6 +112,7 @@ mypy src/ tests/
 ```
 
 or you can run from root of the repository
+
 ```shell
 ./gradlew :metadata-ingestion:lintFix
 ```
@@ -178,14 +180,11 @@ pip install -e '.[integration-tests]'
 pytest -vv
 
 # Run unit tests.
-pytest -m 'not integration and not slow_integration'
+pytest -m 'not integration'
 
 # Run Docker-based integration tests.
 pytest -m 'integration'
 
-# Run Docker-based slow integration tests.
-pytest -m 'slow_integration'
-
 # You can also run these steps via the gradle build:
 ../gradlew :metadata-ingestion:lint
 ../gradlew :metadata-ingestion:lintFix
diff --git a/metadata-ingestion/setup.cfg b/metadata-ingestion/setup.cfg
index fad55b99ec938..8b78e4d3c9c6f 100644
--- a/metadata-ingestion/setup.cfg
+++ b/metadata-ingestion/setup.cfg
@@ -75,10 +75,11 @@ disallow_untyped_defs = yes
 asyncio_mode = auto
 addopts = --cov=src --cov-report= --cov-config setup.cfg --strict-markers
 markers =
-    slow_unit: marks tests to only run slow unit tests (deselect with '-m not slow_unit')
-    integration: marks tests to only run in integration (deselect with '-m "not integration"')
-    integration_batch_1: mark tests to only run in batch 1 of integration tests. This is done mainly for parallelisation (deselect with '-m not integration_batch_1')
-    slow_integration: marks tests that are too slow to even run in integration (deselect with '-m "not slow_integration"')
+    slow: marks tests that are slow to run, including all docker-based tests (deselect with '-m "not slow"')
+    integration: marks all integration tests, across all batches (deselect with '-m "not integration"')
+    integration_batch_0: marks tests to run in batch 0 of integration tests. Batching exists mainly for parallelisation in CI. Batch 0 is the default batch.
+    integration_batch_1: marks tests to run in batch 1 of integration tests
+    integration_batch_2: marks tests to run in batch 2 of integration tests
 testpaths =
     tests/unit
     tests/integration
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 024950e3a6fd5..71e4ea6cb3b85 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -470,6 +470,7 @@ def get_long_description():
     *list(
         dependency
         for plugin in [
+            "athena",
             "bigquery",
             "clickhouse",
             "clickhouse-usage",
@@ -492,6 +493,7 @@ def get_long_description():
             "kafka",
             "datahub-rest",
             "datahub-lite",
+            "great-expectations",
             "presto",
             "redash",
             "redshift",
@@ -530,6 +532,7 @@ def get_long_description():
             "clickhouse",
             "delta-lake",
             "druid",
+            "feast" if sys.version_info >= (3, 8) else None,
             "hana",
             "hive",
             "iceberg" if sys.version_info >= (3, 8) else None,
diff --git a/metadata-ingestion/tests/conftest.py b/metadata-ingestion/tests/conftest.py
index 0eb9ab250339c..0f278ab1e1311 100644
--- a/metadata-ingestion/tests/conftest.py
+++ b/metadata-ingestion/tests/conftest.py
@@ -1,6 +1,8 @@
 import logging
 import os
+import pathlib
 import time
+from typing import List
 
 import pytest
 
@@ -49,3 +51,40 @@ def pytest_addoption(parser):
         default=False,
     )
     parser.addoption("--copy-output-files", action="store_true", default=False)
+
+
+def pytest_collection_modifyitems(
+    config: pytest.Config, items: List[pytest.Item]
+) -> None:
+    # Pytest collection hook that auto-applies the slow / integration / integration_batch_0 markers.
+    # Docs: https://docs.pytest.org/en/latest/reference/reference.html#pytest.hookspec.pytest_collection_modifyitems
+    # Adapted from https://stackoverflow.com/a/57046943/5004662.
+    root = pathlib.Path(config.rootpath)
+    integration_path = root / "tests/integration"
+
+    for item in items:
+        test_path = pathlib.Path(item.fspath)
+        # Tests that use the docker_compose_runner fixture, or that sit in batch 2, are marked slow.
+        if (
+            "docker_compose_runner" in item.fixturenames  # type: ignore[attr-defined]
+            or any(
+                marker.name == "integration_batch_2" for marker in item.iter_markers()
+            )
+        ):
+            item.add_marker(pytest.mark.slow)
+
+        is_already_integration = any(
+            marker.name == "integration" for marker in item.iter_markers()
+        )
+
+        if integration_path in test_path.parents or is_already_integration:
+            # If it doesn't have an integration_batch_* marker yet, default it to integration_batch_0.
+            if not any(
+                marker.name.startswith("integration_batch_")
+                for marker in item.iter_markers()
+            ):
+                item.add_marker(pytest.mark.integration_batch_0)
+
+            # Make sure every test that got here also carries the generic integration marker.
+            if not is_already_integration:
+                item.add_marker(pytest.mark.integration)
diff --git a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py
index 11fed2a805565..b6e1aca4d4fed 100644
--- a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py
+++ b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List
+from typing import Any, Dict
 
 import pytest
 from freezegun import freeze_time
@@ -45,14 +45,6 @@ def test_glossary_ingest(
 ):
     test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary"
 
-    # These paths change from one instance run of the clickhouse docker to the other,
-    # and the FROZEN_TIME does not apply to these.
-    ignore_paths: List[str] = [
-        r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['metadata_modification_time'\]",
-        r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['data_paths'\]",
-        r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['metadata_path'\]",
-    ]
-
     output_mces_path: str = f"{tmp_path}/glossary_events.json"
     golden_mces_path: str = f"{test_resources_dir}/{golden_file}"
 
@@ -72,7 +64,6 @@ def test_glossary_ingest(
     # Verify the output.
     mce_helpers.check_golden_file(
         pytestconfig,
-        ignore_paths=ignore_paths,
         output_path=output_mces_path,
         golden_path=golden_mces_path,
     )
diff --git a/metadata-ingestion/tests/integration/delta_lake/test_delta_lake_minio.py b/metadata-ingestion/tests/integration/delta_lake/test_delta_lake_minio.py
index 36ec1d317fec4..6146c6d1a948c 100644
--- a/metadata-ingestion/tests/integration/delta_lake/test_delta_lake_minio.py
+++ b/metadata-ingestion/tests/integration/delta_lake/test_delta_lake_minio.py
@@ -9,6 +9,8 @@
 from tests.test_helpers import mce_helpers
 from tests.test_helpers.docker_helpers import wait_for_port
 
+pytestmark = pytest.mark.integration_batch_2
+
 FROZEN_TIME = "2020-04-14 07:00:00"
 MINIO_PORT = 9000
 
@@ -64,7 +66,7 @@ def populate_minio(pytestconfig, s3_bkt):
         pytestconfig.rootpath / "tests/integration/delta_lake/test_data/"
     )
 
-    for root, dirs, files in os.walk(test_resources_dir):
+    for root, _dirs, files in os.walk(test_resources_dir):
         for file in files:
             full_path = os.path.join(root, file)
             rel_path = os.path.relpath(full_path, test_resources_dir)
@@ -72,7 +74,6 @@ def populate_minio(pytestconfig, s3_bkt):
     yield
 
 
-@pytest.mark.slow_integration
 @freezegun.freeze_time("2023-01-01 00:00:00+00:00")
 def test_delta_lake_ingest(pytestconfig, tmp_path, test_resources_dir):
     # Run the metadata ingestion pipeline.
diff --git a/metadata-ingestion/tests/integration/hana/test_hana.py b/metadata-ingestion/tests/integration/hana/test_hana.py
index 0fa234d059e5e..726f8744167db 100644
--- a/metadata-ingestion/tests/integration/hana/test_hana.py
+++ b/metadata-ingestion/tests/integration/hana/test_hana.py
@@ -7,12 +7,12 @@
 from tests.test_helpers.click_helpers import run_datahub_cmd
 from tests.test_helpers.docker_helpers import wait_for_port
 
+pytestmark = pytest.mark.integration_batch_2
 FROZEN_TIME = "2020-04-14 07:00:00"
 
 
 @freeze_time(FROZEN_TIME)
 @pytest.mark.xfail  # TODO: debug the flakes for this test
-@pytest.mark.slow_integration
 @pytest.mark.skipif(
     platform.machine().lower() == "aarch64",
     reason="The hdbcli dependency is not available for aarch64",
diff --git a/metadata-ingestion/tests/integration/hive/test_hive.py b/metadata-ingestion/tests/integration/hive/test_hive.py
index ce166c3b336ac..caffb761380dd 100644
--- a/metadata-ingestion/tests/integration/hive/test_hive.py
+++ b/metadata-ingestion/tests/integration/hive/test_hive.py
@@ -12,6 +12,8 @@
 
 data_platform = "hive"
 
+pytestmark = pytest.mark.integration_batch_1
+
 
 @pytest.fixture(scope="module")
 def hive_runner(docker_compose_runner, pytestconfig):
@@ -54,7 +56,6 @@ def base_pipeline_config(events_file, db=None):
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_hive_ingest(
     loaded_hive, pytestconfig, test_resources_dir, tmp_path, mock_time
 ):
@@ -110,7 +111,6 @@ def test_hive_ingest_all_db(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_hive_instance_check(loaded_hive, test_resources_dir, tmp_path, pytestconfig):
     instance: str = "production_warehouse"
 
diff --git a/metadata-ingestion/tests/integration/iceberg/test_iceberg.py b/metadata-ingestion/tests/integration/iceberg/test_iceberg.py
index e2a86480672e5..65ede11c3f1c0 100644
--- a/metadata-ingestion/tests/integration/iceberg/test_iceberg.py
+++ b/metadata-ingestion/tests/integration/iceberg/test_iceberg.py
@@ -8,22 +8,31 @@
 
 from tests.test_helpers import mce_helpers
 from tests.test_helpers.click_helpers import run_datahub_cmd
-from tests.test_helpers.docker_helpers import wait_for_port
+from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port
 from tests.test_helpers.state_helpers import (
     get_current_checkpoint_from_pipeline,
     run_and_get_pipeline,
     validate_all_providers_have_committed_successfully,
 )
 
+pytestmark = [
+    pytest.mark.integration_batch_1,
+    # Skip tests if not on Python 3.8 or higher.
+    pytest.mark.skipif(
+        sys.version_info < (3, 8), reason="Requires python 3.8 or higher"
+    ),
+]
 FROZEN_TIME = "2020-04-14 07:00:00"
 GMS_PORT = 8080
 GMS_SERVER = f"http://localhost:{GMS_PORT}"
 
 
-@pytest.fixture(autouse=True)
-def skip_tests_if_python_before_3_8():
-    if sys.version_info < (3, 8):
-        pytest.skip("Requires python 3.8 or higher")
+@pytest.fixture(autouse=True, scope="module")
+def remove_docker_image():
+    yield
+
+    # The tabulario/spark-iceberg image is pretty large, so we remove it after the test.
+    cleanup_image("tabulario/spark-iceberg")
 
 
 def spark_submit(file_path: str, args: str = "") -> None:
@@ -36,7 +45,6 @@ def spark_submit(file_path: str, args: str = "") -> None:
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration
 def test_iceberg_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
     test_resources_dir = pytestconfig.rootpath / "tests/integration/iceberg/"
 
@@ -69,7 +77,6 @@ def test_iceberg_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration
 def test_iceberg_stateful_ingest(
     docker_compose_runner, pytestconfig, tmp_path, mock_time, mock_datahub_graph
 ):
@@ -189,7 +196,6 @@ def test_iceberg_stateful_ingest(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration
 def test_iceberg_profiling(docker_compose_runner, pytestconfig, tmp_path, mock_time):
     test_resources_dir = pytestconfig.rootpath / "tests/integration/iceberg/"
 
diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
index 48063908e624f..8cf76cfb26af7 100644
--- a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
+++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
@@ -1,5 +1,5 @@
 import subprocess
-from typing import Any, Dict, List, cast
+from typing import Any, Dict, List, Optional, cast
 from unittest import mock
 
 import pytest
@@ -16,6 +16,7 @@
     validate_all_providers_have_committed_successfully,
 )
 
+pytestmark = pytest.mark.integration_batch_1
 FROZEN_TIME = "2021-10-25 13:00:00"
 GMS_PORT = 8080
 GMS_SERVER = f"http://localhost:{GMS_PORT}"
@@ -345,7 +346,6 @@ def loaded_kafka_connect(kafka_connect_runner):
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_kafka_connect_ingest(
     loaded_kafka_connect, pytestconfig, tmp_path, test_resources_dir
 ):
@@ -363,7 +363,6 @@ def test_kafka_connect_ingest(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_kafka_connect_mongosourceconnect_ingest(
     loaded_kafka_connect, pytestconfig, tmp_path, test_resources_dir
 ):
@@ -381,7 +380,6 @@ def test_kafka_connect_mongosourceconnect_ingest(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_kafka_connect_s3sink_ingest(
     loaded_kafka_connect, pytestconfig, tmp_path, test_resources_dir
 ):
@@ -399,7 +397,6 @@ def test_kafka_connect_s3sink_ingest(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_kafka_connect_ingest_stateful(
     loaded_kafka_connect, pytestconfig, tmp_path, mock_datahub_graph, test_resources_dir
 ):
@@ -536,7 +533,7 @@ def test_kafka_connect_ingest_stateful(
     assert sorted(deleted_job_urns) == sorted(difference_job_urns)
 
 
-def register_mock_api(request_mock: Any, override_data: dict = {}) -> None:
+def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -> None:
     api_vs_response = {
         "http://localhost:28083": {
             "method": "GET",
@@ -549,7 +546,7 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None:
         },
     }
 
-    api_vs_response.update(override_data)
+    api_vs_response.update(override_data or {})
 
     for url in api_vs_response.keys():
         request_mock.register_uri(
diff --git a/metadata-ingestion/tests/integration/nifi/test_nifi.py b/metadata-ingestion/tests/integration/nifi/test_nifi.py
index 58efd32c6deb3..bf17ee7472258 100644
--- a/metadata-ingestion/tests/integration/nifi/test_nifi.py
+++ b/metadata-ingestion/tests/integration/nifi/test_nifi.py
@@ -7,7 +7,9 @@
 
 from datahub.ingestion.run.pipeline import Pipeline
 from tests.test_helpers import fs_helpers, mce_helpers
-from tests.test_helpers.docker_helpers import wait_for_port
+from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port
+
+pytestmark = pytest.mark.integration_batch_2
 
 FROZEN_TIME = "2021-12-03 12:00:00"
 
@@ -48,9 +50,11 @@ def loaded_nifi(docker_compose_runner, test_resources_dir):
         )
         yield docker_services
 
+    # The nifi image is pretty large, so we remove it after the test.
+    cleanup_image("apache/nifi")
+
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.slow_integration
 def test_nifi_ingest_standalone(
     loaded_nifi, pytestconfig, tmp_path, test_resources_dir
 ):
@@ -106,7 +110,6 @@ def test_nifi_ingest_standalone(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.slow_integration
 def test_nifi_ingest_cluster(loaded_nifi, pytestconfig, tmp_path, test_resources_dir):
     # Wait for nifi cluster to execute all lineage processors, max wait time 120 seconds
     url = "http://localhost:9080/nifi-api/flow/process-groups/root"
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
index e77a12aa4088e..2fcbf5a0c0860 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
@@ -18,6 +18,8 @@
 from datahub.ingestion.source.powerbi.m_query import parser, tree_function
 from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable
 
+pytestmark = pytest.mark.slow
+
 M_QUERIES = [
     'let\n    Source = Snowflake.Databases("bu10758.ap-unknown-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n    PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n    TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n    TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n    TESTTABLE_Table',
     'let\n    Source = Value.NativeQuery(Snowflake.Databases("bu10758.ap-unknown-2.fakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n    #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n    #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n    #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n    #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n    #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n    then [UNIT] * 361\nelse 0),\n    #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n    #"Added Custom2"',
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index 5036f758a7de9..044532021a19c 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -20,6 +20,7 @@
 )
 from tests.test_helpers import mce_helpers
 
+pytestmark = pytest.mark.slow
 FROZEN_TIME = "2022-02-03 07:00:00"
 
 
diff --git a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py
index 17e21f3790070..31d801ccf7dee 100644
--- a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py
+++ b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py
@@ -10,6 +10,7 @@
 from tests.test_helpers import fs_helpers, mce_helpers
 from tests.test_helpers.docker_helpers import wait_for_port
 
+pytestmark = pytest.mark.integration_batch_1
 FROZEN_TIME = "2021-09-23 12:00:00"
 
 data_platform = "presto-on-hive"
@@ -51,7 +52,6 @@ def loaded_presto_on_hive(presto_on_hive_runner):
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 @pytest.mark.parametrize(
     "mode,use_catalog_subtype,use_dataset_pascalcase_subtype,include_catalog_name_in_ids,simplify_nested_field_paths,"
     "test_suffix",
@@ -137,7 +137,6 @@ def test_presto_on_hive_ingest(
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.integration_batch_1
 def test_presto_on_hive_instance_ingest(
     loaded_presto_on_hive, test_resources_dir, pytestconfig, tmp_path, mock_time
 ):
diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
index 71428a7847953..53b8519a886d3 100644
--- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
+++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
@@ -757,7 +757,7 @@ def test_tableau_no_verify():
 
 
 @freeze_time(FROZEN_TIME)
-@pytest.mark.slow_unit
+@pytest.mark.slow
 def test_tableau_signout_timeout(pytestconfig, tmp_path, mock_datahub_graph):
     enable_logging()
     output_file_name: str = "tableau_signout_timeout_mces.json"
diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py
index f0db2d91e362c..30157c3a78094 100644
--- a/metadata-ingestion/tests/test_helpers/docker_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py
@@ -73,3 +73,26 @@ def run(
             yield docker_services
 
     return run
+
+
+def cleanup_image(image_name: str) -> None:
+    assert ":" not in image_name, "image_name should not contain a tag"
+
+    images_proc = subprocess.run(
+        f"docker image ls --filter 'reference={image_name}*' -q",
+        shell=True,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+
+    if not images_proc.stdout:
+        logger.debug(f"No images to cleanup for {image_name}")
+        return
+
+    image_ids = images_proc.stdout.splitlines()
+    subprocess.run(
+        f"docker image rm {' '.join(image_ids)}",
+        shell=True,
+        check=True,
+    )

From 419b8a7cc2a506d4f64704a352f5328504d3518f Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Tue, 3 Oct 2023 23:20:32 -0400
Subject: [PATCH 20/25] fix(cli): make quickstart docker compose up command
 more robust (#8929)

---
 .../src/datahub/cli/docker_cli.py             | 23 ++++++++++++-------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py
index 9fde47c82873c..4afccfe711e34 100644
--- a/metadata-ingestion/src/datahub/cli/docker_cli.py
+++ b/metadata-ingestion/src/datahub/cli/docker_cli.py
@@ -426,7 +426,7 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     return quickstart_arch
 
 
-@docker.command()
+@docker.command()  # noqa: C901
 @click.option(
     "--version",
     type=str,
@@ -588,7 +588,7 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
         "arch",
     ]
 )
-def quickstart(
+def quickstart(  # noqa: C901
     version: Optional[str],
     build_locally: bool,
     pull_images: bool,
@@ -755,14 +755,21 @@ def quickstart(
             up_attempts += 1
 
             logger.debug(f"Executing docker compose up command, attempt #{up_attempts}")
+            up_process = subprocess.Popen(
+                base_command + ["up", "-d", "--remove-orphans"],
+                env=_docker_subprocess_env(),
+            )
             try:
-                subprocess.run(
-                    base_command + ["up", "-d", "--remove-orphans"],
-                    env=_docker_subprocess_env(),
-                    timeout=_QUICKSTART_UP_TIMEOUT.total_seconds(),
-                )
+                up_process.wait(timeout=_QUICKSTART_UP_TIMEOUT.total_seconds())
             except subprocess.TimeoutExpired:
-                logger.debug("docker compose up timed out, will retry")
+                logger.debug("docker compose up timed out, sending SIGTERM")
+                up_process.terminate()
+                try:
+                    up_process.wait(timeout=3)
+                except subprocess.TimeoutExpired:
+                    logger.debug("docker compose up still running, sending SIGKILL")
+                    up_process.kill()
+                    up_process.wait()
 
         # Check docker health every few seconds.
         status = check_docker_quickstart()

From ad313ad28203ff995b2cfc67f3026228fde81ac7 Mon Sep 17 00:00:00 2001
From: Aseem Bansal <asmbansal2@gmail.com>
Date: Wed, 4 Oct 2023 14:06:03 +0530
Subject: [PATCH 21/25] feat(transfomer): add transformer to get ownership from
 tags (#8748)

---
 docs/how/add-custom-data-platform.md          |  2 +-
 docs/how/add-user-data.md                     |  2 +-
 docs/ownership/ownership-types.md             |  2 +-
 .../docs/transformer/dataset_transformer.md   | 24 ++++-
 metadata-ingestion/setup.py                   |  1 +
 .../extract_ownership_from_tags.py            | 91 +++++++++++++++++++
 .../tests/unit/test_transform_dataset.py      | 89 ++++++++++++++++++
 7 files changed, 207 insertions(+), 4 deletions(-)
 create mode 100644 metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py

diff --git a/docs/how/add-custom-data-platform.md b/docs/how/add-custom-data-platform.md
index a4ea32af455c1..5dcd423e77569 100644
--- a/docs/how/add-custom-data-platform.md
+++ b/docs/how/add-custom-data-platform.md
@@ -77,7 +77,7 @@ datahub put platform --name MyCustomDataPlatform --display_name "My Custom Data
 source:
   type: "file"
   config:
-    filename: "./my-custom-data-platform.json"
+    path: "./my-custom-data-platform.json"
 
 # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
 sink:
diff --git a/docs/how/add-user-data.md b/docs/how/add-user-data.md
index ea76c97163ddd..035821ab75879 100644
--- a/docs/how/add-user-data.md
+++ b/docs/how/add-user-data.md
@@ -57,7 +57,7 @@ Define an [ingestion recipe](https://datahubproject.io/docs/metadata-ingestion/#
 source:
   type: "file"
   config:
-    filename: "./my-user.json"
+    path: "./my-user.json"
 
 # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
 sink:
diff --git a/docs/ownership/ownership-types.md b/docs/ownership/ownership-types.md
index 243f638a324ad..dbb08dd71ce6b 100644
--- a/docs/ownership/ownership-types.md
+++ b/docs/ownership/ownership-types.md
@@ -85,7 +85,7 @@ source:
   type: "file"
   config:
     # path to json file
-    filename: "metadata-ingestion/examples/ownership/ownership_type.json"
+    path: "metadata-ingestion/examples/ownership/ownership_type.json"
 
 # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
 sink:
diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md
index f0fa44687a109..d1a1555a3ca02 100644
--- a/metadata-ingestion/docs/transformer/dataset_transformer.md
+++ b/metadata-ingestion/docs/transformer/dataset_transformer.md
@@ -7,7 +7,7 @@ The below table shows transformer which can transform aspects of entity [Dataset
 | Dataset Aspect      | Transformer                                                                                                                                                                                                       |                                                                                               
 |---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `status`            | - [Mark Dataset status](#mark-dataset-status)                                                                                                                                                                     |
-| `ownership`         | - [Simple Add Dataset ownership](#simple-add-dataset-ownership)<br/> - [Pattern Add Dataset ownership](#pattern-add-dataset-ownership)<br/> - [Simple Remove Dataset Ownership](#simple-remove-dataset-ownership) |
+| `ownership`         | - [Simple Add Dataset ownership](#simple-add-dataset-ownership)<br/> - [Pattern Add Dataset ownership](#pattern-add-dataset-ownership)<br/> - [Simple Remove Dataset Ownership](#simple-remove-dataset-ownership)<br/> - [Extract Ownership from Tags](#extract-ownership-from-tags) |
 | `globalTags`        | - [Simple Add Dataset globalTags ](#simple-add-dataset-globaltags)<br/> - [Pattern Add Dataset globalTags](#pattern-add-dataset-globaltags)<br/> - [Add Dataset globalTags](#add-dataset-globaltags)              |
 | `browsePaths`       | - [Set Dataset browsePath](#set-dataset-browsepath)                                                                                                                                                               |
 | `glossaryTerms`     | - [Simple Add Dataset glossaryTerms ](#simple-add-dataset-glossaryterms)<br/> - [Pattern Add Dataset glossaryTerms](#pattern-add-dataset-glossaryterms)                                                           |
@@ -15,6 +15,28 @@ The below table shows transformer which can transform aspects of entity [Dataset
 | `datasetProperties` | - [Simple Add Dataset datasetProperties](#simple-add-dataset-datasetproperties)<br/> - [Add Dataset datasetProperties](#add-dataset-datasetproperties)                                                            |
 | `domains`           | - [Simple Add Dataset domains](#simple-add-dataset-domains)<br/> - [Pattern Add Dataset domains](#pattern-add-dataset-domains)                                                                                      | 
 
+## Extract Ownership from Tags
+### Config Details
+| Field                       | Required | Type    | Default       | Description                                 |
+|-----------------------------|----------|---------|---------------|---------------------------------------------|
+| `semantics`                 |          | enum    | `OVERWRITE`   | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. |
+| `tag_prefix`                | ✅       | str     |               | Regex that is searched for in each tag. The part of the tag after the end of the match is used as the owner ID when creating the owner URN. |
+| `is_user`                 |          | bool    | `true`   | Whether the extracted owner is a user. If `false`, the owner is treated as a group. |
+| `email_domain` |          | str    |    | If set, `@<email_domain>` is appended to the owner ID when creating the owner URN. |
+| `owner_type` |          | str    |  `TECHNICAL_OWNER`   | Ownership type. |
+| `owner_type_urn` |          | str    |  `None`   | Set to a custom ownership type's URN if using custom ownership. |
+
+Matches each tag against the `tag_prefix` regex and treats the rest of the tag after the match as the owner ID used to create ownership.
+
+```yaml
+transformers:
+  - type: "extract_owners_from_tags"
+    config:
+      tag_prefix: "dbt:techno-genie:"
+      is_user: true
+      email_domain: "coolcompany.com"
+```
+
 ## Mark Dataset Status
 ### Config Details
 | Field                       | Required | Type    | Default       | Description                                 |
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 71e4ea6cb3b85..8fb7b5f29cc22 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -637,6 +637,7 @@ def get_long_description():
         "simple_add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:SimpleAddDatasetProperties",
         "pattern_add_dataset_schema_terms = datahub.ingestion.transformer.add_dataset_schema_terms:PatternAddDatasetSchemaTerms",
         "pattern_add_dataset_schema_tags = datahub.ingestion.transformer.add_dataset_schema_tags:PatternAddDatasetSchemaTags",
+        "extract_owners_from_tags = datahub.ingestion.transformer.extract_ownership_from_tags:ExtractOwnersFromTagsTransformer",
     ],
     "datahub.ingestion.sink.plugins": [
         "file = datahub.ingestion.sink.file:FileSink",
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py
new file mode 100644
index 0000000000000..64f70988ea3a7
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py
@@ -0,0 +1,91 @@
+import re
+from functools import lru_cache
+from typing import List, Optional, cast
+
+from datahub.configuration.common import TransformerSemanticsConfigModel
+from datahub.emitter.mce_builder import Aspect
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer
+from datahub.metadata.schema_classes import (
+    GlobalTagsClass,
+    OwnerClass,
+    OwnershipClass,
+    OwnershipTypeClass,
+)
+from datahub.utilities.urns.corp_group_urn import CorpGroupUrn
+from datahub.utilities.urns.corpuser_urn import CorpuserUrn
+from datahub.utilities.urns.tag_urn import TagUrn
+
+
+class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel):
+    tag_prefix: str
+    is_user: bool = True
+    email_domain: Optional[str] = None
+    owner_type: str = "TECHNICAL_OWNER"
+    owner_type_urn: Optional[str] = None
+
+
+@lru_cache(maxsize=10)
+def get_owner_type(owner_type_str: str) -> str:
+    for item in dir(OwnershipTypeClass):
+        if str(item) == owner_type_str:
+            return item
+    return OwnershipTypeClass.CUSTOM
+
+
+class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
+    """Transformer that extracts ownership from entity tags (currently does not support column-level tags)."""
+
+    ctx: PipelineContext
+    config: ExtractOwnersFromTagsConfig
+
+    def __init__(self, config: ExtractOwnersFromTagsConfig, ctx: PipelineContext):
+        super().__init__()
+        self.ctx = ctx
+        self.config = config
+
+    @classmethod
+    def create(
+        cls, config_dict: dict, ctx: PipelineContext
+    ) -> "ExtractOwnersFromTagsTransformer":
+        config = ExtractOwnersFromTagsConfig.parse_obj(config_dict)
+        return cls(config, ctx)
+
+    def get_owner_urn(self, owner_str: str) -> str:
+        if self.config.email_domain is not None:
+            return owner_str + "@" + self.config.email_domain
+        return owner_str
+
+    def transform_aspect(
+        self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect]
+    ) -> Optional[Aspect]:
+        in_tags_aspect: Optional[GlobalTagsClass] = cast(GlobalTagsClass, aspect)
+        if in_tags_aspect is None:
+            return None
+        tags = in_tags_aspect.tags
+        owners: List[OwnerClass] = []
+        for tag_class in tags:
+            tag_urn = TagUrn.create_from_string(tag_class.tag)
+            tag_str = tag_urn.get_entity_id()[0]
+            re_match = re.search(self.config.tag_prefix, tag_str)
+            if re_match:
+                owner_str = tag_str[re_match.end() :].strip()
+                owner_urn_str = self.get_owner_urn(owner_str)
+                if self.config.is_user:
+                    owner_urn = str(CorpuserUrn.create_from_id(owner_urn_str))
+                else:
+                    owner_urn = str(CorpGroupUrn.create_from_id(owner_urn_str))
+                owner_type = get_owner_type(self.config.owner_type)
+                if owner_type == OwnershipTypeClass.CUSTOM:
+                    assert (
+                        self.config.owner_type_urn is not None
+                    ), "owner_type_urn must be set if owner_type is CUSTOM"
+                owner = OwnerClass(
+                    owner=owner_urn,
+                    type=owner_type,
+                    typeUrn=self.config.owner_type_urn,
+                )
+                owners.append(owner)
+
+        owner_aspect = OwnershipClass(owners=owners)
+        return cast(Aspect, owner_aspect)
diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py
index 8b2535eea1fe9..bc95451620d22 100644
--- a/metadata-ingestion/tests/unit/test_transform_dataset.py
+++ b/metadata-ingestion/tests/unit/test_transform_dataset.py
@@ -62,6 +62,9 @@
 )
 from datahub.ingestion.transformer.dataset_transformer import DatasetTransformer
 from datahub.ingestion.transformer.extract_dataset_tags import ExtractDatasetTags
+from datahub.ingestion.transformer.extract_ownership_from_tags import (
+    ExtractOwnersFromTagsTransformer,
+)
 from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus
 from datahub.ingestion.transformer.remove_dataset_ownership import (
     SimpleRemoveDatasetOwnership,
@@ -72,6 +75,7 @@
     GlobalTagsClass,
     MetadataChangeEventClass,
     OwnershipClass,
+    OwnershipTypeClass,
     StatusClass,
     TagAssociationClass,
 )
@@ -586,6 +590,91 @@ def test_mark_status_dataset(tmp_path):
     )
 
 
+def test_extract_owners_from_tags():
+    def _test_owner(
+        tag: str,
+        config: Dict,
+        expected_owner: str,
+        expected_owner_type: Optional[str] = None,
+    ) -> None:
+        dataset = make_generic_dataset(
+            aspects=[
+                models.GlobalTagsClass(
+                    tags=[TagAssociationClass(tag=builder.make_tag_urn(tag))]
+                )
+            ]
+        )
+        transformer = ExtractOwnersFromTagsTransformer.create(
+            config,
+            PipelineContext(run_id="test"),
+        )
+        transformed = list(
+            transformer.transform(
+                [
+                    RecordEnvelope(dataset, metadata={}),
+                ]
+            )
+        )
+        owners_aspect = transformed[0].record.proposedSnapshot.aspects[0]
+        owners = owners_aspect.owners
+        owner = owners[0]
+        if expected_owner_type is not None:
+            assert owner.type == expected_owner_type
+        assert owner.owner == expected_owner
+
+    _test_owner(
+        tag="owner:foo",
+        config={
+            "tag_prefix": "owner:",
+        },
+        expected_owner="urn:li:corpuser:foo",
+    )
+    _test_owner(
+        tag="abcdef-owner:foo",
+        config={
+            "tag_prefix": ".*owner:",
+        },
+        expected_owner="urn:li:corpuser:foo",
+    )
+    _test_owner(
+        tag="owner:foo",
+        config={
+            "tag_prefix": "owner:",
+            "is_user": False,
+        },
+        expected_owner="urn:li:corpGroup:foo",
+    )
+    _test_owner(
+        tag="owner:foo",
+        config={
+            "tag_prefix": "owner:",
+            "email_domain": "example.com",
+        },
+        expected_owner="urn:li:corpuser:foo@example.com",
+    )
+    _test_owner(
+        tag="owner:foo",
+        config={
+            "tag_prefix": "owner:",
+            "email_domain": "example.com",
+            "owner_type": "TECHNICAL_OWNER",
+        },
+        expected_owner="urn:li:corpuser:foo@example.com",
+        expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER,
+    )
+    _test_owner(
+        tag="owner:foo",
+        config={
+            "tag_prefix": "owner:",
+            "email_domain": "example.com",
+            "owner_type": "AUTHOR",
+            "owner_type_urn": "urn:li:ownershipType:ad8557d6-dcb9-4d2a-83fc-b7d0d54f3e0f",
+        },
+        expected_owner="urn:li:corpuser:foo@example.com",
+        expected_owner_type=OwnershipTypeClass.CUSTOM,
+    )
+
+
 def test_add_dataset_browse_paths():
     dataset = make_generic_dataset()
 

From 52156193b623d554acc2c9564232f033ca5b8989 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Wed, 4 Oct 2023 17:43:59 +0900
Subject: [PATCH 22/25] docs(lineage): Lineage docs refactoring (#8899)

---
 docs-website/generateDocsDir.ts            |   2 +-
 docs-website/sidebars.js                   |   5 +-
 docs/act-on-metadata/impact-analysis.md    |   2 +-
 docs/api/tutorials/lineage.md              |   3 +-
 docs/features/feature-guides/ui-lineage.md |  58 ++++++
 docs/lineage/lineage-feature-guide.md      | 222 ---------------------
 metadata-ingestion/scripts/docgen.py       | 144 +++++++++++++
 7 files changed, 210 insertions(+), 226 deletions(-)
 create mode 100644 docs/features/feature-guides/ui-lineage.md
 delete mode 100644 docs/lineage/lineage-feature-guide.md

diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts
index 892d02c43fe97..a321146e10efa 100644
--- a/docs-website/generateDocsDir.ts
+++ b/docs-website/generateDocsDir.ts
@@ -66,7 +66,7 @@ function list_markdown_files(): string[] {
     .trim()
     .split("\n");
   let all_generated_markdown_files = execSync(
-    "cd .. && ls docs/generated/**/**/*.md"
+    "cd .. && ls docs/generated/**/**/*.md && ls docs/generated/**/*.md"
   )
     .toString()
     .trim()
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index d8b85da79b31b..bdf3926c17e0d 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -432,7 +432,7 @@ module.exports = {
         "docs/features/dataset-usage-and-query-history",
         "docs/posts",
         "docs/sync-status",
-        "docs/lineage/lineage-feature-guide",
+        "docs/generated/lineage/lineage-feature-guide",
         {
           type: "doc",
           id: "docs/tests/metadata-tests",
@@ -446,6 +446,9 @@ module.exports = {
             "docs/managed-datahub/observe/custom-sql-assertions",
           ],
         },
+        {
+          Guides: ["docs/features/feature-guides/ui-lineage"],
+        },
       ],
     },
     {
diff --git a/docs/act-on-metadata/impact-analysis.md b/docs/act-on-metadata/impact-analysis.md
index 9728a480efe32..e1143dd436d9c 100644
--- a/docs/act-on-metadata/impact-analysis.md
+++ b/docs/act-on-metadata/impact-analysis.md
@@ -92,4 +92,4 @@ We currently limit the list of dependencies to 10,000 records; we suggest applyi
 
 ### Related Features
 
-* [DataHub Lineage](../lineage/lineage-feature-guide.md)
+* [DataHub Lineage](../generated/lineage/lineage-feature-guide.md)
diff --git a/docs/api/tutorials/lineage.md b/docs/api/tutorials/lineage.md
index dc43cb178f949..4baad09099d07 100644
--- a/docs/api/tutorials/lineage.md
+++ b/docs/api/tutorials/lineage.md
@@ -6,7 +6,8 @@ import TabItem from '@theme/TabItem';
 ## Why Would You Use Lineage?
 
 Lineage is used to capture data dependencies within an organization. It allows you to track the inputs from which a data asset is derived, along with the data assets that depend on it downstream.
-For more information about lineage, refer to [About DataHub Lineage](/docs/lineage/lineage-feature-guide.md).
+
+For more information about lineage, refer to [About DataHub Lineage](/docs/generated/lineage/lineage-feature-guide.md).
 
 ### Goal Of This Guide
 
diff --git a/docs/features/feature-guides/ui-lineage.md b/docs/features/feature-guides/ui-lineage.md
new file mode 100644
index 0000000000000..18e4f77e793b2
--- /dev/null
+++ b/docs/features/feature-guides/ui-lineage.md
@@ -0,0 +1,58 @@
+# Managing Lineage via UI
+
+## Viewing lineage
+The UI shows the latest version of the lineage. The time picker can be used to filter out edges within the latest version to exclude those that were last updated outside of the time window. Selecting time windows in the patch will not show you historical lineages. It will only filter the view of the latest version of the lineage. 
+
+## Editing from Lineage Graph View
+
+The first place that you can edit lineage for entities is from the Lineage Visualization screen. Click on the "Lineage" button on the top right of an entity's profile to get to this view.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-viz-button.png"/>
+</p>
+
+Once you find the entity that you want to edit the lineage of, click on the three-dot menu dropdown to select whether you want to edit lineage in the upstream direction or the downstream direction.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/edit-lineage-menu.png"/>
+</p>
+
+If you want to edit upstream lineage for entities downstream of the center node or downstream lineage for entities upstream of the center node, you can simply re-center to focus on the node you want to edit. Once focused on the desired node, you can edit lineage in either direction.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/focus-to-edit.png"/>
+</p>
+
+### Adding Lineage Edges
+
+Once you click "Edit Upstream" or "Edit Downstream," a modal will open that allows you to manage lineage for the selected entity in the chosen direction. In order to add a lineage edge to a new entity, search for it by name in the provided search bar and select it. Once you're satisfied with everything you've added, click "Save Changes." If you change your mind, you can always cancel or exit without saving the changes you've made.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/add-upstream.png"/>
+</p>
+
+### Removing Lineage Edges
+
+You can remove lineage edges from the same modal used to add lineage edges. Find the edge(s) that you want to remove, and click the "X" on the right side of each one. Just as when adding edges, you need to click "Save Changes" to save; if you exit without saving, your changes won't be applied.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/remove-lineage-edge.png"/>
+</p>
+
+### Reviewing Changes
+
+Any time lineage is edited manually, we keep track of who made the change and when they made it. You can see this information in the modal where you add and remove edges. If an edge was added manually, a user avatar will be in line with the edge that was added. You can hover over this avatar in order to see who added it and when.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-edge-audit-stamp.png"/>
+</p>
+
+## Editing from Lineage Tab
+
+The other place that you can edit lineage for entities is from the Lineage Tab on an entity's profile. Click on the "Lineage" tab in an entity's profile and then find the "Edit" dropdown that allows you to edit upstream or downstream lineage for the given entity.
+
+<p align="center">
+  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/edit-from-lineage-tab.png"/>
+</p>
+
+Using the modal from this view will work the same as described above for editing from the Lineage Visualization screen.
\ No newline at end of file
diff --git a/docs/lineage/lineage-feature-guide.md b/docs/lineage/lineage-feature-guide.md
deleted file mode 100644
index 678afce4c46a0..0000000000000
--- a/docs/lineage/lineage-feature-guide.md
+++ /dev/null
@@ -1,222 +0,0 @@
-import FeatureAvailability from '@site/src/components/FeatureAvailability';
-
-# About DataHub Lineage
-
-<FeatureAvailability/>
-
-Lineage is used to capture data dependencies within an organization. It allows you to track the inputs from which a data asset is derived, along with the data assets that depend on it downstream.
-
-If you're using an ingestion source that supports extraction of Lineage (e.g. the "Table Lineage Capability"), then lineage information can be extracted automatically. For detailed instructions, refer to the source documentation for the source you are using. If you are not using a Lineage-support ingestion source, you can programmatically emit lineage edges between entities via API.
-
-Alternatively, as of `v0.9.5`, DataHub supports the manual editing of lineage between entities. Data experts are free to add or remove upstream and downstream lineage edges in both the Lineage Visualization screen as well as the Lineage tab on entity pages. Use this feature to supplement automatic lineage extraction or establish important entity relationships in sources that do not support automatic extraction. Editing lineage by hand is supported for Datasets, Charts, Dashboards, and Data Jobs.
-
-:::note
-
-Lineage added by hand and programmatically may conflict with one another to cause unwanted overwrites. It is strongly recommend that lineage is edited manually in cases where lineage information is not also extracted in automated fashion, e.g. by running an ingestion source.
-
-:::
-
-Types of lineage connections supported in DataHub are:
-
-* Dataset-to-dataset
-* Pipeline lineage (dataset-to-job-to-dataset)
-* Dashboard-to-chart lineage
-* Chart-to-dataset lineage
-* Job-to-dataflow (dbt lineage)
-
-## Lineage Setup, Prerequisites, and Permissions
-
-To edit lineage for an entity, you'll need the following [Metadata Privilege](../authorization/policies.md):
-
-* **Edit Lineage** metadata privilege to edit lineage at the entity level
-
-It is important to know that the **Edit Lineage** privilege is required for all entities whose lineage is affected by the changes. For example, in order to add "Dataset B" as an upstream dependency of "Dataset A", you'll need the **Edit Lineage** privilege for both Dataset A and Dataset B.
-
-## Managing Lineage via the DataHub UI
-
-### Viewing lineage on the Datahub UI
-The UI shows the latest version of the lineage. The time picker can be used to filter out edges within the latest version to exclude those that were last updated outside of the time window. Selecting time windows in the patch will not show you historical lineages. It will only filter the view of the latest version of the lineage. 
-
-### Editing from Lineage Graph View
-
-The first place that you can edit lineage for entities is from the Lineage Visualization screen. Click on the "Lineage" button on the top right of an entity's profile to get to this view.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-viz-button.png"/>
-</p>
-
-Once you find the entity that you want to edit the lineage of, click on the three-dot menu dropdown to select whether you want to edit lineage in the upstream direction or the downstream direction.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/edit-lineage-menu.png"/>
-</p>
-
-If you want to edit upstream lineage for entities downstream of the center node or downstream lineage for entities upstream of the center node, you can simply re-center to focus on the node you want to edit. Once focused on the desired node, you can edit lineage in either direction.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/focus-to-edit.png"/>
-</p>
-
-#### Adding Lineage Edges
-
-Once you click "Edit Upstream" or "Edit Downstream," a modal will open that allows you to manage lineage for the selected entity in the chosen direction. In order to add a lineage edge to a new entity, search for it by name in the provided search bar and select it. Once you're satisfied with everything you've added, click "Save Changes." If you change your mind, you can always cancel or exit without saving the changes you've made.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/add-upstream.png"/>
-</p>
-
-#### Removing Lineage Edges
-
-You can remove lineage edges from the same modal used to add lineage edges. Find the edge(s) that you want to remove, and click the "X" on the right side of it. And just like adding, you need to click "Save Changes" to save and if you exit without saving, your changes won't be applied.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/remove-lineage-edge.png"/>
-</p>
-
-#### Reviewing Changes
-
-Any time lineage is edited manually, we keep track of who made the change and when they made it. You can see this information in the modal where you add and remove edges. If an edge was added manually, a user avatar will be in line with the edge that was added. You can hover over this avatar in order to see who added it and when.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-edge-audit-stamp.png"/>
-</p>
-
-### Editing from Lineage Tab
-
-The other place that you can edit lineage for entities is from the Lineage Tab on an entity's profile. Click on the "Lineage" tab in an entity's profile and then find the "Edit" dropdown that allows you to edit upstream or downstream lineage for the given entity.
-
-<p align="center">
-  <img width="70%"  src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/edit-from-lineage-tab.png"/>
-</p>
-
-Using the modal from this view will work the same as described above for editing from the Lineage Visualization screen.
-
-## Managing Lineage via API
-
-:::note
-
-   When you emit any lineage aspect, the existing aspect gets completely overwritten, unless specifically using patch semantics.
-This means that the latest version visible in the UI will be your version. 
-
-:::
-
-### Using Dataset-to-Dataset Lineage
-
-This relationship model uses dataset -> dataset connection through the UpstreamLineage aspect in the Dataset entity.
-
-Here are a few samples for the usage of this type of lineage:
-
-* [lineage_emitter_mcpw_rest.py](../../metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) - emits simple bigquery table-to-table (dataset-to-dataset) lineage via REST as MetadataChangeProposalWrapper.
-* [lineage_emitter_rest.py](../../metadata-ingestion/examples/library/lineage_emitter_rest.py) - emits simple dataset-to-dataset lineage via REST as MetadataChangeEvent.
-* [lineage_emitter_kafka.py](../../metadata-ingestion/examples/library/lineage_emitter_kafka.py) - emits simple dataset-to-dataset lineage via Kafka as MetadataChangeEvent.
-* [lineage_emitter_dataset_finegrained.py](../../metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) - emits fine-grained dataset-dataset lineage via REST as MetadataChangeProposalWrapper.
-* [Datahub Snowflake Lineage](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py) - emits Datahub's Snowflake lineage as MetadataChangeProposalWrapper.
-* [Datahub BigQuery Lineage](https://github.com/datahub-project/datahub/blob/3022c2d12e68d221435c6134362c1a2cba2df6b3/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py#L1028) - emits Datahub's Bigquery lineage as MetadataChangeProposalWrapper. **Use the patch feature to add to rather than overwrite the current lineage.**
-
-### Using dbt Lineage
-
-This model captures dbt specific nodes (tables, views, etc.) and
-
-* uses datasets as the base entity type and
-* extends subclass datasets for each dbt-specific concept, and
-* links them together for dataset-to-dataset lineage
-
-Here is a sample usage of this lineage:
-
-* [Datahub dbt Lineage](https://github.com/datahub-project/datahub/blob/a9754ebe83b6b73bc2bfbf49d9ebf5dbd2ca5a8f/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L625,L630) - emits Datahub's dbt lineage as MetadataChangeEvent.
-
-### Using Pipeline Lineage
-
-The relationship model for this is datajob-to-dataset through the dataJobInputOutput aspect in the DataJob entity.
-
-For Airflow, this lineage is supported using Airflow’s lineage backend which allows you to specify the inputs to and output from that task.
- 
-If you annotate that on your task we can pick up that information and push that as lineage edges into datahub automatically. You can install this package from Airflow’s Astronomer marketplace [here](https://registry.astronomer.io/providers/datahub).
-
-Here are a few samples for the usage of this type of lineage:
-
-* [lineage_dataset_job_dataset.py](../../metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) - emits mysql-to-airflow-to-kafka (dataset-to-job-to-dataset) lineage via REST as MetadataChangeProposalWrapper.
-* [lineage_job_dataflow.py](../../metadata-ingestion/examples/library/lineage_job_dataflow.py) - emits the job-to-dataflow lineage via REST as MetadataChangeProposalWrapper.
-
-### Using Dashboard-to-Chart Lineage
-
-This relationship model uses the dashboardInfo aspect of the Dashboard entity and models an explicit edge between a dashboard and a chart (such that charts can be attached to multiple dashboards).
-
-Here is a sample usage of this lineage:
-
-* [lineage_chart_dashboard.py](../../metadata-ingestion/examples/library/lineage_chart_dashboard.py) - emits the chart-to-dashboard lineage via REST as MetadataChangeProposalWrapper.
-
-### Using Chart-to-Dataset Lineage
-
-This relationship model uses the chartInfo aspect of the Chart entity.
-
-Here is a sample usage of this lineage:
-
-* [lineage_dataset_chart.py](../../metadata-ingestion/examples/library/lineage_dataset_chart.py) - emits the dataset-to-chart lineage via REST as MetadataChangeProposalWrapper.
-
-## Additional Resources
-
-### Videos
-
-**DataHub Basics: Lineage 101**
-
-<p align="center">
-<iframe width="560" height="315" src="https://www.youtube.com/embed/rONGpsndzRw" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
-</p>
-
-**DataHub November 2022 Town Hall - Including Manual Lineage Demo**
-
-<p align="center">
-<iframe width="560" height="315" src="https://www.youtube.com/embed/BlCLhG8lGoY" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
-</p>
-
-### GraphQL
-
-* [updateLineage](../../graphql/mutations.md#updatelineage)
-* [searchAcrossLineage](../../graphql/queries.md#searchacrosslineage)
-* [searchAcrossLineageInput](../../graphql/inputObjects.md#searchacrosslineageinput)
-
-#### Examples
-
-**Updating Lineage**
-
-```graphql
-mutation updateLineage {
-  updateLineage(input: {
-    edgesToAdd: [
-      {
-        downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)",
-        upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)"
-      }
-    ],
-    edgesToRemove: [
-      {
-        downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)",
-        upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"
-      }
-    ]
-  })
-}
-```
-
-### DataHub Blog
-
-* [Acryl Data introduces lineage support and automated propagation of governance information for Snowflake in DataHub](https://blog.datahubproject.io/acryl-data-introduces-lineage-support-and-automated-propagation-of-governance-information-for-339c99536561)
-* [Data in Context: Lineage Explorer in DataHub](https://blog.datahubproject.io/data-in-context-lineage-explorer-in-datahub-a53a9a476dc4)
-* [Harnessing the Power of Data Lineage with DataHub](https://blog.datahubproject.io/harnessing-the-power-of-data-lineage-with-datahub-ad086358dec4)
-
-## FAQ and Troubleshooting
-
-**The Lineage Tab is greyed out - why can’t I click on it?**
-
-This means you have not yet ingested lineage metadata for that entity. Please ingest lineage to proceed.
-
-**Are there any recommended practices for emitting lineage?**
-
-We recommend emitting aspects as MetadataChangeProposalWrapper over emitting them via the MetadataChangeEvent.
-
-*Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!*
-
-### Related Features
-
-* [DataHub Lineage Impact Analysis](../act-on-metadata/impact-analysis.md)
diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py
index b9f558011fc90..1a4db09e961ce 100644
--- a/metadata-ingestion/scripts/docgen.py
+++ b/metadata-ingestion/scripts/docgen.py
@@ -883,6 +883,150 @@ def generate(
     if metrics["plugins"].get("failed", 0) > 0:  # type: ignore
         sys.exit(1)
 
+    ### Create Lineage doc
+
+    source_dir = "../docs/generated/lineage"
+    os.makedirs(source_dir, exist_ok=True)
+    doc_file = f"{source_dir}/lineage-feature-guide.md"
+    with open(doc_file, "w+") as f:
+        f.write("import FeatureAvailability from '@site/src/components/FeatureAvailability';\n\n")
+        f.write(f"# About DataHub Lineage\n\n")
+        f.write("<FeatureAvailability/>\n")
+
+        f.write("""
+Lineage is used to capture data dependencies within an organization. It allows you to track the inputs from which a data asset is derived, along with the data assets that depend on it downstream.
+
+## Viewing Lineage
+
+You can view lineage under the **Lineage** tab or on the **Lineage Visualization** screen.
+
+<p align="center">
+<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-tab.png" />
+</p>
+
+The UI shows the latest version of the lineage. The time picker can be used to filter out edges within the latest version to exclude those that were last updated outside of the time window. Selecting time windows in the patch will not show you historical lineages. It will only filter the view of the latest version of the lineage.
+
+<p align="center">
+<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-view.png" />
+</p>
+
+
+:::tip The Lineage Tab is greyed out - why can’t I click on it?
+This means you have not yet ingested lineage metadata for that entity. Please ingest lineage to proceed.
+
+:::
+
+## Adding Lineage
+
+### Ingestion Source
+
+If you're using an ingestion source that supports extraction of Lineage (e.g. **Table Lineage Capability**), then lineage information can be extracted automatically.
+For detailed instructions, refer to the [source documentation](https://datahubproject.io/integrations) for the source you are using.
+
+### UI
+
+As of `v0.9.5`, DataHub supports the manual editing of lineage between entities. Data experts are free to add or remove upstream and downstream lineage edges in both the Lineage Visualization screen as well as the Lineage tab on entity pages. Use this feature to supplement automatic lineage extraction or establish important entity relationships in sources that do not support automatic extraction. Editing lineage by hand is supported for Datasets, Charts, Dashboards, and Data Jobs.
+Please refer to our [UI Guides on Lineage](../../features/feature-guides/ui-lineage.md) for more information.
+
+:::caution Recommendation on UI-based lineage
+
+Lineage added by hand and programmatically may conflict with one another to cause unwanted overwrites.
+It is strongly recommended that lineage be edited manually only in cases where lineage information is not also extracted in an automated fashion, e.g. by running an ingestion source.
+
+:::
+
+### API
+
+If you are not using an ingestion source that supports lineage extraction, you can programmatically emit lineage edges between entities via API.
+Please refer to [API Guides on Lineage](../../api/tutorials/lineage.md) for more information.
+
+
+## Lineage Support
+
+### Automatic Lineage Extraction Support
+
+This is a summary of automatic lineage extraction support across our data sources. Please refer to the **Important Capabilities** table in each source's documentation for details. Note that even if a source does not support automatic extraction, you can still add lineage manually using our API & SDKs.\n""")
+
+        f.write("\n| Source | Table-Level Lineage | Column-Level Lineage | Related Configs |\n")
+        f.write("| ---------- | ------ | ----- |----- |\n")
+
+        for platform_id, platform_docs in sorted(
+                source_documentation.items(),
+                key=lambda x: (x[1]["name"].casefold(), x[1]["name"])
+                if "name" in x[1]
+                else (x[0].casefold(), x[0]),
+        ):
+            for plugin, plugin_docs in sorted(
+                    platform_docs["plugins"].items(),
+                    key=lambda x: str(x[1].get("doc_order"))
+                    if x[1].get("doc_order")
+                    else x[0],
+            ):
+                platform_name = platform_docs['name']
+                if len(platform_docs["plugins"].keys()) > 1:
+                    # We only need to show this if there are multiple modules.
+                    platform_name = f"{platform_name} `{plugin}`"
+
+                # Initialize variables
+                table_level_supported = "❌"
+                column_level_supported = "❌"
+                config_names = ''
+
+                if "capabilities" in plugin_docs:
+                    plugin_capabilities = plugin_docs["capabilities"]
+
+                    for cap_setting in plugin_capabilities:
+                        capability_text = get_capability_text(cap_setting.capability)
+                        capability_supported = get_capability_supported_badge(cap_setting.supported)
+
+                        if capability_text == "Table-Level Lineage" and capability_supported == "✅":
+                            table_level_supported = "✅"
+
+                        if capability_text == "Column-level Lineage" and capability_supported == "✅":
+                            column_level_supported = "✅"
+
+                if not (table_level_supported == "❌" and column_level_supported == "❌"):
+                    if "config_schema" in plugin_docs:
+                        config_properties = json.loads(plugin_docs['config_schema']).get('properties', {})
+                        config_names = '<br />'.join(
+                            [f'- {property_name}' for property_name in config_properties if 'lineage' in property_name])
+                lineage_not_applicable_sources = ['azure-ad', 'csv', 'demo-data', 'dynamodb', 'iceberg', 'json-schema', 'ldap', 'openapi', 'pulsar', 'sqlalchemy' ]
+                if platform_id not in lineage_not_applicable_sources :
+                    f.write(
+                        f"| [{platform_name}](../../generated/ingestion/sources/{platform_id}.md) | {table_level_supported} | {column_level_supported} | {config_names}|\n"
+                    )
+
+        f.write("""
+
+### Types of Lineage Connections
+
+The types of lineage connections supported in DataHub, along with example code for each, are as follows.
+
+| Connection          | Examples                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | A.K.A           |
+|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|
+| Dataset to Dataset  | - [lineage_emitter_mcpw_rest.py](../../../metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) <br /> - [lineage_emitter_rest.py](../../../metadata-ingestion/examples/library/lineage_emitter_rest.py) <br /> - [lineage_emitter_kafka.py](../../../metadata-ingestion/examples/library/lineage_emitter_kafka.py) <br /> - [lineage_emitter_dataset_finegrained.py](../../../metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) <br /> - [Datahub BigQuery Lineage](https://github.com/datahub-project/datahub/blob/a1bf95307b040074c8d65ebb86b5eb177fdcd591/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py#L229) <br /> - [Datahub Snowflake Lineage](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py#L249) |
+| DataJob to DataFlow | - [lineage_job_dataflow.py](../../../metadata-ingestion/examples/library/lineage_job_dataflow.py)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |    |
+| DataJob to Dataset  | - [lineage_dataset_job_dataset.py](../../../metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) <br /> | Pipeline Lineage |
+| Chart to Dashboard  | - [lineage_chart_dashboard.py](../../../metadata-ingestion/examples/library/lineage_chart_dashboard.py)  |  |
+| Chart to Dataset    | - [lineage_dataset_chart.py](../../../metadata-ingestion/examples/library/lineage_dataset_chart.py)  |  |
+
+
+:::tip Our Roadmap
+We're actively working on expanding lineage support for new data sources.
+Visit our [Official Roadmap](https://feature-requests.datahubproject.io/roadmap) for upcoming updates!
+:::
+
+## References
+
+- [DataHub Basics: Lineage 101](https://www.youtube.com/watch?v=rONGpsndzRw&t=1s)
+- [DataHub November 2022 Town Hall](https://www.youtube.com/watch?v=BlCLhG8lGoY&t=1s) - Including Manual Lineage Demo
+- [Acryl Data introduces lineage support and automated propagation of governance information for Snowflake in DataHub](https://blog.datahubproject.io/acryl-data-introduces-lineage-support-and-automated-propagation-of-governance-information-for-339c99536561)
+- [Data in Context: Lineage Explorer in DataHub](https://blog.datahubproject.io/data-in-context-lineage-explorer-in-datahub-a53a9a476dc4)
+- [Harnessing the Power of Data Lineage with DataHub](https://blog.datahubproject.io/harnessing-the-power-of-data-lineage-with-datahub-ad086358dec4)
+- [DataHub Lineage Impact Analysis](https://datahubproject.io/docs/next/act-on-metadata/impact-analysis)
+                        """)
+
+    print("Lineage Documentation Generation Complete")
 
 if __name__ == "__main__":
     logger.setLevel("INFO")

From c415d63ddae884de4e7a5d4ff3311f82057d3a78 Mon Sep 17 00:00:00 2001
From: siddiquebagwan-gslab <mohdsiddique.bagwan@gslab.com>
Date: Wed, 4 Oct 2023 16:22:51 +0530
Subject: [PATCH 23/25] feat(ingestion/powerbi): column level lineage
 extraction for M-Query (#8796)

---
 .../docs/sources/powerbi/powerbi_pre.md       |    2 +-
 .../ingestion/source/powerbi/config.py        |   36 +
 .../powerbi/m_query/native_sql_parser.py      |    6 +-
 .../source/powerbi/m_query/parser.py          |    2 +-
 .../source/powerbi/m_query/resolver.py        |  189 ++-
 .../ingestion/source/powerbi/powerbi.py       |  102 +-
 .../integration/powerbi/golden_test_cll.json  | 1357 +++++++++++++++++
 .../integration/powerbi/test_m_parser.py      |  155 +-
 .../tests/integration/powerbi/test_powerbi.py |   95 +-
 9 files changed, 1804 insertions(+), 140 deletions(-)
 create mode 100644 metadata-ingestion/tests/integration/powerbi/golden_test_cll.json

diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
index 0323e214045ae..fcfae6cd1e6d7 100644
--- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
+++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
@@ -40,7 +40,7 @@ PowerBI Source supports M-Query expression for below listed PowerBI Data Sources
 4.  Microsoft SQL Server
 5.  Google BigQuery
 
-Native SQL query parsing is supported for `Snowflake` and `Amazon Redshift` data-sources and only first table from `FROM` clause will be ingested as upstream table. Advance SQL construct like JOIN and SUB-QUERIES in `FROM` clause are not supported.
+Native SQL query parsing is supported for `Snowflake` and `Amazon Redshift` data-sources.
 
 For example refer below native SQL query. The table `OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_UNIT_TARGET` will be ingested as upstream table.
 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
index ffa685fb25826..a8c7e48f3785c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
@@ -397,6 +397,42 @@ class PowerBiDashboardSourceConfig(
         "as this option generates the upstream datasets URN in lowercase.",
     )
 
+    # Enable CLL extraction
+    extract_column_level_lineage: bool = pydantic.Field(
+        default=False,
+        description="Whether to extract column level lineage. "
+        "Works only if configs `native_query_parsing`, `enable_advance_lineage_sql_construct` & `extract_lineage` are enabled.  "
+        "Works for M-Query where native SQL is used for transformation.",
+    )
+
+    @root_validator
+    @classmethod
+    def validate_extract_column_level_lineage(cls, values: Dict) -> Dict:
+        """Require every prerequisite flag when column-level lineage is enabled.
+
+        Raises:
+            ValueError: if `extract_column_level_lineage` is set while any of the
+                flags it depends on is missing from `values` or is False.
+        """
+        flags = [
+            "native_query_parsing",
+            "enable_advance_lineage_sql_construct",
+            "extract_lineage",
+        ]
+
+        # Treat a missing key like False (presumably the field failed its own
+        # validation - confirm) so no unrelated flag error is raised on top.
+        if values.get("extract_column_level_lineage", False) is False:
+            return values
+
+        logger.debug(f"Validating additional flags: {flags}")
+
+        # `.get(flag, False) is False` treats an absent flag and False alike.
+        if any(values.get(flag, False) is False for flag in flags):
+            raise ValueError(f"Enable all these flags in recipe: {flags} ")
+
+        return values
+
     @validator("dataset_type_mapping")
     @classmethod
     def map_data_platform(cls, value):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py
index 021c429c3c633..0afa8e7ff4564 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py
@@ -9,7 +9,7 @@
 
 SPECIAL_CHARACTERS = ["#(lf)", "(lf)"]
 
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)
 
 
 def remove_special_characters(native_query: str) -> str:
@@ -21,7 +21,7 @@ def remove_special_characters(native_query: str) -> str:
 
 def get_tables(native_query: str) -> List[str]:
     native_query = remove_special_characters(native_query)
-    logger.debug(f"Processing query = {native_query}")
+    logger.debug(f"Processing native query = {native_query}")
     tables: List[str] = []
     parsed = sqlparse.parse(native_query)[0]
     tokens: List[sqlparse.sql.Token] = list(parsed.tokens)
@@ -65,7 +65,7 @@ def parse_custom_sql(
 
     sql_query = remove_special_characters(query)
 
-    logger.debug(f"Parsing sql={sql_query}")
+    logger.debug(f"Processing native query = {sql_query}")
 
     return sqlglot_l.create_lineage_sql_parsed_result(
         query=sql_query,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
index 8cc38c366c42a..9134932c39fe0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
@@ -56,7 +56,7 @@ def get_upstream_tables(
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
     parameters: Dict[str, str] = {},
-) -> List[resolver.DataPlatformTable]:
+) -> List[resolver.Lineage]:
     if table.expression is None:
         logger.debug(f"Expression is none for table {table.full_name}")
         return []
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py
index 479f1decff903..e200ff41f71c2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py
@@ -27,7 +27,7 @@
     IdentifierAccessor,
 )
 from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import Table
-from datahub.utilities.sqlglot_lineage import SqlParsingResult
+from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult
 
 logger = logging.getLogger(__name__)
 
@@ -38,6 +38,16 @@ class DataPlatformTable:
     urn: str
 
 
+@dataclass
+class Lineage:  # Lineage produced for one data-access function of an M-Query
+    upstreams: List[DataPlatformTable]  # tables emitted as upstream datasets
+    column_lineage: List[ColumnLineageInfo]  # SQL-parser column mappings, else []
+
+    @staticmethod
+    def empty() -> "Lineage":  # new instance with no upstreams and no column lineage
+        return Lineage(upstreams=[], column_lineage=[])
+
+
 def urn_to_lowercase(value: str, flag: bool) -> str:
     if flag is True:
         return value.lower()
@@ -120,9 +130,9 @@ def __init__(
         self.platform_instance_resolver = platform_instance_resolver
 
     @abstractmethod
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         pass
 
     @abstractmethod
@@ -147,7 +157,7 @@ def get_db_detail_from_argument(
 
     def parse_custom_sql(
         self, query: str, server: str, database: Optional[str], schema: Optional[str]
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
 
         dataplatform_tables: List[DataPlatformTable] = []
 
@@ -174,7 +184,7 @@ def parse_custom_sql(
 
         if parsed_result is None:
             logger.debug("Failed to parse query")
-            return dataplatform_tables
+            return Lineage.empty()
 
         for urn in parsed_result.in_tables:
             dataplatform_tables.append(
@@ -184,9 +194,15 @@ def parse_custom_sql(
                 )
             )
 
+        logger.debug(f"Native Query parsed result={parsed_result}")
         logger.debug(f"Generated dataplatform_tables={dataplatform_tables}")
 
-        return dataplatform_tables
+        return Lineage(
+            upstreams=dataplatform_tables,
+            column_lineage=parsed_result.column_lineage
+            if parsed_result.column_lineage is not None
+            else [],
+        )
 
 
 class AbstractDataAccessMQueryResolver(ABC):
@@ -215,7 +231,7 @@ def resolve_to_data_platform_table_list(
         ctx: PipelineContext,
         config: PowerBiDashboardSourceConfig,
         platform_instance_resolver: AbstractDataPlatformInstanceResolver,
-    ) -> List[DataPlatformTable]:
+    ) -> List[Lineage]:
         pass
 
 
@@ -471,8 +487,8 @@ def resolve_to_data_platform_table_list(
         ctx: PipelineContext,
         config: PowerBiDashboardSourceConfig,
         platform_instance_resolver: AbstractDataPlatformInstanceResolver,
-    ) -> List[DataPlatformTable]:
-        data_platform_tables: List[DataPlatformTable] = []
+    ) -> List[Lineage]:
+        lineage: List[Lineage] = []
 
         # Find out output variable as we are doing backtracking in M-Query
         output_variable: Optional[str] = tree_function.get_output_variable(
@@ -484,7 +500,7 @@ def resolve_to_data_platform_table_list(
                 f"{self.table.full_name}-output-variable",
                 "output-variable not found in table expression",
             )
-            return data_platform_tables
+            return lineage
 
         # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail
         table_links: List[
@@ -509,7 +525,7 @@ def resolve_to_data_platform_table_list(
 
             # From supported_resolver enum get respective resolver like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
             # & also pass additional information that will be need to generate urn
-            table_full_name_creator: AbstractDataPlatformTableCreator = (
+            table_qualified_name_creator: AbstractDataPlatformTableCreator = (
                 supported_resolver.get_table_full_name_creator()(
                     ctx=ctx,
                     config=config,
@@ -517,11 +533,9 @@ def resolve_to_data_platform_table_list(
                 )
             )
 
-            data_platform_tables.extend(
-                table_full_name_creator.create_dataplatform_tables(f_detail)
-            )
+            lineage.append(table_qualified_name_creator.create_lineage(f_detail))
 
-        return data_platform_tables
+        return lineage
 
 
 class DefaultTwoStepDataAccessSources(AbstractDataPlatformTableCreator, ABC):
@@ -536,7 +550,7 @@ class DefaultTwoStepDataAccessSources(AbstractDataPlatformTableCreator, ABC):
 
     def two_level_access_pattern(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         logger.debug(
             f"Processing {self.get_platform_pair().powerbi_data_platform_name} data-access function detail {data_access_func_detail}"
         )
@@ -545,7 +559,7 @@ def two_level_access_pattern(
             data_access_func_detail.arg_list
         )
         if server is None or db_name is None:
-            return []  # Return empty list
+            return Lineage.empty()  # Return empty lineage
 
         schema_name: str = cast(
             IdentifierAccessor, data_access_func_detail.identifier_accessor
@@ -568,19 +582,21 @@ def two_level_access_pattern(
             server=server,
             qualified_table_name=qualified_table_name,
         )
-
-        return [
-            DataPlatformTable(
-                data_platform_pair=self.get_platform_pair(),
-                urn=urn,
-            )
-        ]
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=self.get_platform_pair(),
+                    urn=urn,
+                )
+            ],
+            column_lineage=[],
+        )
 
 
 class PostgresDataPlatformTableCreator(DefaultTwoStepDataAccessSources):
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         return self.two_level_access_pattern(data_access_func_detail)
 
     def get_platform_pair(self) -> DataPlatformPair:
@@ -630,10 +646,10 @@ def create_urn_using_old_parser(
 
         return dataplatform_tables
 
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
-        dataplatform_tables: List[DataPlatformTable] = []
+    ) -> Lineage:
+
         arguments: List[str] = tree_function.strip_char_from_list(
             values=tree_function.remove_whitespaces_from_list(
                 tree_function.token_values(data_access_func_detail.arg_list)
@@ -647,14 +663,17 @@ def create_dataplatform_tables(
 
         if len(arguments) >= 4 and arguments[2] != "Query":
             logger.debug("Unsupported case is found. Second index is not the Query")
-            return dataplatform_tables
+            return Lineage.empty()
 
         if self.config.enable_advance_lineage_sql_construct is False:
             # Use previous parser to generate URN to keep backward compatibility
-            return self.create_urn_using_old_parser(
-                query=arguments[3],
-                db_name=arguments[1],
-                server=arguments[0],
+            return Lineage(
+                upstreams=self.create_urn_using_old_parser(
+                    query=arguments[3],
+                    db_name=arguments[1],
+                    server=arguments[0],
+                ),
+                column_lineage=[],
             )
 
         return self.parse_custom_sql(
@@ -684,9 +703,9 @@ def _get_server_and_db_name(value: str) -> Tuple[Optional[str], Optional[str]]:
 
         return tree_function.strip_char_from_list([splitter_result[0]])[0], db_name
 
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         logger.debug(
             f"Processing Oracle data-access function detail {data_access_func_detail}"
         )
@@ -698,7 +717,7 @@ def create_dataplatform_tables(
         server, db_name = self._get_server_and_db_name(arguments[0])
 
         if db_name is None or server is None:
-            return []
+            return Lineage.empty()
 
         schema_name: str = cast(
             IdentifierAccessor, data_access_func_detail.identifier_accessor
@@ -719,18 +738,21 @@ def create_dataplatform_tables(
             qualified_table_name=qualified_table_name,
         )
 
-        return [
-            DataPlatformTable(
-                data_platform_pair=self.get_platform_pair(),
-                urn=urn,
-            )
-        ]
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=self.get_platform_pair(),
+                    urn=urn,
+                )
+            ],
+            column_lineage=[],
+        )
 
 
 class DatabrickDataPlatformTableCreator(AbstractDataPlatformTableCreator):
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         logger.debug(
             f"Processing Databrick data-access function detail {data_access_func_detail}"
         )
@@ -749,7 +771,7 @@ def create_dataplatform_tables(
                 logger.debug(
                     "expecting instance to be IdentifierAccessor, please check if parsing is done properly"
                 )
-                return []
+                return Lineage.empty()
 
         db_name: str = value_dict["Database"]
         schema_name: str = value_dict["Schema"]
@@ -762,7 +784,7 @@ def create_dataplatform_tables(
             logger.info(
                 f"server information is not available for {qualified_table_name}. Skipping upstream table"
             )
-            return []
+            return Lineage.empty()
 
         urn = urn_creator(
             config=self.config,
@@ -772,12 +794,15 @@ def create_dataplatform_tables(
             qualified_table_name=qualified_table_name,
         )
 
-        return [
-            DataPlatformTable(
-                data_platform_pair=self.get_platform_pair(),
-                urn=urn,
-            )
-        ]
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=self.get_platform_pair(),
+                    urn=urn,
+                )
+            ],
+            column_lineage=[],
+        )
 
     def get_platform_pair(self) -> DataPlatformPair:
         return SupportedDataPlatform.DATABRICK_SQL.value
@@ -789,9 +814,9 @@ def get_datasource_server(
     ) -> str:
         return tree_function.strip_char_from_list([arguments[0]])[0]
 
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         logger.debug(
             f"Processing {self.get_platform_pair().datahub_data_platform_name} function detail {data_access_func_detail}"
         )
@@ -826,12 +851,15 @@ def create_dataplatform_tables(
             qualified_table_name=qualified_table_name,
         )
 
-        return [
-            DataPlatformTable(
-                data_platform_pair=self.get_platform_pair(),
-                urn=urn,
-            )
-        ]
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=self.get_platform_pair(),
+                    urn=urn,
+                )
+            ],
+            column_lineage=[],
+        )
 
 
 class SnowflakeDataPlatformTableCreator(DefaultThreeStepDataAccessSources):
@@ -859,9 +887,9 @@ class AmazonRedshiftDataPlatformTableCreator(AbstractDataPlatformTableCreator):
     def get_platform_pair(self) -> DataPlatformPair:
         return SupportedDataPlatform.AMAZON_REDSHIFT.value
 
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
+    ) -> Lineage:
         logger.debug(
             f"Processing AmazonRedshift data-access function detail {data_access_func_detail}"
         )
@@ -870,7 +898,7 @@ def create_dataplatform_tables(
             data_access_func_detail.arg_list
         )
         if db_name is None or server is None:
-            return []  # Return empty list
+            return Lineage.empty()  # Return empty lineage
 
         schema_name: str = cast(
             IdentifierAccessor, data_access_func_detail.identifier_accessor
@@ -891,12 +919,15 @@ def create_dataplatform_tables(
             qualified_table_name=qualified_table_name,
         )
 
-        return [
-            DataPlatformTable(
-                data_platform_pair=self.get_platform_pair(),
-                urn=urn,
-            )
-        ]
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=self.get_platform_pair(),
+                    urn=urn,
+                )
+            ],
+            column_lineage=[],
+        )
 
 
 class NativeQueryDataPlatformTableCreator(AbstractDataPlatformTableCreator):
@@ -916,9 +947,7 @@ def is_native_parsing_supported(data_access_function_name: str) -> bool:
             in NativeQueryDataPlatformTableCreator.SUPPORTED_NATIVE_QUERY_DATA_PLATFORM
         )
 
-    def create_urn_using_old_parser(
-        self, query: str, server: str
-    ) -> List[DataPlatformTable]:
+    def create_urn_using_old_parser(self, query: str, server: str) -> Lineage:
         dataplatform_tables: List[DataPlatformTable] = []
 
         tables: List[str] = native_sql_parser.get_tables(query)
@@ -947,12 +976,14 @@ def create_urn_using_old_parser(
 
         logger.debug(f"Generated dataplatform_tables {dataplatform_tables}")
 
-        return dataplatform_tables
+        return Lineage(
+            upstreams=dataplatform_tables,
+            column_lineage=[],
+        )
 
-    def create_dataplatform_tables(
+    def create_lineage(
         self, data_access_func_detail: DataAccessFunctionDetail
-    ) -> List[DataPlatformTable]:
-        dataplatform_tables: List[DataPlatformTable] = []
+    ) -> Lineage:
         t1: Tree = cast(
             Tree, tree_function.first_arg_list_func(data_access_func_detail.arg_list)
         )
@@ -963,7 +994,7 @@ def create_dataplatform_tables(
                 f"Expecting 2 argument, actual argument count is {len(flat_argument_list)}"
             )
             logger.debug(f"Flat argument list = {flat_argument_list}")
-            return dataplatform_tables
+            return Lineage.empty()
         data_access_tokens: List[str] = tree_function.remove_whitespaces_from_list(
             tree_function.token_values(flat_argument_list[0])
         )
@@ -981,7 +1012,7 @@ def create_dataplatform_tables(
                 f"Server is not available in argument list for data-platform {data_access_tokens[0]}. Returning empty "
                 "list"
             )
-            return dataplatform_tables
+            return Lineage.empty()
 
         self.current_data_platform = self.SUPPORTED_NATIVE_QUERY_DATA_PLATFORM[
             data_access_tokens[0]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index 5d477ee090e7e..52bcef66658c8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -44,6 +44,11 @@
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
+from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
+    FineGrainedLineage,
+    FineGrainedLineageDownstreamType,
+    FineGrainedLineageUpstreamType,
+)
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
     ChangeTypeClass,
@@ -71,6 +76,7 @@
     ViewPropertiesClass,
 )
 from datahub.utilities.dedup_list import deduplicate_list
+from datahub.utilities.sqlglot_lineage import ColumnLineageInfo
 
 # Logger instance
 logger = logging.getLogger(__name__)
@@ -165,6 +171,48 @@ def extract_dataset_schema(
         )
         return [schema_mcp]
 
+    def make_fine_grained_lineage_class(
+        self, lineage: resolver.Lineage, dataset_urn: str
+    ) -> List[FineGrainedLineage]:
+        """Convert parsed column lineage into FineGrainedLineage entries.
+
+        Returns an empty list when `extract_column_level_lineage` or
+        `extract_lineage` is False, or when `lineage` is None.
+        """
+        config = self.__config
+        if (
+            config.extract_column_level_lineage is False
+            or config.extract_lineage is False
+            or lineage is None
+        ):
+            return []
+
+        logger.info("Extracting column level lineage")
+
+        fine_grained_lineages: List[FineGrainedLineage] = []
+        for cll_info in lineage.column_lineage:
+            # Downstream column is anchored on dataset_urn; stays empty if unset.
+            target = cll_info.downstream
+            downstreams: List[str] = []
+            if target is not None and target.column is not None:
+                downstreams.append(
+                    builder.make_schema_field_urn(dataset_urn, target.column)
+                )
+
+            fine_grained_lineages.append(
+                FineGrainedLineage(
+                    downstreamType=FineGrainedLineageDownstreamType.FIELD,
+                    downstreams=downstreams,
+                    upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
+                    upstreams=[
+                        builder.make_schema_field_urn(ref.table, ref.column)
+                        for ref in cll_info.upstreams
+                    ],
+                )
+            )
+
+        return fine_grained_lineages
+
     def extract_lineage(
         self, table: powerbi_data_classes.Table, ds_urn: str
     ) -> List[MetadataChangeProposalWrapper]:
@@ -174,8 +222,9 @@ def extract_lineage(
         parameters = table.dataset.parameters if table.dataset else {}
 
         upstream: List[UpstreamClass] = []
+        cll_lineage: List[FineGrainedLineage] = []
 
-        upstream_dpts: List[resolver.DataPlatformTable] = parser.get_upstream_tables(
+        upstream_lineage: List[resolver.Lineage] = parser.get_upstream_tables(
             table=table,
             reporter=self.__reporter,
             platform_instance_resolver=self.__dataplatform_instance_resolver,
@@ -185,34 +234,49 @@ def extract_lineage(
         )
 
         logger.debug(
-            f"PowerBI virtual table {table.full_name} and it's upstream dataplatform tables = {upstream_dpts}"
+            f"PowerBI virtual table {table.full_name} and it's upstream dataplatform tables = {upstream_lineage}"
         )
 
-        for upstream_dpt in upstream_dpts:
-            if (
-                upstream_dpt.data_platform_pair.powerbi_data_platform_name
-                not in self.__config.dataset_type_mapping.keys()
-            ):
-                logger.debug(
-                    f"Skipping upstream table for {ds_urn}. The platform {upstream_dpt.data_platform_pair.powerbi_data_platform_name} is not part of dataset_type_mapping",
+        for lineage in upstream_lineage:
+            for upstream_dpt in lineage.upstreams:
+                if (
+                    upstream_dpt.data_platform_pair.powerbi_data_platform_name
+                    not in self.__config.dataset_type_mapping.keys()
+                ):
+                    logger.debug(
+                        f"Skipping upstream table for {ds_urn}. The platform {upstream_dpt.data_platform_pair.powerbi_data_platform_name} is not part of dataset_type_mapping",
+                    )
+                    continue
+
+                upstream_table_class = UpstreamClass(
+                    upstream_dpt.urn,
+                    DatasetLineageTypeClass.TRANSFORMED,
                 )
-                continue
 
-            upstream_table_class = UpstreamClass(
-                upstream_dpt.urn,
-                DatasetLineageTypeClass.TRANSFORMED,
-            )
+                upstream.append(upstream_table_class)
 
-            upstream.append(upstream_table_class)
+                # Add column level lineage if any (NOTE(review): repeated per upstream table - dedupe?)
+                cll_lineage.extend(
+                    self.make_fine_grained_lineage_class(
+                        lineage=lineage,
+                        dataset_urn=ds_urn,
+                    )
+                )
 
         if len(upstream) > 0:
-            upstream_lineage = UpstreamLineageClass(upstreams=upstream)
+
+            upstream_lineage_class: UpstreamLineageClass = UpstreamLineageClass(
+                upstreams=upstream,
+                fineGrainedLineages=cll_lineage or None,
+            )
-            logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")
+            logger.debug(
+                f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage_class}"
+            )  # logs the aspect being emitted, not the resolver's List[Lineage]
             mcp = MetadataChangeProposalWrapper(
                 entityType=Constant.DATASET,
                 changeType=ChangeTypeClass.UPSERT,
                 entityUrn=ds_urn,
-                aspect=upstream_lineage,
+                aspect=upstream_lineage_class,
             )
             mcps.append(mcp)
 
@@ -1075,6 +1139,10 @@ def report_to_datahub_work_units(
     SourceCapability.OWNERSHIP,
     "Disabled by default, configured using `extract_ownership`",
 )
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Disabled by default, configured using `extract_column_level_lineage`. ",
+)
 class PowerBiDashboardSource(StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json
new file mode 100644
index 0000000000000..5f92cdcfb5bde
--- /dev/null
+++ b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json
@@ -0,0 +1,1357 @@
+[
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "dummy",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "public issue_history",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n    PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n    TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n    TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n    TESTTABLE_Table",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "SNOWFLAKE_TESTTABLE",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,PBI_TEST.TEST.TESTTABLE,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n    #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n    #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n    #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n    #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n    #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n    then [UNIT] * 361\nelse 0),\n    #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n    #\"Added Custom2\"",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "snowflake native-query",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),monthid)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),seller)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV),agent_key)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),client_director)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),monthid)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV),cd_agent_key)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "big-query-with-parameter",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n    Source",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "snowflake native-query-with-join",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-test-project.universal.D_WH_DATE,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = Oracle.Database(\"localhost:1521/salesdb.GSLAB.COM\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD)",
+                    "type": "TRANSFORMED"
+                },
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_forecast,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "job-history",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,salesdb.HR.EMPLOYEES,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = PostgreSQL.Database(\"localhost\"  ,   \"mics\"      ),\n  public_order_date =    Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
+            "name": "postgres_test_table",
+            "description": "Library dataset description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details",
+            "name": "dbo_book_issue",
+            "description": "hr pbi test description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "let\n    Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n    #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n    #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n    #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n    #\"Added Custom1\"",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details",
+            "name": "ms_sql_native_table",
+            "description": "hr pbi test description",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,library.dbo.book_issue,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "corpuser",
+    "entityUrn": "urn:li:corpuser:users.User1@foo.com",
+    "changeType": "UPSERT",
+    "aspectName": "corpUserKey",
+    "aspect": {
+        "json": {
+            "username": "User1@foo.com"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "corpuser",
+    "entityUrn": "urn:li:corpuser:users.User2@foo.com",
+    "changeType": "UPSERT",
+    "aspectName": "corpUserKey",
+    "aspect": {
+        "json": {
+            "username": "User2@foo.com"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+    "changeType": "UPSERT",
+    "aspectName": "chartInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "createdFrom": "Dataset",
+                "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
+                "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details"
+            },
+            "title": "test_tile",
+            "description": "test_tile",
+            "lastModified": {
+                "created": {
+                    "time": 0,
+                    "actor": "urn:li:corpuser:unknown"
+                },
+                "lastModified": {
+                    "time": 0,
+                    "actor": "urn:li:corpuser:unknown"
+                }
+            },
+            "inputs": [
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+    "changeType": "UPSERT",
+    "aspectName": "chartKey",
+    "aspect": {
+        "json": {
+            "dashboardTool": "powerbi",
+            "chartId": "powerbi.linkedin.com/charts/B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePaths",
+    "aspect": {
+        "json": {
+            "paths": [
+                "/powerbi/demo-workspace"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePathsV2",
+    "aspect": {
+        "json": {
+            "path": [
+                {
+                    "id": "demo-workspace"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+    "changeType": "UPSERT",
+    "aspectName": "chartInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "createdFrom": "Dataset",
+                "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed",
+                "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details"
+            },
+            "title": "yearly_sales",
+            "description": "yearly_sales",
+            "lastModified": {
+                "created": {
+                    "time": 0,
+                    "actor": "urn:li:corpuser:unknown"
+                },
+                "lastModified": {
+                    "time": 0,
+                    "actor": "urn:li:corpuser:unknown"
+                }
+            },
+            "inputs": [
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)"
+                },
+                {
+                    "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+    "changeType": "UPSERT",
+    "aspectName": "chartKey",
+    "aspect": {
+        "json": {
+            "dashboardTool": "powerbi",
+            "chartId": "powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePaths",
+    "aspect": {
+        "json": {
+            "paths": [
+                "/powerbi/demo-workspace"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePathsV2",
+    "aspect": {
+        "json": {
+            "path": [
+                {
+                    "id": "demo-workspace"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dashboard",
+    "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePaths",
+    "aspect": {
+        "json": {
+            "paths": [
+                "/powerbi/demo-workspace"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dashboard",
+    "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+    "changeType": "UPSERT",
+    "aspectName": "dashboardInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "chartCount": "2",
+                "workspaceName": "demo-workspace",
+                "workspaceId": "64ED5CAD-7C10-4684-8180-826122881108"
+            },
+            "title": "test_dashboard",
+            "description": "Description of test dashboard",
+            "charts": [
+                "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
+                "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)"
+            ],
+            "datasets": [],
+            "lastModified": {
+                "created": {
+                    "time": 0,
+                    "actor": "urn:li:corpuser:unknown"
+                },
+                "lastModified": {
+                    "time": 0,
+                    "actor": "urn:li:corpuser:unknown"
+                }
+            },
+            "dashboardUrl": "https://localhost/dashboards/web/1"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dashboard",
+    "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dashboard",
+    "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+    "changeType": "UPSERT",
+    "aspectName": "dashboardKey",
+    "aspect": {
+        "json": {
+            "dashboardTool": "powerbi",
+            "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dashboard",
+    "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:users.User1@foo.com",
+                    "type": "NONE"
+                },
+                {
+                    "owner": "urn:li:corpuser:users.User2@foo.com",
+                    "type": "NONE"
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            }
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dashboard",
+    "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePathsV2",
+    "aspect": {
+        "json": {
+            "path": [
+                {
+                    "id": "demo-workspace"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "viewProperties",
+    "aspect": {
+        "json": {
+            "materialized": false,
+            "viewLogic": "dummy",
+            "viewLanguage": "m_query"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "corpuser",
+    "entityUrn": "urn:li:corpuser:users.User1@foo.com",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "PowerBI Dataset Table",
+                "View"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "datasetId": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff"
+            },
+            "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff/details",
+            "name": "employee_ctc",
+            "description": "Employee Management",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+},
+{
+    "entityType": "corpuser",
+    "entityUrn": "urn:li:corpuser:users.User2@foo.com",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1643871600000,
+        "runId": "powerbi-test"
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
index 2fcbf5a0c0860..2e9c02ef759a5 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
@@ -15,8 +15,9 @@
     AbstractDataPlatformInstanceResolver,
     create_dataplatform_instance_resolver,
 )
-from datahub.ingestion.source.powerbi.m_query import parser, tree_function
-from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable
+from datahub.ingestion.source.powerbi.m_query import parser, resolver, tree_function
+from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage
+from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef
 
 pytestmark = pytest.mark.slow
 
@@ -70,6 +71,15 @@ def get_default_instances(
     return PipelineContext(run_id="fake"), config, platform_instance_resolver
 
 
+def combine_upstreams_from_lineage(lineage: List[Lineage]) -> List[DataPlatformTable]:
+    """Flatten the upstream tables of all lineage entries into a single list.
+
+    The result is the in-order concatenation of each entry's ``upstreams``;
+    duplicates are kept.
+    """
+    return [table for entry in lineage for table in entry.upstreams]
+
+
 @pytest.mark.integration
 def test_parse_m_query1():
     expression: str = M_QUERIES[0]
@@ -182,7 +192,7 @@ def test_snowflake_regular_case():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -212,7 +222,7 @@ def test_postgres_regular_case():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -242,7 +252,7 @@ def test_databricks_regular_case():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -272,7 +282,7 @@ def test_oracle_regular_case():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -302,7 +312,7 @@ def test_mssql_regular_case():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -348,7 +358,7 @@ def test_mssql_with_query():
             ctx=ctx,
             config=config,
             platform_instance_resolver=platform_instance_resolver,
-        )
+        )[0].upstreams
 
         assert len(data_platform_tables) == 1
         assert data_platform_tables[0].urn == expected_tables[index]
@@ -388,7 +398,7 @@ def test_snowflake_native_query():
             ctx=ctx,
             config=config,
             platform_instance_resolver=platform_instance_resolver,
-        )
+        )[0].upstreams
 
         assert len(data_platform_tables) == 1
         assert data_platform_tables[0].urn == expected_tables[index]
@@ -410,7 +420,7 @@ def test_google_bigquery_1():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -442,7 +452,7 @@ def test_google_bigquery_2():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -472,7 +482,7 @@ def test_for_each_expression_1():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -501,7 +511,7 @@ def test_for_each_expression_2():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -523,15 +533,15 @@ def test_native_query_disabled():
     reporter = PowerBiDashboardSourceReport()
 
     ctx, config, platform_instance_resolver = get_default_instances()
-    config.native_query_parsing = False
-    data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables(
+    config.native_query_parsing = False  # Disable native query parsing
+    lineage: List[Lineage] = parser.get_upstream_tables(
         table,
         reporter,
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
     )
-    assert len(data_platform_tables) == 0
+    assert len(lineage) == 0
 
 
 @pytest.mark.integration
@@ -548,12 +558,14 @@ def test_multi_source_table():
 
     ctx, config, platform_instance_resolver = get_default_instances()
 
-    data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables(
-        table,
-        reporter,
-        ctx=ctx,
-        config=config,
-        platform_instance_resolver=platform_instance_resolver,
+    data_platform_tables: List[DataPlatformTable] = combine_upstreams_from_lineage(
+        parser.get_upstream_tables(
+            table,
+            reporter,
+            ctx=ctx,
+            config=config,
+            platform_instance_resolver=platform_instance_resolver,
+        )
     )
 
     assert len(data_platform_tables) == 2
@@ -581,12 +593,14 @@ def test_table_combine():
 
     ctx, config, platform_instance_resolver = get_default_instances()
 
-    data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables(
-        table,
-        reporter,
-        ctx=ctx,
-        config=config,
-        platform_instance_resolver=platform_instance_resolver,
+    data_platform_tables: List[DataPlatformTable] = combine_upstreams_from_lineage(
+        parser.get_upstream_tables(
+            table,
+            reporter,
+            ctx=ctx,
+            config=config,
+            platform_instance_resolver=platform_instance_resolver,
+        )
     )
 
     assert len(data_platform_tables) == 2
@@ -624,7 +638,7 @@ def test_expression_is_none():
 
     ctx, config, platform_instance_resolver = get_default_instances()
 
-    data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables(
+    lineage: List[Lineage] = parser.get_upstream_tables(
         table,
         reporter,
         ctx=ctx,
@@ -632,7 +646,7 @@ def test_expression_is_none():
         platform_instance_resolver=platform_instance_resolver,
     )
 
-    assert len(data_platform_tables) == 0
+    assert len(lineage) == 0
 
 
 def test_redshift_regular_case():
@@ -651,7 +665,7 @@ def test_redshift_regular_case():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -678,7 +692,7 @@ def test_redshift_native_query():
         ctx=ctx,
         config=config,
         platform_instance_resolver=platform_instance_resolver,
-    )
+    )[0].upstreams
 
     assert len(data_platform_tables) == 1
     assert (
@@ -708,7 +722,7 @@ def test_sqlglot_parser():
         }
     )
 
-    data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables(
+    lineage: List[resolver.Lineage] = parser.get_upstream_tables(
         table,
         reporter,
         ctx=ctx,
@@ -716,6 +730,8 @@ def test_sqlglot_parser():
         platform_instance_resolver=platform_instance_resolver,
     )
 
+    data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams
+
     assert len(data_platform_tables) == 2
     assert (
         data_platform_tables[0].urn
@@ -725,3 +741,76 @@ def test_sqlglot_parser():
         data_platform_tables[1].urn
         == "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.operations_analytics.transformed_prod.v_sme_unit_targets,PROD)"
     )
+
+    assert lineage[0].column_lineage == [
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="client_director"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="tier"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column='upper("manager")'),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="team_type"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="date_target"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="monthid"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="target_team"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="seller_email"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="agent_key"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="sme_quota"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="revenue_quota"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="service_quota"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="bl_target"),
+            upstreams=[],
+            logic=None,
+        ),
+        ColumnLineageInfo(
+            downstream=DownstreamColumnRef(table=None, column="software_quota"),
+            upstreams=[],
+            logic=None,
+        ),
+    ]
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index 044532021a19c..b0695e3ea9954 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -1,4 +1,5 @@
 import logging
+import re
 import sys
 from typing import Any, Dict, List, cast
 from unittest import mock
@@ -1127,7 +1128,7 @@ def test_dataset_type_mapping_error(
     """
     register_mock_api(request_mock=requests_mock)
 
-    try:
+    with pytest.raises(Exception, match=r"dataset_type_mapping is deprecated"):
         Pipeline.create(
             {
                 "run_id": "powerbi-test",
@@ -1150,11 +1151,6 @@ def test_dataset_type_mapping_error(
                 },
             }
         )
-    except Exception as e:
-        assert (
-            "dataset_type_mapping is deprecated. Use server_to_platform_instance only."
-            in str(e)
-        )
 
 
 @freeze_time(FROZEN_TIME)
@@ -1506,3 +1502,90 @@ def test_independent_datasets_extraction(
         output_path=tmp_path / "powerbi_independent_mces.json",
         golden_path=f"{test_resources_dir}/{golden_file}",
     )
+
+
+@freeze_time(FROZEN_TIME)
+@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
+def test_cll_extraction(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock):
+    """Ingest PowerBI with column-level lineage enabled; compare to golden file."""
+    test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
+
+    register_mock_api(
+        request_mock=requests_mock,
+    )
+
+    default_conf: dict = default_source_config()
+
+    del default_conf[
+        "dataset_type_mapping"
+    ]  # drop this key so the connector falls back to its default (all data platforms)
+
+    pipeline = Pipeline.create(
+        {
+            "run_id": "powerbi-test",
+            "source": {
+                "type": "powerbi",
+                "config": {
+                    **default_conf,
+                    "extract_lineage": True,
+                    "extract_column_level_lineage": True,
+                    "enable_advance_lineage_sql_construct": True,
+                    "native_query_parsing": True,
+                    "extract_independent_datasets": True,
+                },
+            },
+            "sink": {
+                "type": "file",
+                "config": {
+                    "filename": f"{tmp_path}/powerbi_cll_mces.json",
+                },
+            },
+        }
+    )
+
+    pipeline.run()
+    pipeline.raise_from_status()
+    golden_file = "golden_test_cll.json"
+
+    mce_helpers.check_golden_file(
+        pytestconfig,
+        output_path=tmp_path / "powerbi_cll_mces.json",
+        golden_path=f"{test_resources_dir}/{golden_file}",
+    )
+
+
+@freeze_time(FROZEN_TIME)
+@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
+def test_cll_extraction_flags(
+    mock_msal, pytestconfig, tmp_path, mock_time, requests_mock
+):
+    """Pipeline creation must fail if CLL is enabled without its required flags."""
+    register_mock_api(
+        request_mock=requests_mock,
+    )
+
+    default_conf: dict = default_source_config()
+    # The expected message contains "[" and "]", so escape it for use as a regex.
+    pattern: str = re.escape(
+        "Enable all these flags in recipe: ['native_query_parsing', 'enable_advance_lineage_sql_construct', 'extract_lineage']"
+    )
+
+    with pytest.raises(Exception, match=pattern):
+        Pipeline.create(
+            {
+                "run_id": "powerbi-test",
+                "source": {
+                    "type": "powerbi",
+                    "config": {
+                        **default_conf,
+                        "extract_column_level_lineage": True,
+                    },
+                },
+                "sink": {
+                    "type": "file",
+                    "config": {
+                        "filename": f"{tmp_path}/powerbi_cll_mces.json",
+                    },
+                },
+            }
+        )

From a300b39f15cd689b42b7c32ce9e5087ccf5a356e Mon Sep 17 00:00:00 2001
From: Harshal Sheth <hsheth2@gmail.com>
Date: Wed, 4 Oct 2023 06:53:15 -0400
Subject: [PATCH 24/25] feat(ingest/airflow): airflow plugin v2 (#8853)

---
 .github/workflows/airflow-plugin.yml          |   25 +-
 .github/workflows/build-and-test.yml          |    9 +-
 docker/airflow/local_airflow.md               |    2 +-
 docs/how/updating-datahub.md                  |    3 +
 docs/lineage/airflow.md                       |  251 ++-
 .../airflow-plugin/build.gradle               |   23 +-
 .../airflow-plugin/pyproject.toml             |    1 +
 .../airflow-plugin/setup.cfg                  |   30 +-
 .../airflow-plugin/setup.py                   |   71 +-
 .../datahub_airflow_plugin/_airflow_shims.py  |   32 +
 .../src/datahub_airflow_plugin/_config.py     |   80 +
 .../_datahub_listener_module.py               |    7 +
 .../_datahub_ol_adapter.py                    |   23 +
 .../src/datahub_airflow_plugin/_extractors.py |  244 ++
 .../client/airflow_generator.py               |   69 +-
 .../datahub_listener.py                       |  494 +++++
 .../datahub_airflow_plugin/datahub_plugin.py  |  391 +---
 .../datahub_plugin_v22.py                     |  336 +++
 .../example_dags/lineage_emission_dag.py      |   22 +-
 .../datahub_airflow_plugin/hooks/datahub.py   |  115 +-
 .../{ => lineage}/_lineage_core.py            |   30 +-
 .../datahub_airflow_plugin/lineage/datahub.py |   28 +-
 .../operators/datahub.py                      |    4 +-
 .../airflow-plugin/tests/conftest.py          |    6 +
 .../tests/integration/dags/basic_iolets.py    |   34 +
 .../tests/integration/dags/simple_dag.py      |   34 +
 .../integration/dags/snowflake_operator.py    |   32 +
 .../tests/integration/dags/sqlite_operator.py |   75 +
 .../integration/goldens/v1_basic_iolets.json  |  533 +++++
 .../integration/goldens/v1_simple_dag.json    |  718 ++++++
 .../integration/goldens/v2_basic_iolets.json  |  535 +++++
 .../v2_basic_iolets_no_dag_listener.json      |  535 +++++
 .../integration/goldens/v2_simple_dag.json    |  666 ++++++
 .../v2_simple_dag_no_dag_listener.json        |  722 ++++++
 .../goldens/v2_snowflake_operator.json        |  507 +++++
 .../goldens/v2_sqlite_operator.json           | 1735 +++++++++++++++
 .../v2_sqlite_operator_no_dag_listener.json   | 1955 +++++++++++++++++
 .../integration/integration_test_dummy.py     |    2 -
 .../tests/integration/test_plugin.py          |  392 ++++
 .../airflow-plugin/tests/unit/test_airflow.py |   25 +-
 .../airflow-plugin/tests/unit/test_dummy.py   |    2 -
 .../tests/unit/test_packaging.py              |    8 +
 .../airflow-plugin/tox.ini                    |   39 +-
 metadata-ingestion/setup.py                   |   20 +-
 .../api/entities/corpgroup/corpgroup.py       |   33 +-
 .../datahub/api/entities/corpuser/corpuser.py |    9 +-
 .../datahub/api/entities/datajob/dataflow.py  |   19 +-
 .../datahub/api/entities/datajob/datajob.py   |   39 +-
 .../dataprocess/dataprocess_instance.py       |   21 +-
 .../api/entities/dataproduct/dataproduct.py   |   22 +-
 .../src/datahub/emitter/generic_emitter.py    |   31 +
 .../src/datahub/emitter/kafka_emitter.py      |    3 +-
 .../src/datahub/emitter/rest_emitter.py       |   16 +-
 .../emitter/synchronized_file_emitter.py      |   60 +
 .../src/datahub/ingestion/graph/client.py     |   17 +
 .../datahub/ingestion/source/kafka_connect.py |    4 +-
 .../ingestion/source/sql/sql_common.py        |   48 -
 .../source/sql/sqlalchemy_uri_mapper.py       |   47 +
 .../src/datahub/ingestion/source/superset.py  |    6 +-
 .../src/datahub/ingestion/source/tableau.py   |   11 +-
 .../integrations/great_expectations/action.py |    4 +-
 .../datahub/testing/compare_metadata_json.py  |   22 +-
 .../src/datahub/utilities/sqlglot_lineage.py  |   40 +-
 .../goldens/test_create_table_ddl.json        |    8 +
 .../unit/sql_parsing/test_sqlglot_lineage.py  |   15 +
 .../tests/unit/test_sql_common.py             |    7 +-
 66 files changed, 10457 insertions(+), 890 deletions(-)
 create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
 rename metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/{ => lineage}/_lineage_core.py (72%)
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/conftest.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
 delete mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py
 delete mode 100644 metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py
 create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py
 create mode 100644 metadata-ingestion/src/datahub/emitter/generic_emitter.py
 create mode 100644 metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py
 create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py
 create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json

diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml
index 63bab821cc398..a250bddcc16d1 100644
--- a/.github/workflows/airflow-plugin.yml
+++ b/.github/workflows/airflow-plugin.yml
@@ -32,16 +32,21 @@ jobs:
     strategy:
       matrix:
         include:
-          - python-version: "3.7"
-            extraPythonRequirement: "apache-airflow~=2.1.0"
-          - python-version: "3.7"
-            extraPythonRequirement: "apache-airflow~=2.2.0"
+          - python-version: "3.8"
+            extra_pip_requirements: "apache-airflow~=2.1.4"
+            extra_pip_extras: plugin-v1
+          - python-version: "3.8"
+            extra_pip_requirements: "apache-airflow~=2.2.4"
+            extra_pip_extras: plugin-v1
           - python-version: "3.10"
-            extraPythonRequirement: "apache-airflow~=2.4.0"
+            extra_pip_requirements: "apache-airflow~=2.4.0"
+            extra_pip_extras: plugin-v2
           - python-version: "3.10"
-            extraPythonRequirement: "apache-airflow~=2.6.0"
+            extra_pip_requirements: "apache-airflow~=2.6.0"
+            extra_pip_extras: plugin-v2
           - python-version: "3.10"
-            extraPythonRequirement: "apache-airflow>2.6.0"
+            extra_pip_requirements: "apache-airflow>=2.7.0"
+            extra_pip_extras: plugin-v2
       fail-fast: false
     steps:
       - uses: actions/checkout@v3
@@ -51,13 +56,13 @@ jobs:
           cache: "pip"
       - name: Install dependencies
         run: ./metadata-ingestion/scripts/install_deps.sh
-      - name: Install airflow package and test  (extras ${{ matrix.extraPythonRequirement }})
-        run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
+      - name: Install airflow package and test (extras ${{ matrix.extra_pip_requirements }})
+        run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
       - name: pip freeze show list installed
         if: always()
         run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
       - uses: actions/upload-artifact@v3
-        if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'apache-airflow>2.6.0' }}
+        if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }}
         with:
           name: Test Results (Airflow Plugin ${{ matrix.python-version}})
           path: |
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index f6320e1bd5c9f..3f409878b191f 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -26,9 +26,9 @@ jobs:
       matrix:
         command:
           [
-            "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel",
+            # metadata-ingestion and airflow-plugin each have dedicated build jobs
+            "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel",
             "./gradlew :datahub-frontend:build :datahub-web-react:build --parallel",
-            "./gradlew :metadata-ingestion-modules:airflow-plugin:build --parallel"
           ]
         timezone:
           [
@@ -51,7 +51,8 @@ jobs:
           java-version: 11
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.7"
+          python-version: "3.10"
+          cache: pip
       - name: Gradle build (and test)
         run: |
           ${{ matrix.command }}
@@ -81,7 +82,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.7"
+          python-version: "3.10"
       - name: Download YQ
         uses: chrisdickinson/setup-yq@v1.0.1
         with:
diff --git a/docker/airflow/local_airflow.md b/docker/airflow/local_airflow.md
index 55a64f5c122c5..fbfc1d17327c5 100644
--- a/docker/airflow/local_airflow.md
+++ b/docker/airflow/local_airflow.md
@@ -1,6 +1,6 @@
 :::caution
 
-This feature is currently unmaintained. As of 0.10.0 the container described is not published alongside the DataHub CLI. If you'd like to use it, please reach out to us on the [community slack.](docs/slack.md)
+This guide is currently unmaintained. As of 0.10.0 the container described is not published alongside the DataHub CLI. If you'd like to use it, please reach out to us on the [community slack.](docs/slack.md)
 
 :::
 
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 9b19291ee246a..4df8d435cf1c4 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -5,7 +5,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 ## Next
 
 ### Breaking Changes
+
 - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.
+- #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details.
+- #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, so you'll need to update your requirements to install `acryl-datahub-airflow-plugin[plugin-v2]`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`.
 
 ### Potential Downtime
 
diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md
index 49de5352f6d58..19ed1598d4c5a 100644
--- a/docs/lineage/airflow.md
+++ b/docs/lineage/airflow.md
@@ -1,74 +1,137 @@
 # Airflow Integration
 
-DataHub supports integration of
+:::note
 
-- Airflow Pipeline (DAG) metadata
-- DAG and Task run information as well as
-- Lineage information when present
+If you're looking to schedule DataHub ingestion using Airflow, see the guide on [scheduling ingestion with Airflow](../../metadata-ingestion/schedule_docs/airflow.md).
 
-You can use either the DataHub Airflow lineage plugin (recommended) or the Airflow lineage backend (deprecated).
+:::
 
-## Using Datahub's Airflow lineage plugin
+The DataHub Airflow plugin supports:
 
-:::note
+- Automatic column-level lineage extraction from various operators e.g. `SqlOperator`s (including `MySqlOperator`, `PostgresOperator`, `SnowflakeOperator`, and more), `S3FileTransformOperator`, and a few others.
+- Airflow DAG and tasks, including properties, ownership, and tags.
+- Task run information, including task successes and failures.
+- Manual lineage annotations using `inlets` and `outlets` on Airflow operators.
 
-The Airflow lineage plugin is only supported with Airflow version >= 2.0.2 or on MWAA with an Airflow version >= 2.0.2.
+There are two actively supported implementations of the plugin, with different Airflow version support.
 
-If you're using Airflow 1.x, use the Airflow lineage plugin with acryl-datahub-airflow-plugin <= 0.9.1.0.
+| Approach  | Airflow Version | Notes                                                                       |
+| --------- | --------------- | --------------------------------------------------------------------------- |
+| Plugin v2 | 2.3+            | Recommended. Requires Python 3.8+                                           |
+| Plugin v1 | 2.1+            | No automatic lineage extraction; may not extract lineage if the task fails. |
 
-:::
+If you're using Airflow older than 2.1, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details.
 
-This plugin registers a task success/failure callback on every task with a cluster policy and emits DataHub events from that. This allows this plugin to be able to register both task success as well as failures compared to the older Airflow Lineage Backend which could only support emitting task success.
+<!-- TODO: Update the local Airflow guide and link to it here. -->
+<!-- If you are looking to run Airflow and DataHub using docker locally, follow the guide [here](../../docker/airflow/local_airflow.md). -->
 
-### Setup
+## DataHub Plugin v2
 
-1. You need to install the required dependency in your airflow.
+### Installation
+
+The v2 plugin requires Airflow 2.3+ and Python 3.8+. If you don't meet these requirements, use the v1 plugin instead.
 
 ```shell
-pip install acryl-datahub-airflow-plugin
+pip install 'acryl-datahub-airflow-plugin[plugin-v2]'
 ```
 
-:::note
+### Configuration
 
-The [DataHub Rest](../../metadata-ingestion/sink_docs/datahub.md#datahub-rest) emitter is included in the plugin package by default. To use [DataHub Kafka](../../metadata-ingestion/sink_docs/datahub.md#datahub-kafka) install `pip install acryl-datahub-airflow-plugin[datahub-kafka]`.
+Set up a DataHub connection in Airflow.
 
-:::
+```shell
+airflow connections add  --conn-type 'datahub-rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '<optional datahub auth token>'
+```
+
+No additional configuration is required to use the plugin. However, there are some optional configuration parameters that can be set in the `airflow.cfg` file.
+
+```ini title="airflow.cfg"
+[datahub]
+# Optional - additional config here.
+enabled = True  # default
+```
+
+| Name                       | Default value        | Description                                                                              |
+| -------------------------- | -------------------- | ---------------------------------------------------------------------------------------- |
+| enabled                    | true                 | If the plugin should be enabled.                                                         |
+| conn_id                    | datahub_rest_default | The name of the datahub rest connection.                                                 |
+| cluster                    | prod                 | name of the airflow cluster                                                              |
+| capture_ownership_info     | true                 | Extract DAG ownership.                                                                   |
+| capture_tags_info          | true                 | Extract DAG tags.                                                                        |
+| capture_executions         | true                 | Extract task runs and success/failure statuses. This will show up in DataHub "Runs" tab. |
+| enable_extractors          | true                 | Enable automatic lineage extraction.                                                     |
+| disable_openlineage_plugin | true                 | Disable the OpenLineage plugin to avoid duplicative processing.                          |
+| log_level                  | _no change_          | [debug] Set the log level for the plugin.                                                |
+| debug_emitter              | false                | [debug] If true, the plugin will log the emitted events.                                 |
+
+### Automatic lineage extraction
+
+To automatically extract lineage information, the v2 plugin builds on top of Airflow's built-in [OpenLineage extractors](https://openlineage.io/docs/integrations/airflow/default-extractors).
 
-2. Disable lazy plugin loading in your airflow.cfg.
-   On MWAA you should add this config to your [Apache Airflow configuration options](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-env-variables.html#configuring-2.0-airflow-override).
+The SQL-related extractors have been updated to use DataHub's SQL parser, which is more robust than the built-in one and uses DataHub's metadata information to generate column-level lineage. We discussed the DataHub SQL parser, including why schema-aware parsing works better and how it performs on benchmarks, during the [June 2023 community town hall](https://youtu.be/1QVcUmRQK5E?si=U27zygR7Gi_KdkzE&t=2309).
+
+## DataHub Plugin v1
+
+### Installation
+
+The v1 plugin requires Airflow 2.1+ and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details.
+
+If you're using Airflow 2.3+, we recommend using the v2 plugin instead. If you need to use the v1 plugin with Airflow 2.3+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`.
+
+```shell
+pip install 'acryl-datahub-airflow-plugin[plugin-v1]'
+
+# The DataHub rest connection type is included by default.
+# To use the DataHub Kafka connection type, install the plugin with the kafka extras.
+pip install 'acryl-datahub-airflow-plugin[plugin-v1,datahub-kafka]'
+```
+
+<!-- This plugin registers a task success/failure callback on every task with a cluster policy and emits DataHub events from that. This allows this plugin to be able to register both task success as well as failures compared to the older Airflow Lineage Backend which could only support emitting task success. -->
+
+### Configuration
+
+#### Disable lazy plugin loading
 
 ```ini title="airflow.cfg"
 [core]
 lazy_load_plugins = False
 ```
 
-3. You must configure an Airflow hook for Datahub. We support both a Datahub REST hook and a Kafka-based hook, but you only need one.
+On MWAA you should add this config to your [Apache Airflow configuration options](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-env-variables.html#configuring-2.0-airflow-override).
+
+#### Setup a DataHub connection
 
-   ```shell
-   # For REST-based:
-   airflow connections add  --conn-type 'datahub_rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '<optional datahub auth token>'
-   # For Kafka-based (standard Kafka sink config can be passed via extras):
-   airflow connections add  --conn-type 'datahub_kafka' 'datahub_kafka_default' --conn-host 'broker:9092' --conn-extra '{}'
-   ```
+You must configure an Airflow connection for DataHub. We support both a DataHub REST connection and a Kafka-based connection, but you only need one.
 
-4. Add your `datahub_conn_id` and/or `cluster` to your `airflow.cfg` file if it is not align with the default values. See configuration parameters below
+```shell
+# For REST-based:
+airflow connections add  --conn-type 'datahub_rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '<optional datahub auth token>'
+# For Kafka-based (standard Kafka sink config can be passed via extras):
+airflow connections add  --conn-type 'datahub_kafka' 'datahub_kafka_default' --conn-host 'broker:9092' --conn-extra '{}'
+```
 
-   **Configuration options:**
+#### Configure the plugin
 
-   | Name                           | Default value        | Description                                                                                                                                                                            |
-   | ------------------------------ | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-   | datahub.enabled                | true                 | If the plugin should be enabled.                                                                                                                                                       |
-   | datahub.conn_id                | datahub_rest_default | The name of the datahub connection you set in step 1.                                                                                                                                  |
-   | datahub.cluster                | prod                 | name of the airflow cluster                                                                                                                                                            |
-   | datahub.capture_ownership_info | true                 | If true, the owners field of the DAG will be capture as a DataHub corpuser.                                                                                                            |
-   | datahub.capture_tags_info      | true                 | If true, the tags field of the DAG will be captured as DataHub tags.                                                                                                                   |
-   | datahub.capture_executions     | true                 | If true, we'll capture task runs in DataHub in addition to DAG definitions.                                                                                                            |
-   | datahub.graceful_exceptions    | true                 | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. |
+If your config doesn't align with the default values, you can configure the plugin in your `airflow.cfg` file.
+
+```ini title="airflow.cfg"
+[datahub]
+enabled = true
+conn_id = datahub_rest_default  # or datahub_kafka_default
+# etc.
+```
 
-5. Configure `inlets` and `outlets` for your Airflow operators. For reference, look at the sample DAG in [`lineage_backend_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py), or reference [`lineage_backend_taskflow_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py) if you're using the [TaskFlow API](https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html).
-6. [optional] Learn more about [Airflow lineage](https://airflow.apache.org/docs/apache-airflow/stable/lineage.html), including shorthand notation and some automation.
+| Name                   | Default value        | Description                                                                                                                                                                            |
+| ---------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| enabled                | true                 | If the plugin should be enabled.                                                                                                                                                       |
+| conn_id                | datahub_rest_default | The name of the DataHub connection you created above.                                                                                                                                  |
+| cluster                | prod                 | name of the airflow cluster                                                                                                                                                            |
+| capture_ownership_info | true                 | If true, the owners field of the DAG will be captured as a DataHub corpuser.                                                                                                           |
+| capture_tags_info      | true                 | If true, the tags field of the DAG will be captured as DataHub tags.                                                                                                                   |
+| capture_executions     | true                 | If true, we'll capture task runs in DataHub in addition to DAG definitions.                                                                                                            |
+| graceful_exceptions    | true                 | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. |
 
-### How to validate installation
+#### Validate that the plugin is working
 
 1. Go and check in Airflow at Admin -> Plugins menu if you can see the DataHub plugin
 2. Run an Airflow DAG. In the task logs, you should see Datahub related log messages like:
@@ -77,9 +140,22 @@ lazy_load_plugins = False
 Emitting DataHub ...
 ```
 
-### Emitting lineage via a custom operator to the Airflow Plugin
+## Manual Lineage Annotation
+
+### Using `inlets` and `outlets`
+
+You can manually annotate lineage by setting `inlets` and `outlets` on your Airflow operators. This is useful if you're using an operator that doesn't support automatic lineage extraction, or if you want to override the automatic lineage extraction.
+
+We have a few code samples that demonstrate how to use `inlets` and `outlets`:
 
-If you have created a custom Airflow operator [docs](https://airflow.apache.org/docs/apache-airflow/stable/howto/custom-operator.html) that inherits from the BaseOperator class,
+- [`lineage_backend_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py)
+- [`lineage_backend_taskflow_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py) - uses the [TaskFlow API](https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html)
+
+For more information, take a look at the [Airflow lineage docs](https://airflow.apache.org/docs/apache-airflow/stable/lineage.html).
+
+### Custom Operators
+
+If you have created a [custom Airflow operator](https://airflow.apache.org/docs/apache-airflow/stable/howto/custom-operator.html) that inherits from the BaseOperator class,
 when overriding the `execute` function, set inlets and outlets via `context['ti'].task.inlets` and `context['ti'].task.outlets`.
 The DataHub Airflow plugin will then pick up those inlets and outlets after the task runs.
 
@@ -90,7 +166,7 @@ class DbtOperator(BaseOperator):
     def execute(self, context):
         # do something
         inlets, outlets = self._get_lineage()
-        # inlets/outlets are lists of either datahub_provider.entities.Dataset or datahub_provider.entities.Urn
+        # inlets/outlets are lists of either datahub_airflow_plugin.entities.Dataset or datahub_airflow_plugin.entities.Urn
         context['ti'].task.inlets = self.inlets
         context['ti'].task.outlets = self.outlets
 
@@ -100,78 +176,25 @@ class DbtOperator(BaseOperator):
         return inlets, outlets
 ```
 
-If you override the `pre_execute` and `post_execute` function, ensure they include the `@prepare_lineage` and `@apply_lineage` decorators respectively. [source](https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/lineage.html#lineage)
-
-## Using DataHub's Airflow lineage backend (deprecated)
-
-:::caution
-
-The DataHub Airflow plugin (above) is the recommended way to integrate Airflow with DataHub. For managed services like MWAA, the lineage backend is not supported and so you must use the Airflow plugin.
-
-If you're using Airflow 1.x, we recommend using the Airflow lineage backend with acryl-datahub <= 0.9.1.0.
-
-:::
-
-:::note
-
-If you are looking to run Airflow and DataHub using docker locally, follow the guide [here](../../docker/airflow/local_airflow.md). Otherwise proceed to follow the instructions below.
-:::
-
-### Setting up Airflow to use DataHub as Lineage Backend
-
-1. You need to install the required dependency in your airflow. See <https://registry.astronomer.io/providers/datahub/modules/datahublineagebackend>
-
-```shell
-pip install acryl-datahub[airflow]
-# If you need the Kafka-based emitter/hook:
-pip install acryl-datahub[airflow,datahub-kafka]
-```
-
-2. You must configure an Airflow hook for Datahub. We support both a Datahub REST hook and a Kafka-based hook, but you only need one.
-
-   ```shell
-   # For REST-based:
-   airflow connections add  --conn-type 'datahub_rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '<optional datahub auth token>'
-   # For Kafka-based (standard Kafka sink config can be passed via extras):
-   airflow connections add  --conn-type 'datahub_kafka' 'datahub_kafka_default' --conn-host 'broker:9092' --conn-extra '{}'
-   ```
+If you override the `pre_execute` and `post_execute` functions, ensure they include the `@prepare_lineage` and `@apply_lineage` decorators respectively. Reference the [Airflow docs](https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/lineage.html#lineage) for more details.
 
-3. Add the following lines to your `airflow.cfg` file.
+## Emit Lineage Directly
 
-   ```ini title="airflow.cfg"
-   [lineage]
-   backend = datahub_provider.lineage.datahub.DatahubLineageBackend
-   datahub_kwargs = {
-       "enabled": true,
-       "datahub_conn_id": "datahub_rest_default",
-       "cluster": "prod",
-       "capture_ownership_info": true,
-       "capture_tags_info": true,
-       "graceful_exceptions": true }
-   # The above indentation is important!
-   ```
+If you can't use the plugin or annotate inlets/outlets, you can also emit lineage using the `DatahubEmitterOperator`.
 
-   **Configuration options:**
+Reference [`lineage_emission_dag.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py) for a full example.
 
-   - `datahub_conn_id` (required): Usually `datahub_rest_default` or `datahub_kafka_default`, depending on what you named the connection in step 1.
-   - `cluster` (defaults to "prod"): The "cluster" to associate Airflow DAGs and tasks with.
-   - `capture_ownership_info` (defaults to true): If true, the owners field of the DAG will be capture as a DataHub corpuser.
-   - `capture_tags_info` (defaults to true): If true, the tags field of the DAG will be captured as DataHub tags.
-   - `capture_executions` (defaults to false): If true, it captures task runs as DataHub DataProcessInstances.
-   - `graceful_exceptions` (defaults to true): If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions.
+In order to use this example, you must first configure the DataHub hook. Like in ingestion, we support a DataHub REST hook and a Kafka-based hook. See the plugin configuration for examples.
 
-4. Configure `inlets` and `outlets` for your Airflow operators. For reference, look at the sample DAG in [`lineage_backend_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py), or reference [`lineage_backend_taskflow_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py) if you're using the [TaskFlow API](https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html).
-5. [optional] Learn more about [Airflow lineage](https://airflow.apache.org/docs/apache-airflow/stable/lineage.html), including shorthand notation and some automation.
-
-## Emitting lineage via a separate operator
-
-Take a look at this sample DAG:
+## Debugging
 
-- [`lineage_emission_dag.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py) - emits lineage using the DatahubEmitterOperator.
+### Missing lineage
 
-In order to use this example, you must first configure the Datahub hook. Like in ingestion, we support a Datahub REST hook and a Kafka-based hook. See step 1 above for details.
+If you're not seeing lineage in DataHub, check the following:
 
-## Debugging
+- Validate that the plugin is loaded in Airflow. Go to Admin -> Plugins and check that the DataHub plugin is listed.
+- If using the v2 plugin's automatic lineage, ensure that the `enable_extractors` config is set to true and that automatic lineage is supported for your operator.
+- If using manual lineage annotation, ensure that you're using the `datahub_airflow_plugin.entities.Dataset` or `datahub_airflow_plugin.entities.Urn` classes for your inlets and outlets.
 
 ### Incorrect URLs
 
@@ -179,9 +202,21 @@ If your URLs aren't being generated correctly (usually they'll start with `http:
 
 ```ini title="airflow.cfg"
 [webserver]
-base_url = http://airflow.example.com
+base_url = http://airflow.mycorp.example.com
 ```
 
+## Compatibility
+
+We no longer officially support Airflow <2.1. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow.
+Both of these options support Python 3.7+.
+
+- Airflow 1.10.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.9.1.0.
+- Airflow 2.0.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.11.0.1.
+
+DataHub also previously supported an Airflow [lineage backend](https://airflow.apache.org/docs/apache-airflow/2.2.0/lineage.html#lineage-backend) implementation. While the implementation is still in our codebase, it is deprecated and will be removed in a future release.
+Note that the lineage backend did not support automatic lineage extraction, did not capture task failures, and did not work in AWS MWAA.
+The [documentation for the lineage backend](https://docs-website-1wmaehubl-acryldata.vercel.app/docs/lineage/airflow/#using-datahubs-airflow-lineage-backend-deprecated) has already been archived.
+
 ## Additional references
 
 Related Datahub videos:
diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle
index 58a2bc9e670e3..dacf12dc020df 100644
--- a/metadata-ingestion-modules/airflow-plugin/build.gradle
+++ b/metadata-ingestion-modules/airflow-plugin/build.gradle
@@ -10,6 +10,13 @@ ext {
 if (!project.hasProperty("extra_pip_requirements")) {
     ext.extra_pip_requirements = ""
 }
+if (!project.hasProperty("extra_pip_extras")) {
+    ext.extra_pip_extras = "plugin-v2"
+}
+// If extra_pip_extras is non-empty, we need to add a comma to the beginning of the string.
+if (extra_pip_extras != "") {
+    ext.extra_pip_extras = "," + extra_pip_extras
+}
 
 def pip_install_command = "${venv_name}/bin/pip install -e ../../metadata-ingestion"
 
@@ -36,7 +43,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti
   // and https://github.com/datahub-project/datahub/pull/8435.
   commandLine 'bash', '-x', '-c',
     "${pip_install_command} install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " +
-    "${pip_install_command} -e . ${extra_pip_requirements} &&" +
+    "${pip_install_command} -e .[ignore${extra_pip_extras}] ${extra_pip_requirements} &&" +
     "touch ${sentinel_file}"
 }
 
@@ -47,7 +54,7 @@ task installDev(type: Exec, dependsOn: [install]) {
   inputs.file file('setup.py')
   outputs.file("${sentinel_file}")
   commandLine 'bash', '-x', '-c',
-    "${pip_install_command} -e .[dev]  ${extra_pip_requirements} && " +
+    "${pip_install_command} -e .[dev${extra_pip_extras}] ${extra_pip_requirements} && " +
     "touch ${sentinel_file}"
 }
 
@@ -79,7 +86,8 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
   outputs.dir("${venv_name}")
   outputs.file("${sentinel_file}")
   commandLine 'bash', '-x', '-c',
-    "${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}"
+    "${pip_install_command} -e .[dev,integration-tests${extra_pip_extras}] ${extra_pip_requirements} && " +
+    "touch ${sentinel_file}"
 }
 
 def testFile = hasProperty('testFile') ? testFile : 'unknown'
@@ -97,20 +105,13 @@ task testSingle(dependsOn: [installDevTest]) {
 }
 
 task testQuick(type: Exec, dependsOn: installDevTest) {
-  // We can't enforce the coverage requirements if we run a subset of the tests.
   inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
   inputs.files(project.fileTree(dir: "tests/"))
-  outputs.dir("${venv_name}")
   commandLine 'bash', '-x', '-c',
-    "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
+    "source ${venv_name}/bin/activate && pytest  -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
 }
 
 
-task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
-  commandLine 'bash', '-x', '-c',
-    "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
-}
-
 task cleanPythonCache(type: Exec) {
   commandLine 'bash', '-c',
     "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml
index fba81486b9f67..648040c1951db 100644
--- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml
+++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml
@@ -12,6 +12,7 @@ include = '\.pyi?$'
 
 [tool.isort]
 indent = '    '
+known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat']
 profile = 'black'
 sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER'
 
diff --git a/metadata-ingestion-modules/airflow-plugin/setup.cfg b/metadata-ingestion-modules/airflow-plugin/setup.cfg
index 157bcce1c298d..c25256c5751b8 100644
--- a/metadata-ingestion-modules/airflow-plugin/setup.cfg
+++ b/metadata-ingestion-modules/airflow-plugin/setup.cfg
@@ -41,29 +41,29 @@ ignore_missing_imports = no
 
 [tool:pytest]
 asyncio_mode = auto
-addopts = --cov=src --cov-report term-missing --cov-config setup.cfg --strict-markers
+addopts = --cov=src --cov-report='' --cov-config setup.cfg --strict-markers -s -v
+markers =
+    integration: marks tests to only run in integration (deselect with '-m "not integration"')
 
 testpaths = 
     tests/unit
     tests/integration
 
-[coverage:run]
-# Because of some quirks in the way setup.cfg, coverage.py, pytest-cov,
-# and tox interact, we should not uncomment the following line.
-# See https://pytest-cov.readthedocs.io/en/latest/config.html and
-# https://coverage.readthedocs.io/en/coverage-5.0/config.html.
-# We also have some additional pytest/cov config options in tox.ini.
-# source = src
+# [coverage:run]
+# # Because of some quirks in the way setup.cfg, coverage.py, pytest-cov,
+# # and tox interact, we should not uncomment the following line.
+# # See https://pytest-cov.readthedocs.io/en/latest/config.html and
+# # https://coverage.readthedocs.io/en/coverage-5.0/config.html.
+# # We also have some additional pytest/cov config options in tox.ini.
+# # source = src
 
-[coverage:paths]
-# This is necessary for tox-based coverage to be counted properly.
-source =
-   src
-   */site-packages
+# [coverage:paths]
+# # This is necessary for tox-based coverage to be counted properly.
+# source =
+#    src
+#    */site-packages
 
 [coverage:report]
-# The fail_under value ensures that at least some coverage data is collected.
-# We override its value in the tox config.
 show_missing = true
 exclude_lines =
     pragma: no cover
diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py
index 47069f59c314d..a5af881022d8c 100644
--- a/metadata-ingestion-modules/airflow-plugin/setup.py
+++ b/metadata-ingestion-modules/airflow-plugin/setup.py
@@ -1,5 +1,6 @@
 import os
 import pathlib
+from typing import Dict, Set
 
 import setuptools
 
@@ -13,23 +14,43 @@ def get_long_description():
     return pathlib.Path(os.path.join(root, "README.md")).read_text()
 
 
+_version = package_metadata["__version__"]
+_self_pin = f"=={_version}" if not _version.endswith("dev0") else ""
+
+
 rest_common = {"requests", "requests_file"}
 
 base_requirements = {
     # Compatibility.
     "dataclasses>=0.6; python_version < '3.7'",
-    # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to Airflow 2.0.2 dependency conflict
-    "typing_extensions>=3.7.4.3 ;  python_version < '3.8'",
-    "typing_extensions>=3.10.0.2,<4.6.0 ;  python_version >= '3.8'",
     "mypy_extensions>=0.4.3",
     # Actual dependencies.
-    "typing-inspect",
     "pydantic>=1.5.1",
     "apache-airflow >= 2.0.2",
     *rest_common,
-    f"acryl-datahub == {package_metadata['__version__']}",
 }
 
+plugins: Dict[str, Set[str]] = {
+    "datahub-rest": {
+        f"acryl-datahub[datahub-rest]{_self_pin}",
+    },
+    "datahub-kafka": {
+        f"acryl-datahub[datahub-kafka]{_self_pin}",
+    },
+    "datahub-file": {
+        f"acryl-datahub[sync-file-emitter]{_self_pin}",
+    },
+    "plugin-v1": set(),
+    "plugin-v2": {
+        # The v2 plugin requires Python 3.8+.
+        f"acryl-datahub[sql-parser]{_self_pin}",
+        "openlineage-airflow==1.2.0; python_version >= '3.8'",
+    },
+}
+
+# Include datahub-rest in the base requirements.
+base_requirements.update(plugins["datahub-rest"])
+
 
 mypy_stubs = {
     "types-dataclasses",
@@ -45,11 +66,9 @@ def get_long_description():
     # versions 0.1.13 and 0.1.14 seem to have issues
     "types-click==0.1.12",
     "types-tabulate",
-    # avrogen package requires this
-    "types-pytz",
 }
 
-base_dev_requirements = {
+dev_requirements = {
     *base_requirements,
     *mypy_stubs,
     "black==22.12.0",
@@ -66,6 +85,7 @@ def get_long_description():
     "pytest-cov>=2.8.1",
     "tox",
     "deepdiff",
+    "tenacity",
     "requests-mock",
     "freezegun",
     "jsonpickle",
@@ -74,8 +94,24 @@ def get_long_description():
     "packaging",
 }
 
-dev_requirements = {
-    *base_dev_requirements,
+integration_test_requirements = {
+    *dev_requirements,
+    *plugins["datahub-file"],
+    *plugins["datahub-kafka"],
+    f"acryl-datahub[testing-utils]{_self_pin}",
+    # Extra requirements for loading our test dags.
+    "apache-airflow[snowflake]>=2.0.2",
+    # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350
+    # Eventually we want to set this to "snowflake-sqlalchemy>=1.4.3".
+    # However, that doesn't work with older versions of Airflow. Instead
+    # of splitting this into integration-test-old and integration-test-new,
+    # adding a bound to SQLAlchemy was the simplest solution.
+    "sqlalchemy<1.4.42",
+    # To avoid https://github.com/snowflakedb/snowflake-connector-python/issues/1188,
+    # we need https://github.com/snowflakedb/snowflake-connector-python/pull/1193
+    "snowflake-connector-python>=2.7.10",
+    "virtualenv",  # needed by PythonVirtualenvOperator
+    "apache-airflow-providers-sqlite",
 }
 
 
@@ -88,7 +124,7 @@ def get_long_description():
 setuptools.setup(
     # Package metadata.
     name=package_metadata["__package_name__"],
-    version=package_metadata["__version__"],
+    version=_version,
     url="https://datahubproject.io/",
     project_urls={
         "Documentation": "https://datahubproject.io/docs/",
@@ -131,17 +167,8 @@ def get_long_description():
     # Dependencies.
     install_requires=list(base_requirements),
     extras_require={
+        **{plugin: list(dependencies) for plugin, dependencies in plugins.items()},
         "dev": list(dev_requirements),
-        "datahub-kafka": [
-            f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}"
-        ],
-        "integration-tests": [
-            f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}",
-            # Extra requirements for Airflow.
-            "apache-airflow[snowflake]>=2.0.2",  # snowflake is used in example dags
-            # Because of https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 we need to restrict SQLAlchemy's max version.
-            "SQLAlchemy<1.4.42",
-            "virtualenv",  # needed by PythonVirtualenvOperator
-        ],
+        "integration-tests": list(integration_test_requirements),
     },
 )
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py
index 5ad20e1f72551..10f014fbd586f 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py
@@ -1,3 +1,7 @@
+from typing import List
+
+import airflow.version
+import packaging.version
 from airflow.models.baseoperator import BaseOperator
 
 from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED
@@ -21,7 +25,35 @@
 
 assert AIRFLOW_PATCHED
 
+# Approach suggested by https://stackoverflow.com/a/11887885/5004662.
+AIRFLOW_VERSION = packaging.version.parse(airflow.version.version)
+HAS_AIRFLOW_STANDALONE_CMD = AIRFLOW_VERSION >= packaging.version.parse("2.2.0.dev0")
+HAS_AIRFLOW_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.3.0.dev0")
+HAS_AIRFLOW_DAG_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.5.0.dev0")
+
+
+def get_task_inlets(operator: "Operator") -> List:
+    # From Airflow 2.4 onwards, _inlets was dropped and inlets is used consistently. On earlier versions that is not the case, so we still have to read _inlets.
+    if hasattr(operator, "_inlets"):
+        return operator._inlets  # type: ignore[attr-defined, union-attr]
+    if hasattr(operator, "get_inlet_defs"):
+        return operator.get_inlet_defs()  # type: ignore[attr-defined]
+    return operator.inlets
+
+
+def get_task_outlets(operator: "Operator") -> List:
+    # From Airflow 2.4 onwards, _outlets was dropped and outlets is used consistently. On earlier versions that is not the case, so we still have to read _outlets.
+    # We have to use _outlets because outlets is empty in Airflow < 2.4.0
+    if hasattr(operator, "_outlets"):
+        return operator._outlets  # type: ignore[attr-defined, union-attr]
+    if hasattr(operator, "get_outlet_defs"):
+        return operator.get_outlet_defs()
+    return operator.outlets
+
+
 __all__ = [
+    "AIRFLOW_VERSION",
+    "HAS_AIRFLOW_LISTENER_API",
     "Operator",
     "MappedOperator",
     "EmptyOperator",
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py
new file mode 100644
index 0000000000000..67843da2ba995
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py
@@ -0,0 +1,80 @@
+from typing import TYPE_CHECKING, Optional
+
+import datahub.emitter.mce_builder as builder
+from airflow.configuration import conf
+from datahub.configuration.common import ConfigModel
+
+if TYPE_CHECKING:
+    from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook
+
+
+class DatahubLineageConfig(ConfigModel):
+    # This class is shared between the lineage backend and the Airflow plugin.
+    # The defaults listed here are only relevant for the lineage backend.
+    # The Airflow plugin's default values come from the fallback values in
+    # the get_lineage_config() function below.
+
+    enabled: bool = True
+
+    # DataHub hook connection ID.
+    datahub_conn_id: str
+
+    # Cluster to associate with the pipelines and tasks. Defaults to "prod".
+    cluster: str = builder.DEFAULT_FLOW_CLUSTER
+
+    # If true, the owners field of the DAG will be captured as a DataHub corpuser.
+    capture_ownership_info: bool = True
+
+    # If true, the tags field of the DAG will be captured as DataHub tags.
+    capture_tags_info: bool = True
+
+    capture_executions: bool = False
+
+    enable_extractors: bool = True
+
+    log_level: Optional[str] = None
+    debug_emitter: bool = False
+
+    disable_openlineage_plugin: bool = True
+
+    # Note that this field is only respected by the lineage backend.
+    # The Airflow plugin behaves as if it were set to True.
+    graceful_exceptions: bool = True
+
+    def make_emitter_hook(self) -> "DatahubGenericHook":
+        # This is necessary to avoid issues with circular imports.
+        from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook
+
+        return DatahubGenericHook(self.datahub_conn_id)
+
+
+def get_lineage_config() -> DatahubLineageConfig:
+    """Load the DataHub plugin config from airflow.cfg."""
+
+    enabled = conf.get("datahub", "enabled", fallback=True)
+    datahub_conn_id = conf.get("datahub", "conn_id", fallback="datahub_rest_default")
+    cluster = conf.get("datahub", "cluster", fallback=builder.DEFAULT_FLOW_CLUSTER)
+    capture_tags_info = conf.get("datahub", "capture_tags_info", fallback=True)
+    capture_ownership_info = conf.get(
+        "datahub", "capture_ownership_info", fallback=True
+    )
+    capture_executions = conf.get("datahub", "capture_executions", fallback=True)
+    enable_extractors = conf.get("datahub", "enable_extractors", fallback=True)
+    log_level = conf.get("datahub", "log_level", fallback=None)
+    debug_emitter = conf.get("datahub", "debug_emitter", fallback=False)
+    disable_openlineage_plugin = conf.get(
+        "datahub", "disable_openlineage_plugin", fallback=True
+    )
+
+    return DatahubLineageConfig(
+        enabled=enabled,
+        datahub_conn_id=datahub_conn_id,
+        cluster=cluster,
+        capture_ownership_info=capture_ownership_info,
+        capture_tags_info=capture_tags_info,
+        capture_executions=capture_executions,
+        enable_extractors=enable_extractors,
+        log_level=log_level,
+        debug_emitter=debug_emitter,
+        disable_openlineage_plugin=disable_openlineage_plugin,
+    )
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py
new file mode 100644
index 0000000000000..f39d37b122228
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py
@@ -0,0 +1,7 @@
+from datahub_airflow_plugin.datahub_listener import get_airflow_plugin_listener
+
+_listener = get_airflow_plugin_listener()
+if _listener:
+    on_task_instance_running = _listener.on_task_instance_running
+    on_task_instance_success = _listener.on_task_instance_success
+    on_task_instance_failed = _listener.on_task_instance_failed
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py
new file mode 100644
index 0000000000000..7d35791bf1db4
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py
@@ -0,0 +1,23 @@
+import logging
+
+import datahub.emitter.mce_builder as builder
+from openlineage.client.run import Dataset as OpenLineageDataset
+
+logger = logging.getLogger(__name__)
+
+
+OL_SCHEME_TWEAKS = {
+    "sqlserver": "mssql",
+    "trino": "presto",
+    "awsathena": "athena",
+}
+
+
+def translate_ol_to_datahub_urn(ol_uri: OpenLineageDataset) -> str:
+    namespace = ol_uri.namespace
+    name = ol_uri.name
+
+    scheme, *rest = namespace.split("://", maxsplit=1)
+
+    platform = OL_SCHEME_TWEAKS.get(scheme, scheme)
+    return builder.make_dataset_urn(platform=platform, name=name)
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py
new file mode 100644
index 0000000000000..f84b7b56f6119
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py
@@ -0,0 +1,244 @@
+import contextlib
+import logging
+import unittest.mock
+from typing import TYPE_CHECKING, Optional
+
+import datahub.emitter.mce_builder as builder
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
+    get_platform_from_sqlalchemy_uri,
+)
+from datahub.utilities.sqlglot_lineage import (
+    SqlParsingResult,
+    create_lineage_sql_parsed_result,
+)
+from openlineage.airflow.extractors import BaseExtractor
+from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager
+from openlineage.airflow.extractors import TaskMetadata
+from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor
+from openlineage.airflow.extractors.sql_extractor import SqlExtractor
+from openlineage.airflow.utils import get_operator_class, try_import_from_string
+from openlineage.client.facet import (
+    ExtractionError,
+    ExtractionErrorRunFacet,
+    SqlJobFacet,
+)
+
+from datahub_airflow_plugin._airflow_shims import Operator
+from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS
+
+if TYPE_CHECKING:
+    from airflow.models import DagRun, TaskInstance
+    from datahub.ingestion.graph.client import DataHubGraph
+
+logger = logging.getLogger(__name__)
+_DATAHUB_GRAPH_CONTEXT_KEY = "datahub_graph"
+SQL_PARSING_RESULT_KEY = "datahub_sql"
+
+
+class ExtractorManager(OLExtractorManager):
+    # TODO: On Airflow 2.7, the OLExtractorManager is part of the built-in Airflow API.
+    # When available, we should use that instead. The same goes for most of the OL
+    # extractors.
+
+    def __init__(self):
+        super().__init__()
+
+        _sql_operator_overrides = [
+            # The OL BigQuery extractor has some complex logic to detect
+            # the BigQuery job_id and fetch lineage from there. However, it can't
+            # generate CLL, so we disable it and use our own extractor instead.
+            "BigQueryOperator",
+            "BigQueryExecuteQueryOperator",
+            # Athena also does something similar.
+            "AthenaOperator",
+            "AWSAthenaOperator",
+            # Additional types that OL doesn't support. This is only necessary because
+            # on older versions of Airflow, these operators don't inherit from SQLExecuteQueryOperator.
+            "SqliteOperator",
+        ]
+        for operator in _sql_operator_overrides:
+            self.task_to_extractor.extractors[operator] = GenericSqlExtractor
+
+        self._graph: Optional["DataHubGraph"] = None
+
+    @contextlib.contextmanager
+    def _patch_extractors(self):
+        with contextlib.ExitStack() as stack:
+            # Patch the SqlExtractor.extract() method.
+            stack.enter_context(
+                unittest.mock.patch.object(
+                    SqlExtractor,
+                    "extract",
+                    _sql_extractor_extract,
+                )
+            )
+
+            # Patch the SnowflakeExtractor.default_schema property.
+            stack.enter_context(
+                unittest.mock.patch.object(
+                    SnowflakeExtractor,
+                    "default_schema",
+                    property(snowflake_default_schema),
+                )
+            )
+
+            # TODO: Override the BigQuery extractor to use the DataHub SQL parser.
+            # self.extractor_manager.add_extractor()
+
+            # TODO: Override the Athena extractor to use the DataHub SQL parser.
+
+            yield
+
+    def extract_metadata(
+        self,
+        dagrun: "DagRun",
+        task: "Operator",
+        complete: bool = False,
+        task_instance: Optional["TaskInstance"] = None,
+        task_uuid: Optional[str] = None,
+        graph: Optional["DataHubGraph"] = None,
+    ) -> TaskMetadata:
+        self._graph = graph
+        with self._patch_extractors():
+            return super().extract_metadata(
+                dagrun, task, complete, task_instance, task_uuid
+            )
+
+    def _get_extractor(self, task: "Operator") -> Optional[BaseExtractor]:
+        # By adding this, we can use the generic extractor as a fallback for
+        # any operator that inherits from SQLExecuteQueryOperator.
+        clazz = get_operator_class(task)
+        SQLExecuteQueryOperator = try_import_from_string(
+            "airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator"
+        )
+        if SQLExecuteQueryOperator and issubclass(clazz, SQLExecuteQueryOperator):
+            self.task_to_extractor.extractors.setdefault(
+                clazz.__name__, GenericSqlExtractor
+            )
+
+        extractor = super()._get_extractor(task)
+        if extractor:
+            extractor.set_context(_DATAHUB_GRAPH_CONTEXT_KEY, self._graph)
+        return extractor
+
+
+class GenericSqlExtractor(SqlExtractor):
+    # Note that the extract() method is patched elsewhere.
+
+    @property
+    def default_schema(self):
+        return super().default_schema
+
+    def _get_scheme(self) -> Optional[str]:
+        # Best effort conversion to DataHub platform names.
+
+        with contextlib.suppress(Exception):
+            if self.hook:
+                if hasattr(self.hook, "get_uri"):
+                    uri = self.hook.get_uri()
+                    return get_platform_from_sqlalchemy_uri(uri)
+
+        return self.conn.conn_type or super().dialect
+
+    def _get_database(self) -> Optional[str]:
+        if self.conn:
+            # For BigQuery, the "database" is the project name.
+            if hasattr(self.conn, "project_id"):
+                return self.conn.project_id
+
+            return self.conn.schema
+        return None
+
+
+def _sql_extractor_extract(self: "SqlExtractor") -> TaskMetadata:
+    # Why not override the OL sql_parse method directly, instead of overriding
+    # extract()? A few reasons:
+    #
+    # 1. We would want to pass the default_db and graph instance into our sql parser
+    #    method. The OL code doesn't pass the default_db (despite having it available),
+    #    and it's not clear how to get the graph instance into that method.
+    # 2. OL has some janky logic to fetch table schemas as part of the sql extractor.
+    #    We don't want that behavior and this lets us disable it.
+    # 3. Our SqlParsingResult already has DataHub urns, whereas using SqlMeta would
+    #    require us to convert those urns to OL uris, just for them to get converted
+    #    back to urns later on in our processing.
+
+    task_name = f"{self.operator.dag_id}.{self.operator.task_id}"
+    sql = self.operator.sql
+
+    run_facets = {}
+    job_facets = {"sql": SqlJobFacet(query=self._normalize_sql(sql))}
+
+    # Prepare to run the SQL parser.
+    graph = self.context.get(_DATAHUB_GRAPH_CONTEXT_KEY, None)
+
+    default_database = getattr(self.operator, "database", None)
+    if not default_database:
+        default_database = self.database
+    default_schema = self.default_schema
+
+    # TODO: Add better handling for sql being a list of statements.
+    if isinstance(sql, list):
+        logger.info(f"Got list of SQL statements for {task_name}. Using first one.")
+        sql = sql[0]
+
+    # Run the SQL parser.
+    scheme = self.scheme
+    platform = OL_SCHEME_TWEAKS.get(scheme, scheme)
+    self.log.debug(
+        "Running the SQL parser %s (platform=%s, default db=%s, schema=%s): %s",
+        "with graph client" if graph else "in offline mode",
+        platform,
+        default_database,
+        default_schema,
+        sql,
+    )
+    sql_parsing_result: SqlParsingResult = create_lineage_sql_parsed_result(
+        query=sql,
+        graph=graph,
+        platform=platform,
+        platform_instance=None,
+        env=builder.DEFAULT_ENV,
+        database=default_database,
+        schema=default_schema,
+    )
+    self.log.debug(f"Got sql lineage {sql_parsing_result}")
+
+    if sql_parsing_result.debug_info.error:
+        error = sql_parsing_result.debug_info.error
+        run_facets["extractionError"] = ExtractionErrorRunFacet(
+            totalTasks=1,
+            failedTasks=1,
+            errors=[
+                ExtractionError(
+                    errorMessage=str(error),
+                    stackTrace=None,
+                    task="datahub_sql_parser",
+                    taskNumber=None,
+                )
+            ],
+        )
+
+    # Save sql_parsing_result to the facets dict. It is removed from the
+    # facet dict in the extractor's processing logic.
+    run_facets[SQL_PARSING_RESULT_KEY] = sql_parsing_result  # type: ignore
+
+    return TaskMetadata(
+        name=task_name,
+        inputs=[],
+        outputs=[],
+        run_facets=run_facets,
+        job_facets=job_facets,
+    )
+
+
+def snowflake_default_schema(self: "SnowflakeExtractor") -> Optional[str]:
+    if hasattr(self.operator, "schema") and self.operator.schema is not None:
+        return self.operator.schema
+    return (
+        self.conn.extra_dejson.get("extra__snowflake__schema", "")
+        or self.conn.extra_dejson.get("schema", "")
+        or self.conn.schema
+    )
+    # TODO: Should we try a fallback of:
+    # execute_query_on_hook(self.hook, "SELECT current_schema();")[0][0]
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py
index b5e86e14d85d0..16585f70e820b 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py
@@ -1,4 +1,5 @@
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union, cast
+from datetime import datetime
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast
 
 from airflow.configuration import conf
 from datahub.api.entities.datajob import DataFlow, DataJob
@@ -6,6 +7,7 @@
     DataProcessInstance,
     InstanceRunResult,
 )
+from datahub.emitter.generic_emitter import Emitter
 from datahub.metadata.schema_classes import DataProcessTypeClass
 from datahub.utilities.urns.data_flow_urn import DataFlowUrn
 from datahub.utilities.urns.data_job_urn import DataJobUrn
@@ -17,8 +19,6 @@
 if TYPE_CHECKING:
     from airflow import DAG
     from airflow.models import DagRun, TaskInstance
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-    from datahub.emitter.rest_emitter import DatahubRestEmitter
 
     from datahub_airflow_plugin._airflow_shims import Operator
 
@@ -91,7 +91,7 @@ def _get_dependencies(
                 )
 
                 # if the task triggers the subdag, link it to this node in the subdag
-                if subdag_task_id in _task_downstream_task_ids(upstream_task):
+                if subdag_task_id in sorted(_task_downstream_task_ids(upstream_task)):
                     upstream_subdag_triggers.append(upstream_task_urn)
 
         # If the operator is an ExternalTaskSensor then we set the remote task as upstream.
@@ -143,7 +143,7 @@ def generate_dataflow(
         """
         id = dag.dag_id
         orchestrator = "airflow"
-        description = f"{dag.description}\n\n{dag.doc_md or ''}"
+        description = "\n\n".join(filter(None, [dag.description, dag.doc_md])) or None
         data_flow = DataFlow(
             env=cluster, id=id, orchestrator=orchestrator, description=description
         )
@@ -153,7 +153,7 @@ def generate_dataflow(
         allowed_flow_keys = [
             "_access_control",
             "_concurrency",
-            "_default_view",
+            # "_default_view",
             "catchup",
             "fileloc",
             "is_paused_upon_creation",
@@ -171,7 +171,7 @@ def generate_dataflow(
         data_flow.url = f"{base_url}/tree?dag_id={dag.dag_id}"
 
         if capture_owner and dag.owner:
-            data_flow.owners.add(dag.owner)
+            data_flow.owners.update(owner.strip() for owner in dag.owner.split(","))
 
         if capture_tags and dag.tags:
             data_flow.tags.update(dag.tags)
@@ -227,10 +227,7 @@ def generate_datajob(
 
         job_property_bag: Dict[str, str] = {}
 
-        allowed_task_keys = [
-            "_downstream_task_ids",
-            "_inlets",
-            "_outlets",
+        allowed_task_keys: List[Union[str, Tuple[str, ...]]] = [
             "_task_type",
             "_task_module",
             "depends_on_past",
@@ -243,15 +240,28 @@ def generate_datajob(
             "trigger_rule",
             "wait_for_downstream",
             # In Airflow 2.3, _downstream_task_ids was renamed to downstream_task_ids
-            "downstream_task_ids",
+            ("downstream_task_ids", "_downstream_task_ids"),
             # In Airflow 2.4, _inlets and _outlets were removed in favor of non-private versions.
-            "inlets",
-            "outlets",
+            ("inlets", "_inlets"),
+            ("outlets", "_outlets"),
         ]
 
         for key in allowed_task_keys:
-            if hasattr(task, key):
-                job_property_bag[key] = repr(getattr(task, key))
+            if isinstance(key, tuple):
+                out_key: str = key[0]
+                try_keys = key
+            else:
+                out_key = key
+                try_keys = (key,)
+
+            for k in try_keys:
+                if hasattr(task, k):
+                    v = getattr(task, k)
+                    if out_key == "downstream_task_ids":
+                        # Generate these in a consistent order.
+                        v = list(sorted(v))
+                    job_property_bag[out_key] = repr(v)
+                    break
 
         datajob.properties = job_property_bag
         base_url = conf.get("webserver", "base_url")
@@ -288,7 +298,7 @@ def create_datajob_instance(
 
     @staticmethod
     def run_dataflow(
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         cluster: str,
         dag_run: "DagRun",
         start_timestamp_millis: Optional[int] = None,
@@ -340,7 +350,7 @@ def run_dataflow(
 
     @staticmethod
     def complete_dataflow(
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         cluster: str,
         dag_run: "DagRun",
         end_timestamp_millis: Optional[int] = None,
@@ -348,7 +358,7 @@ def complete_dataflow(
     ) -> None:
         """
 
-        :param emitter: DatahubRestEmitter - the datahub rest emitter to emit the generated mcps
+        :param emitter: Emitter - the datahub emitter to emit the generated mcps
         :param cluster: str - name of the cluster
         :param dag_run: DagRun
         :param end_timestamp_millis: Optional[int] - the completion time in milliseconds if not set the current time will be used.
@@ -386,7 +396,7 @@ def complete_dataflow(
 
     @staticmethod
     def run_datajob(
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         cluster: str,
         ti: "TaskInstance",
         dag: "DAG",
@@ -413,16 +423,13 @@ def run_datajob(
         job_property_bag["end_date"] = str(ti.end_date)
         job_property_bag["execution_date"] = str(ti.execution_date)
         job_property_bag["try_number"] = str(ti.try_number - 1)
-        job_property_bag["hostname"] = str(ti.hostname)
         job_property_bag["max_tries"] = str(ti.max_tries)
         # Not compatible with Airflow 1
         if hasattr(ti, "external_executor_id"):
             job_property_bag["external_executor_id"] = str(ti.external_executor_id)
-        job_property_bag["pid"] = str(ti.pid)
         job_property_bag["state"] = str(ti.state)
         job_property_bag["operator"] = str(ti.operator)
         job_property_bag["priority_weight"] = str(ti.priority_weight)
-        job_property_bag["unixname"] = str(ti.unixname)
         job_property_bag["log_url"] = ti.log_url
         dpi.properties.update(job_property_bag)
         dpi.url = ti.log_url
@@ -442,8 +449,10 @@ def run_datajob(
                 dpi.type = DataProcessTypeClass.BATCH_AD_HOC
 
         if start_timestamp_millis is None:
-            assert ti.start_date
-            start_timestamp_millis = int(ti.start_date.timestamp() * 1000)
+            if ti.start_date:
+                start_timestamp_millis = int(ti.start_date.timestamp() * 1000)
+            else:
+                start_timestamp_millis = int(datetime.now().timestamp() * 1000)
 
         if attempt is None:
             attempt = ti.try_number
@@ -458,7 +467,7 @@ def run_datajob(
 
     @staticmethod
     def complete_datajob(
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         cluster: str,
         ti: "TaskInstance",
         dag: "DAG",
@@ -469,7 +478,7 @@ def complete_datajob(
     ) -> DataProcessInstance:
         """
 
-        :param emitter: DatahubRestEmitter
+        :param emitter: Emitter - the datahub emitter to emit the generated mcps
         :param cluster: str
         :param ti: TaskInstance
         :param dag: DAG
@@ -483,8 +492,10 @@ def complete_datajob(
             datajob = AirflowGenerator.generate_datajob(cluster, ti.task, dag)
 
         if end_timestamp_millis is None:
-            assert ti.end_date
-            end_timestamp_millis = int(ti.end_date.timestamp() * 1000)
+            if ti.end_date:
+                end_timestamp_millis = int(ti.end_date.timestamp() * 1000)
+            else:
+                end_timestamp_millis = int(datetime.now().timestamp() * 1000)
 
         if result is None:
             # We should use TaskInstanceState but it is not available in Airflow 1
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
new file mode 100644
index 0000000000000..a3f5cb489e29f
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py
@@ -0,0 +1,494 @@
+import copy
+import functools
+import logging
+import threading
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast
+
+import airflow
+import datahub.emitter.mce_builder as builder
+from datahub.api.entities.datajob import DataJob
+from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.metadata.schema_classes import (
+    FineGrainedLineageClass,
+    FineGrainedLineageDownstreamTypeClass,
+    FineGrainedLineageUpstreamTypeClass,
+)
+from datahub.telemetry import telemetry
+from datahub.utilities.sqlglot_lineage import SqlParsingResult
+from datahub.utilities.urns.dataset_urn import DatasetUrn
+from openlineage.airflow.listener import TaskHolder
+from openlineage.airflow.utils import redact_with_exclusions
+from openlineage.client.serde import Serde
+
+from datahub_airflow_plugin._airflow_shims import (
+    HAS_AIRFLOW_DAG_LISTENER_API,
+    Operator,
+    get_task_inlets,
+    get_task_outlets,
+)
+from datahub_airflow_plugin._config import DatahubLineageConfig, get_lineage_config
+from datahub_airflow_plugin._datahub_ol_adapter import translate_ol_to_datahub_urn
+from datahub_airflow_plugin._extractors import SQL_PARSING_RESULT_KEY, ExtractorManager
+from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
+from datahub_airflow_plugin.entities import _Entity
+
+_F = TypeVar("_F", bound=Callable[..., None])
+if TYPE_CHECKING:
+    from airflow.models import DAG, DagRun, TaskInstance
+    from sqlalchemy.orm import Session
+
+    # To placate mypy on Airflow versions that don't have the listener API,
+    # we define a dummy hookimpl that's an identity function.
+
+    def hookimpl(f: _F) -> _F:  # type: ignore[misc] # noqa: F811
+        return f
+
+else:
+    from airflow.listeners import hookimpl
+
+logger = logging.getLogger(__name__)
+
+_airflow_listener_initialized = False
+_airflow_listener: Optional["DataHubListener"] = None
+_RUN_IN_THREAD = True
+_RUN_IN_THREAD_TIMEOUT = 30
+
+
+def get_airflow_plugin_listener() -> Optional["DataHubListener"]:
+    # Using globals instead of functools.lru_cache to make testing easier.
+    global _airflow_listener_initialized
+    global _airflow_listener
+
+    if not _airflow_listener_initialized:
+        _airflow_listener_initialized = True
+
+        plugin_config = get_lineage_config()
+
+        if plugin_config.enabled:
+            _airflow_listener = DataHubListener(config=plugin_config)
+
+            if plugin_config.disable_openlineage_plugin:
+                # Deactivate the OpenLineagePlugin listener to avoid conflicts.
+                from openlineage.airflow.plugin import OpenLineagePlugin
+
+                OpenLineagePlugin.listeners = []
+
+            telemetry.telemetry_instance.ping(
+                "airflow-plugin-init",
+                {
+                    "airflow-version": airflow.__version__,
+                    "datahub-airflow-plugin": "v2",
+                    "datahub-airflow-plugin-dag-events": HAS_AIRFLOW_DAG_LISTENER_API,
+                    "capture_executions": plugin_config.capture_executions,
+                    "capture_tags": plugin_config.capture_tags_info,
+                    "capture_ownership": plugin_config.capture_ownership_info,
+                    "enable_extractors": plugin_config.enable_extractors,
+                    "disable_openlineage_plugin": plugin_config.disable_openlineage_plugin,
+                },
+            )
+    return _airflow_listener
+
+
+def run_in_thread(f: _F) -> _F:
+    # Catches and logs exceptions, including ones raised inside the thread.
+
+    def guarded(*args, **kwargs):
+        try:
+            f(*args, **kwargs)
+        except Exception as e:
+            logger.exception(e)
+
+    @functools.wraps(f)
+    def wrapper(*args, **kwargs):
+        if not _RUN_IN_THREAD:
+            return guarded(*args, **kwargs)
+        try:
+            # A poor-man's timeout: don't hang the task on slow extractors/API.
+            thread = threading.Thread(target=guarded, args=args, kwargs=kwargs)
+            thread.daemon = True
+            thread.start()
+            thread.join(timeout=_RUN_IN_THREAD_TIMEOUT)
+            if thread.is_alive():
+                logger.warning(
+                    f"Thread for {f.__name__} is still running after {_RUN_IN_THREAD_TIMEOUT} seconds. "
+                    "Continuing without waiting for it to finish."
+                )
+        except Exception as e:
+            logger.exception(e)
+
+    return cast(_F, wrapper)
+
+
+class DataHubListener:
+    __name__ = "DataHubListener"
+
+    def __init__(self, config: DatahubLineageConfig):
+        self.config = config
+        self._set_log_level()
+
+        self._emitter = config.make_emitter_hook().make_emitter()
+        self._graph: Optional[DataHubGraph] = None
+        logger.info(f"DataHub plugin using {repr(self._emitter)}")
+
+        # See discussion here https://github.com/OpenLineage/OpenLineage/pull/508 for
+        # why we need to keep track of tasks ourselves.
+        self._task_holder = TaskHolder()
+
+        # In our case, we also want to cache the initial datajob object
+        # so that we can add to it when the task completes.
+        self._datajob_holder: Dict[str, DataJob] = {}
+
+        self.extractor_manager = ExtractorManager()
+
+        # This "inherits" from types.ModuleType to avoid issues with Airflow's listener plugin loader.
+        # It previously (v2.4.x and likely other versions too) would throw errors if it was not a module.
+        # https://github.com/apache/airflow/blob/e99a518970b2d349a75b1647f6b738c8510fa40e/airflow/listeners/listener.py#L56
+        # self.__class__ = types.ModuleType
+
+    @property
+    def emitter(self):
+        return self._emitter
+
+    @property
+    def graph(self) -> Optional[DataHubGraph]:
+        """Graph client derived from the emitter, or None if it isn't REST-based."""
+        if self._graph:
+            return self._graph
+        if isinstance(self._emitter, DataHubGraph):
+            self._graph = self._emitter
+        elif isinstance(self._emitter, DatahubRestEmitter):
+            # This is lazy initialized to avoid throwing errors on plugin load.
+            self._graph = self._emitter.to_graph()
+            self._emitter = self._graph
+
+        return self._graph
+
+    def _set_log_level(self) -> None:
+        """Set the log level for the plugin and its dependencies.
+
+        This may need to be called multiple times, since Airflow sometimes
+        messes with the logging configuration after the plugin is loaded.
+        In particular, the loggers may get changed when the worker starts
+        executing a task.
+        """
+
+        if self.config.log_level:
+            logging.getLogger(__name__.split(".")[0]).setLevel(self.config.log_level)
+        if self.config.debug_emitter:
+            logging.getLogger("datahub.emitter").setLevel(logging.DEBUG)
+
+    def _make_emit_callback(self) -> Callable[[Optional[Exception], str], None]:
+        def emit_callback(err: Optional[Exception], msg: str) -> None:
+            if err:
+                logger.error(f"Error sending metadata to datahub: {msg}", exc_info=err)
+
+        return emit_callback
+
+    def _extract_lineage(
+        self,
+        datajob: DataJob,
+        dagrun: "DagRun",
+        task: "Operator",
+        task_instance: "TaskInstance",
+        complete: bool = False,
+    ) -> None:
+        """
+        Combine lineage (including column lineage) from task inlets/outlets and
+        extractor-generated task_metadata and write it to the datajob. This
+        routine is also responsible for converting the lineage to DataHub URNs.
+        """
+
+        input_urns: List[str] = []
+        output_urns: List[str] = []
+        fine_grained_lineages: List[FineGrainedLineageClass] = []
+
+        task_metadata = None
+        if self.config.enable_extractors:
+            task_metadata = self.extractor_manager.extract_metadata(
+                dagrun,
+                task,
+                complete=complete,
+                task_instance=task_instance,
+                task_uuid=str(datajob.urn),
+                graph=self.graph,
+            )
+            logger.debug(f"Got task metadata: {task_metadata}")
+
+            # Translate task_metadata.inputs/outputs to DataHub URNs.
+            input_urns.extend(
+                translate_ol_to_datahub_urn(dataset) for dataset in task_metadata.inputs
+            )
+            output_urns.extend(
+                translate_ol_to_datahub_urn(dataset)
+                for dataset in task_metadata.outputs
+            )
+
+        # Add DataHub-native SQL parser results.
+        sql_parsing_result: Optional[SqlParsingResult] = None
+        if task_metadata:
+            sql_parsing_result = task_metadata.run_facets.pop(
+                SQL_PARSING_RESULT_KEY, None
+            )
+        if sql_parsing_result:
+            if sql_parsing_result.debug_info.error:
+                datajob.properties["datahub_sql_parser_error"] = str(
+                    sql_parsing_result.debug_info.error
+                )
+            if not sql_parsing_result.debug_info.table_error:
+                input_urns.extend(sql_parsing_result.in_tables)
+                output_urns.extend(sql_parsing_result.out_tables)
+
+                if sql_parsing_result.column_lineage:
+                    fine_grained_lineages.extend(
+                        FineGrainedLineageClass(
+                            upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
+                            downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
+                            upstreams=[
+                                builder.make_schema_field_urn(
+                                    upstream.table, upstream.column
+                                )
+                                for upstream in column_lineage.upstreams
+                            ],
+                            downstreams=[
+                                builder.make_schema_field_urn(
+                                    downstream.table, downstream.column
+                                )
+                                for downstream in [column_lineage.downstream]
+                                if downstream.table
+                            ],
+                        )
+                        for column_lineage in sql_parsing_result.column_lineage
+                    )
+
+        # Add DataHub-native inlets/outlets.
+        # These are filtered out by the extractor, so we need to add them manually.
+        input_urns.extend(
+            iolet.urn for iolet in get_task_inlets(task) if isinstance(iolet, _Entity)
+        )
+        output_urns.extend(
+            iolet.urn for iolet in get_task_outlets(task) if isinstance(iolet, _Entity)
+        )
+
+        # Write the lineage to the datajob object.
+        datajob.inlets.extend(DatasetUrn.create_from_string(urn) for urn in input_urns)
+        datajob.outlets.extend(
+            DatasetUrn.create_from_string(urn) for urn in output_urns
+        )
+        datajob.fine_grained_lineages.extend(fine_grained_lineages)
+
+        # Merge in extra stuff that was present in the DataJob we constructed
+        # at the start of the task.
+        if complete:
+            original_datajob = self._datajob_holder.get(str(datajob.urn), None)
+        else:
+            self._datajob_holder[str(datajob.urn)] = datajob
+            original_datajob = None
+
+        if original_datajob:
+            logger.debug("Merging start datajob into finish datajob")
+            datajob.inlets.extend(original_datajob.inlets)
+            datajob.outlets.extend(original_datajob.outlets)
+            datajob.fine_grained_lineages.extend(original_datajob.fine_grained_lineages)
+
+            for k, v in original_datajob.properties.items():
+                datajob.properties.setdefault(k, v)
+
+        # Deduplicate inlets/outlets.
+        datajob.inlets = list(sorted(set(datajob.inlets), key=lambda x: str(x)))
+        datajob.outlets = list(sorted(set(datajob.outlets), key=lambda x: str(x)))
+
+        # Write all other OL facets as DataHub properties.
+        if task_metadata:
+            for k, v in task_metadata.job_facets.items():
+                datajob.properties[f"openlineage_job_facet_{k}"] = Serde.to_json(
+                    redact_with_exclusions(v)
+                )
+
+            for k, v in task_metadata.run_facets.items():
+                datajob.properties[f"openlineage_run_facet_{k}"] = Serde.to_json(
+                    redact_with_exclusions(v)
+                )
+
+    @hookimpl
+    @run_in_thread
+    def on_task_instance_running(
+        self,
+        previous_state: None,  # This will always be QUEUED
+        task_instance: "TaskInstance",
+        session: "Session",
+    ) -> None:
+        self._set_log_level()
+
+        # This if statement mirrors the logic in https://github.com/OpenLineage/OpenLineage/pull/508.
+        if not hasattr(task_instance, "task"):
+            # The type ignore is to placate mypy on Airflow 2.1.x.
+            logger.warning(
+                f"No task set for task_id: {task_instance.task_id} - "  # type: ignore[attr-defined]
+                f"dag_id: {task_instance.dag_id} - run_id {task_instance.run_id}"  # type: ignore[attr-defined]
+            )
+            return
+
+        logger.debug(
+            f"DataHub listener got notification about task instance start for {task_instance.task_id}"
+        )
+
+        # Render templates in a copy of the task instance.
+        # This is necessary to get the correct operator args in the extractors.
+        task_instance = copy.deepcopy(task_instance)
+        task_instance.render_templates()
+
+        # The type ignore is to placate mypy on Airflow 2.1.x.
+        dagrun: "DagRun" = task_instance.dag_run  # type: ignore[attr-defined]
+        task = task_instance.task
+        dag: "DAG" = task.dag  # type: ignore[assignment]
+
+        self._task_holder.set_task(task_instance)
+
+        # Handle async operators in Airflow 2.3 by skipping deferred state.
+        # Inspired by https://github.com/OpenLineage/OpenLineage/pull/1601
+        if task_instance.next_method is not None:  # type: ignore[attr-defined]
+            return
+
+        # If we don't have the DAG listener API, we just pretend that
+        # the start of the task is the start of the DAG.
+        # This generates duplicate events, but it's better than not
+        # generating anything.
+        if not HAS_AIRFLOW_DAG_LISTENER_API:
+            self.on_dag_start(dagrun)
+
+        datajob = AirflowGenerator.generate_datajob(
+            cluster=self.config.cluster,
+            task=task,
+            dag=dag,
+            capture_tags=self.config.capture_tags_info,
+            capture_owner=self.config.capture_ownership_info,
+        )
+
+        # TODO: Make use of get_task_location to extract github urls.
+
+        # Add lineage info.
+        self._extract_lineage(datajob, dagrun, task, task_instance)
+
+        # TODO: Add handling for Airflow mapped tasks using task_instance.map_index
+
+        datajob.emit(self.emitter, callback=self._make_emit_callback())
+        logger.debug(f"Emitted DataHub Datajob start: {datajob}")
+
+        if self.config.capture_executions:
+            dpi = AirflowGenerator.run_datajob(
+                emitter=self.emitter,
+                cluster=self.config.cluster,
+                ti=task_instance,
+                dag=dag,
+                dag_run=dagrun,
+                datajob=datajob,
+                emit_templates=False,
+            )
+            logger.debug(f"Emitted DataHub DataProcess Instance start: {dpi}")
+
+        self.emitter.flush()
+
+        logger.debug(
+            f"DataHub listener finished processing notification about task instance start for {task_instance.task_id}"
+        )
+
+    def on_task_instance_finish(
+        self, task_instance: "TaskInstance", status: InstanceRunResult
+    ) -> None:
+        dagrun: "DagRun" = task_instance.dag_run  # type: ignore[attr-defined]
+        task = self._task_holder.get_task(task_instance) or task_instance.task
+        dag: "DAG" = task.dag  # type: ignore[assignment]
+
+        datajob = AirflowGenerator.generate_datajob(
+            cluster=self.config.cluster,
+            task=task,
+            dag=dag,
+            capture_tags=self.config.capture_tags_info,
+            capture_owner=self.config.capture_ownership_info,
+        )
+
+        # Add lineage info.
+        self._extract_lineage(datajob, dagrun, task, task_instance, complete=True)
+
+        datajob.emit(self.emitter, callback=self._make_emit_callback())
+        logger.debug(f"Emitted DataHub Datajob finish w/ status {status}: {datajob}")
+
+        if self.config.capture_executions:
+            dpi = AirflowGenerator.complete_datajob(
+                emitter=self.emitter,
+                cluster=self.config.cluster,
+                ti=task_instance,
+                dag=dag,
+                dag_run=dagrun,
+                datajob=datajob,
+                result=status,
+            )
+            logger.debug(
+                f"Emitted DataHub DataProcess Instance with status {status}: {dpi}"
+            )
+
+        self.emitter.flush()
+
+    @hookimpl
+    @run_in_thread
+    def on_task_instance_success(
+        self, previous_state: None, task_instance: "TaskInstance", session: "Session"
+    ) -> None:
+        self._set_log_level()
+
+        logger.debug(
+            f"DataHub listener got notification about task instance success for {task_instance.task_id}"
+        )
+        self.on_task_instance_finish(task_instance, status=InstanceRunResult.SUCCESS)
+        logger.debug(
+            f"DataHub listener finished processing task instance success for {task_instance.task_id}"
+        )
+
+    @hookimpl
+    @run_in_thread
+    def on_task_instance_failed(
+        self, previous_state: None, task_instance: "TaskInstance", session: "Session"
+    ) -> None:
+        self._set_log_level()
+
+        logger.debug(
+            f"DataHub listener got notification about task instance failure for {task_instance.task_id}"
+        )
+
+        # TODO: Handle UP_FOR_RETRY state.
+        self.on_task_instance_finish(task_instance, status=InstanceRunResult.FAILURE)
+        logger.debug(
+            f"DataHub listener finished processing task instance failure for {task_instance.task_id}"
+        )
+
+    def on_dag_start(self, dag_run: "DagRun") -> None:
+        dag = dag_run.dag
+        if not dag:
+            return
+
+        dataflow = AirflowGenerator.generate_dataflow(
+            cluster=self.config.cluster,
+            dag=dag,
+            capture_tags=self.config.capture_tags_info,
+            capture_owner=self.config.capture_ownership_info,
+        )
+        dataflow.emit(self.emitter, callback=self._make_emit_callback())
+
+    if HAS_AIRFLOW_DAG_LISTENER_API:
+
+        @hookimpl
+        @run_in_thread
+        def on_dag_run_running(self, dag_run: "DagRun", msg: str) -> None:
+            self._set_log_level()
+
+            logger.debug(
+                f"DataHub listener got notification about dag run start for {dag_run.dag_id}"
+            )
+
+            self.on_dag_start(dag_run)
+
+            self.emitter.flush()
+
+    # TODO: Add hooks for on_dag_run_success, on_dag_run_failed -> call AirflowGenerator.complete_dataflow
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py
index d1cec9e5c1b54..c96fab31647f5 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py
@@ -1,367 +1,74 @@
 import contextlib
 import logging
-import traceback
-from typing import Any, Callable, Iterable, List, Optional, Union
+import os
 
-from airflow.configuration import conf
-from airflow.lineage import PIPELINE_OUTLETS
-from airflow.models.baseoperator import BaseOperator
 from airflow.plugins_manager import AirflowPlugin
-from airflow.utils.module_loading import import_string
-from cattr import structure
-from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
 
 from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED
-from datahub_airflow_plugin._airflow_shims import MappedOperator, Operator
-from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
-from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook
-from datahub_airflow_plugin.lineage.datahub import DatahubLineageConfig
+from datahub_airflow_plugin._airflow_shims import (
+    HAS_AIRFLOW_DAG_LISTENER_API,
+    HAS_AIRFLOW_LISTENER_API,
+)
 
 assert AIRFLOW_PATCHED
 logger = logging.getLogger(__name__)
 
-TASK_ON_FAILURE_CALLBACK = "on_failure_callback"
-TASK_ON_SUCCESS_CALLBACK = "on_success_callback"
 
+_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv(
+    "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false"
+).lower() in ("true", "1")
 
-def get_lineage_config() -> DatahubLineageConfig:
-    """Load the lineage config from airflow.cfg."""
+if _USE_AIRFLOW_LISTENER_INTERFACE:
+    try:
+        from openlineage.airflow.utils import try_import_from_string  # noqa: F401
+    except ImportError:
+        # If v2 plugin dependencies are not installed, we fall back to v1.
+        logger.debug("Falling back to v1 plugin due to missing dependencies.")
+        _USE_AIRFLOW_LISTENER_INTERFACE = False
 
-    enabled = conf.get("datahub", "enabled", fallback=True)
-    datahub_conn_id = conf.get("datahub", "conn_id", fallback="datahub_rest_default")
-    cluster = conf.get("datahub", "cluster", fallback="prod")
-    graceful_exceptions = conf.get("datahub", "graceful_exceptions", fallback=True)
-    capture_tags_info = conf.get("datahub", "capture_tags_info", fallback=True)
-    capture_ownership_info = conf.get(
-        "datahub", "capture_ownership_info", fallback=True
-    )
-    capture_executions = conf.get("datahub", "capture_executions", fallback=True)
-    return DatahubLineageConfig(
-        enabled=enabled,
-        datahub_conn_id=datahub_conn_id,
-        cluster=cluster,
-        graceful_exceptions=graceful_exceptions,
-        capture_ownership_info=capture_ownership_info,
-        capture_tags_info=capture_tags_info,
-        capture_executions=capture_executions,
-    )
 
+with contextlib.suppress(Exception):
+    if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in (
+        "true",
+        "1",
+    ):
+        # From https://github.com/apache/airflow/discussions/24463#discussioncomment-4404542
+        # I'm not exactly sure why this fixes it, but I suspect it's that this
+        # forces the proxy settings to get cached before the fork happens.
+        #
+        # For more details, see https://github.com/python/cpython/issues/58037
+        # and https://wefearchange.org/2018/11/forkmacos.rst.html
+        # and https://bugs.python.org/issue30385#msg293958
+        # An alternative fix is to set NO_PROXY='*'
 
-def _task_inlets(operator: "Operator") -> List:
-    # From Airflow 2.4 _inlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _inlets
-    if hasattr(operator, "_inlets"):
-        return operator._inlets  # type: ignore[attr-defined, union-attr]
-    return operator.inlets
+        from _scproxy import _get_proxy_settings
 
+        _get_proxy_settings()
 
-def _task_outlets(operator: "Operator") -> List:
-    # From Airflow 2.4 _outlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _outlets
-    # We have to use _outlets because outlets is empty in Airflow < 2.4.0
-    if hasattr(operator, "_outlets"):
-        return operator._outlets  # type: ignore[attr-defined, union-attr]
-    return operator.outlets
 
+class DatahubPlugin(AirflowPlugin):
+    name = "datahub_plugin"
 
-def get_inlets_from_task(task: BaseOperator, context: Any) -> Iterable[Any]:
-    # TODO: Fix for https://github.com/apache/airflow/commit/1b1f3fabc5909a447a6277cafef3a0d4ef1f01ae
-    # in Airflow 2.4.
-    # TODO: ignore/handle airflow's dataset type in our lineage
-
-    inlets: List[Any] = []
-    task_inlets = _task_inlets(task)
-    # From Airflow 2.3 this should be AbstractOperator but due to compatibility reason lets use BaseOperator
-    if isinstance(task_inlets, (str, BaseOperator)):
-        inlets = [
-            task_inlets,
-        ]
-
-    if task_inlets and isinstance(task_inlets, list):
-        inlets = []
-        task_ids = (
-            {o for o in task_inlets if isinstance(o, str)}
-            .union(op.task_id for op in task_inlets if isinstance(op, BaseOperator))
-            .intersection(task.get_flat_relative_ids(upstream=True))
-        )
-
-        from airflow.lineage import AUTO
-
-        # pick up unique direct upstream task_ids if AUTO is specified
-        if AUTO.upper() in task_inlets or AUTO.lower() in task_inlets:
-            print("Picking up unique direct upstream task_ids as AUTO is specified")
-            task_ids = task_ids.union(
-                task_ids.symmetric_difference(task.upstream_task_ids)
-            )
-
-        inlets = task.xcom_pull(
-            context, task_ids=list(task_ids), dag_id=task.dag_id, key=PIPELINE_OUTLETS
-        )
-
-        # re-instantiate the obtained inlets
-        inlets = [
-            structure(item["data"], import_string(item["type_name"]))
-            # _get_instance(structure(item, Metadata))
-            for sublist in inlets
-            if sublist
-            for item in sublist
-        ]
-
-        for inlet in task_inlets:
-            if not isinstance(inlet, str):
-                inlets.append(inlet)
-
-    return inlets
-
-
-def _make_emit_callback(
-    logger: logging.Logger,
-) -> Callable[[Optional[Exception], str], None]:
-    def emit_callback(err: Optional[Exception], msg: str) -> None:
-        if err:
-            logger.error(f"Error sending metadata to datahub: {msg}", exc_info=err)
-
-    return emit_callback
-
-
-def datahub_task_status_callback(context, status):
-    ti = context["ti"]
-    task: "BaseOperator" = ti.task
-    dag = context["dag"]
-
-    # This code is from the original airflow lineage code ->
-    # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py
-    inlets = get_inlets_from_task(task, context)
-
-    emitter = (
-        DatahubGenericHook(context["_datahub_config"].datahub_conn_id)
-        .get_underlying_hook()
-        .make_emitter()
-    )
-
-    dataflow = AirflowGenerator.generate_dataflow(
-        cluster=context["_datahub_config"].cluster,
-        dag=dag,
-        capture_tags=context["_datahub_config"].capture_tags_info,
-        capture_owner=context["_datahub_config"].capture_ownership_info,
-    )
-    task.log.info(f"Emitting Datahub Dataflow: {dataflow}")
-    dataflow.emit(emitter, callback=_make_emit_callback(task.log))
-
-    datajob = AirflowGenerator.generate_datajob(
-        cluster=context["_datahub_config"].cluster,
-        task=task,
-        dag=dag,
-        capture_tags=context["_datahub_config"].capture_tags_info,
-        capture_owner=context["_datahub_config"].capture_ownership_info,
-    )
-
-    for inlet in inlets:
-        datajob.inlets.append(inlet.urn)
-
-    task_outlets = _task_outlets(task)
-    for outlet in task_outlets:
-        datajob.outlets.append(outlet.urn)
-
-    task.log.info(f"Emitting Datahub Datajob: {datajob}")
-    datajob.emit(emitter, callback=_make_emit_callback(task.log))
-
-    if context["_datahub_config"].capture_executions:
-        dpi = AirflowGenerator.run_datajob(
-            emitter=emitter,
-            cluster=context["_datahub_config"].cluster,
-            ti=context["ti"],
-            dag=dag,
-            dag_run=context["dag_run"],
-            datajob=datajob,
-            start_timestamp_millis=int(ti.start_date.timestamp() * 1000),
-        )
-
-        task.log.info(f"Emitted Start Datahub Dataprocess Instance: {dpi}")
-
-        dpi = AirflowGenerator.complete_datajob(
-            emitter=emitter,
-            cluster=context["_datahub_config"].cluster,
-            ti=context["ti"],
-            dag_run=context["dag_run"],
-            result=status,
-            dag=dag,
-            datajob=datajob,
-            end_timestamp_millis=int(ti.end_date.timestamp() * 1000),
-        )
-        task.log.info(f"Emitted Completed Data Process Instance: {dpi}")
-
-    emitter.flush()
-
-
-def datahub_pre_execution(context):
-    ti = context["ti"]
-    task: "BaseOperator" = ti.task
-    dag = context["dag"]
-
-    task.log.info("Running Datahub pre_execute method")
-
-    emitter = (
-        DatahubGenericHook(context["_datahub_config"].datahub_conn_id)
-        .get_underlying_hook()
-        .make_emitter()
-    )
-
-    # This code is from the original airflow lineage code ->
-    # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py
-    inlets = get_inlets_from_task(task, context)
-
-    datajob = AirflowGenerator.generate_datajob(
-        cluster=context["_datahub_config"].cluster,
-        task=context["ti"].task,
-        dag=dag,
-        capture_tags=context["_datahub_config"].capture_tags_info,
-        capture_owner=context["_datahub_config"].capture_ownership_info,
-    )
-
-    for inlet in inlets:
-        datajob.inlets.append(inlet.urn)
-
-    task_outlets = _task_outlets(task)
-
-    for outlet in task_outlets:
-        datajob.outlets.append(outlet.urn)
-
-    task.log.info(f"Emitting Datahub dataJob {datajob}")
-    datajob.emit(emitter, callback=_make_emit_callback(task.log))
-
-    if context["_datahub_config"].capture_executions:
-        dpi = AirflowGenerator.run_datajob(
-            emitter=emitter,
-            cluster=context["_datahub_config"].cluster,
-            ti=context["ti"],
-            dag=dag,
-            dag_run=context["dag_run"],
-            datajob=datajob,
-            start_timestamp_millis=int(ti.start_date.timestamp() * 1000),
-        )
-
-        task.log.info(f"Emitting Datahub Dataprocess Instance: {dpi}")
-
-    emitter.flush()
-
-
-def _wrap_pre_execution(pre_execution):
-    def custom_pre_execution(context):
-        config = get_lineage_config()
-        if config.enabled:
-            context["_datahub_config"] = config
-            datahub_pre_execution(context)
-
-        # Call original policy
-        if pre_execution:
-            pre_execution(context)
-
-    return custom_pre_execution
-
-
-def _wrap_on_failure_callback(on_failure_callback):
-    def custom_on_failure_callback(context):
-        config = get_lineage_config()
-        if config.enabled:
-            context["_datahub_config"] = config
-            try:
-                datahub_task_status_callback(context, status=InstanceRunResult.FAILURE)
-            except Exception as e:
-                if not config.graceful_exceptions:
-                    raise e
-                else:
-                    print(f"Exception: {traceback.format_exc()}")
-
-        # Call original policy
-        if on_failure_callback:
-            on_failure_callback(context)
-
-    return custom_on_failure_callback
-
-
-def _wrap_on_success_callback(on_success_callback):
-    def custom_on_success_callback(context):
-        config = get_lineage_config()
-        if config.enabled:
-            context["_datahub_config"] = config
-            try:
-                datahub_task_status_callback(context, status=InstanceRunResult.SUCCESS)
-            except Exception as e:
-                if not config.graceful_exceptions:
-                    raise e
-                else:
-                    print(f"Exception: {traceback.format_exc()}")
-
-        # Call original policy
-        if on_success_callback:
-            on_success_callback(context)
-
-    return custom_on_success_callback
-
-
-def task_policy(task: Union[BaseOperator, MappedOperator]) -> None:
-    task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}")
-    # task.add_inlets(["auto"])
-    # task.pre_execute = _wrap_pre_execution(task.pre_execute)
-
-    # MappedOperator's callbacks don't have setters until Airflow 2.X.X
-    # https://github.com/apache/airflow/issues/24547
-    # We can bypass this by going through partial_kwargs for now
-    if MappedOperator and isinstance(task, MappedOperator):  # type: ignore
-        on_failure_callback_prop: property = getattr(
-            MappedOperator, TASK_ON_FAILURE_CALLBACK
-        )
-        on_success_callback_prop: property = getattr(
-            MappedOperator, TASK_ON_SUCCESS_CALLBACK
-        )
-        if not on_failure_callback_prop.fset or not on_success_callback_prop.fset:
-            task.log.debug(
-                "Using MappedOperator's partial_kwargs instead of callback properties"
-            )
-            task.partial_kwargs[TASK_ON_FAILURE_CALLBACK] = _wrap_on_failure_callback(
-                task.on_failure_callback
+    if _USE_AIRFLOW_LISTENER_INTERFACE:
+        if HAS_AIRFLOW_DAG_LISTENER_API:
+            from datahub_airflow_plugin.datahub_listener import (  # type: ignore[misc]
+                get_airflow_plugin_listener,
             )
-            task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback(
-                task.on_success_callback
-            )
-            return
-
-    task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback)  # type: ignore
-    task.on_success_callback = _wrap_on_success_callback(task.on_success_callback)  # type: ignore
-    # task.pre_execute = _wrap_pre_execution(task.pre_execute)
-
-
-def _wrap_task_policy(policy):
-    if policy and hasattr(policy, "_task_policy_patched_by"):
-        return policy
-
-    def custom_task_policy(task):
-        policy(task)
-        task_policy(task)
-
-    # Add a flag to the policy to indicate that we've patched it.
-    custom_task_policy._task_policy_patched_by = "datahub_plugin"  # type: ignore[attr-defined]
-    return custom_task_policy
 
+            listeners: list = list(filter(None, [get_airflow_plugin_listener()]))
 
-def _patch_policy(settings):
-    if hasattr(settings, "task_policy"):
-        datahub_task_policy = _wrap_task_policy(settings.task_policy)
-        settings.task_policy = datahub_task_policy
+        else:
+            # On Airflow < 2.5, we need the listener to be a module.
+            # This is just a quick shim layer to make that work.
+            # The DAG listener API was added at the same time as this method
+            # was fixed, so we're reusing the same check variable.
+            #
+            # Related Airflow change: https://github.com/apache/airflow/pull/27113.
+            import datahub_airflow_plugin._datahub_listener_module as _listener_module  # type: ignore[misc]
 
+            listeners = [_listener_module]
 
-def _patch_datahub_policy():
-    with contextlib.suppress(ImportError):
-        import airflow_local_settings
 
-        _patch_policy(airflow_local_settings)
-
-    from airflow.models.dagbag import settings
-
-    _patch_policy(settings)
-
-
-_patch_datahub_policy()
-
-
-class DatahubPlugin(AirflowPlugin):
-    name = "datahub_plugin"
+if not _USE_AIRFLOW_LISTENER_INTERFACE:
+    # Use the policy patcher mechanism on Airflow 2.2 and below.
+    import datahub_airflow_plugin.datahub_plugin_v22  # noqa: F401
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
new file mode 100644
index 0000000000000..046fbb5efaa03
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
@@ -0,0 +1,336 @@
+import contextlib
+import logging
+import traceback
+from typing import Any, Callable, Iterable, List, Optional, Union
+
+import airflow
+from airflow.lineage import PIPELINE_OUTLETS
+from airflow.models.baseoperator import BaseOperator
+from airflow.utils.module_loading import import_string
+from cattr import structure
+from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
+from datahub.telemetry import telemetry
+
+from datahub_airflow_plugin._airflow_shims import (
+    MappedOperator,
+    get_task_inlets,
+    get_task_outlets,
+)
+from datahub_airflow_plugin._config import get_lineage_config
+from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
+from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook
+from datahub_airflow_plugin.lineage.datahub import DatahubLineageConfig
+
+TASK_ON_FAILURE_CALLBACK = "on_failure_callback"
+TASK_ON_SUCCESS_CALLBACK = "on_success_callback"
+
+
+def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]:
+    """Return the inlets for `task`, expanding upstream-task references via XCom.
+
+    String and operator entries that are upstream of `task` have their
+    PIPELINE_OUTLETS XCom values re-instantiated; non-string entries are kept.
+    """
+    # TODO: Fix for https://github.com/apache/airflow/commit/1b1f3fabc5909a447a6277cafef3a0d4ef1f01ae
+    # in Airflow 2.4.
+    # TODO: ignore/handle airflow's dataset type in our lineage
+
+    inlets: List[Any] = []
+    task_inlets = get_task_inlets(task)
+    # From Airflow 2.3 this should be AbstractOperator, but for compatibility we use BaseOperator.
+    if isinstance(task_inlets, (str, BaseOperator)):
+        inlets = [task_inlets]
+
+    if task_inlets and isinstance(task_inlets, list):
+        # Only names that are actual upstream relatives of this task are considered.
+        task_ids = (
+            {o for o in task_inlets if isinstance(o, str)}
+            .union(op.task_id for op in task_inlets if isinstance(op, BaseOperator))
+            .intersection(task.get_flat_relative_ids(upstream=True))
+        )
+
+        from airflow.lineage import AUTO
+
+        # pick up unique direct upstream task_ids if AUTO is specified
+        if AUTO.upper() in task_inlets or AUTO.lower() in task_inlets:
+            print("Picking up unique direct upstream task_ids as AUTO is specified")
+            # A | (A ^ B) == A | B, so a plain union gives the same set.
+            task_ids = task_ids.union(task.upstream_task_ids)
+
+        pulled = task.xcom_pull(
+            context, task_ids=list(task_ids), dag_id=task.dag_id, key=PIPELINE_OUTLETS
+        )
+
+        # re-instantiate the obtained inlets
+        inlets = [
+            structure(item["data"], import_string(item["type_name"]))
+            for sublist in pulled
+            if sublist
+            for item in sublist
+        ]
+
+        # Non-string entries are passed through unchanged, in their original order.
+        inlets.extend(inlet for inlet in task_inlets if not isinstance(inlet, str))
+
+    return inlets
+
+
+def _make_emit_callback(
+    logger: logging.Logger,
+) -> Callable[[Optional[Exception], str], None]:
+    def emit_callback(err: Optional[Exception], msg: str) -> None:
+        if err:
+            logger.error(f"Error sending metadata to datahub: {msg}", exc_info=err)
+
+    return emit_callback
+
+
+def datahub_task_status_callback(context, status):
+    ti = context["ti"]
+    task: "BaseOperator" = ti.task
+    dag = context["dag"]
+    config: DatahubLineageConfig = context["_datahub_config"]
+
+    # This code is from the original airflow lineage code ->
+    # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py
+    inlets = get_task_inlets_advanced(task, context)
+
+    emitter = (
+        DatahubGenericHook(config.datahub_conn_id).get_underlying_hook().make_emitter()
+    )
+
+    dataflow = AirflowGenerator.generate_dataflow(
+        cluster=config.cluster,
+        dag=dag,
+        capture_tags=config.capture_tags_info,
+        capture_owner=config.capture_ownership_info,
+    )
+    task.log.info(f"Emitting Datahub Dataflow: {dataflow}")
+    dataflow.emit(emitter, callback=_make_emit_callback(task.log))
+
+    datajob = AirflowGenerator.generate_datajob(
+        cluster=config.cluster,
+        task=task,
+        dag=dag,
+        capture_tags=config.capture_tags_info,
+        capture_owner=config.capture_ownership_info,
+    )
+
+    for inlet in inlets:
+        datajob.inlets.append(inlet.urn)
+
+    task_outlets = get_task_outlets(task)
+    for outlet in task_outlets:
+        datajob.outlets.append(outlet.urn)
+
+    task.log.info(f"Emitting Datahub Datajob: {datajob}")
+    datajob.emit(emitter, callback=_make_emit_callback(task.log))
+
+    if config.capture_executions:
+        dpi = AirflowGenerator.run_datajob(
+            emitter=emitter,
+            cluster=config.cluster,
+            ti=ti,
+            dag=dag,
+            dag_run=context["dag_run"],
+            datajob=datajob,
+            start_timestamp_millis=int(ti.start_date.timestamp() * 1000),
+        )
+
+        task.log.info(f"Emitted Start Datahub Dataprocess Instance: {dpi}")
+
+        dpi = AirflowGenerator.complete_datajob(
+            emitter=emitter,
+            cluster=config.cluster,
+            ti=ti,
+            dag_run=context["dag_run"],
+            result=status,
+            dag=dag,
+            datajob=datajob,
+            end_timestamp_millis=int(ti.end_date.timestamp() * 1000),
+        )
+        task.log.info(f"Emitted Completed Data Process Instance: {dpi}")
+
+    emitter.flush()
+
+
+def datahub_pre_execution(context):
+    ti = context["ti"]
+    task: "BaseOperator" = ti.task
+    dag = context["dag"]
+    config: DatahubLineageConfig = context["_datahub_config"]
+
+    task.log.info("Running Datahub pre_execute method")
+
+    emitter = (
+        DatahubGenericHook(config.datahub_conn_id).get_underlying_hook().make_emitter()
+    )
+
+    # This code is from the original airflow lineage code ->
+    # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py
+    inlets = get_task_inlets_advanced(task, context)
+
+    datajob = AirflowGenerator.generate_datajob(
+        cluster=config.cluster,
+        task=ti.task,
+        dag=dag,
+        capture_tags=config.capture_tags_info,
+        capture_owner=config.capture_ownership_info,
+    )
+
+    for inlet in inlets:
+        datajob.inlets.append(inlet.urn)
+
+    task_outlets = get_task_outlets(task)
+
+    for outlet in task_outlets:
+        datajob.outlets.append(outlet.urn)
+
+    task.log.info(f"Emitting Datahub dataJob {datajob}")
+    datajob.emit(emitter, callback=_make_emit_callback(task.log))
+
+    if config.capture_executions:
+        dpi = AirflowGenerator.run_datajob(
+            emitter=emitter,
+            cluster=config.cluster,
+            ti=ti,
+            dag=dag,
+            dag_run=context["dag_run"],
+            datajob=datajob,
+            start_timestamp_millis=int(ti.start_date.timestamp() * 1000),
+        )
+
+        task.log.info(f"Emitting Datahub Dataprocess Instance: {dpi}")
+
+    emitter.flush()
+
+
+def _wrap_pre_execution(pre_execution):
+    def custom_pre_execution(context):
+        config = get_lineage_config()
+        if config.enabled:
+            context["_datahub_config"] = config
+            datahub_pre_execution(context)
+
+        # Call original policy
+        if pre_execution:
+            pre_execution(context)
+
+    return custom_pre_execution
+
+
+def _wrap_on_failure_callback(on_failure_callback):
+    """Return a failure callback that reports to DataHub, then calls the original."""
+
+    def custom_on_failure_callback(context):
+        config = get_lineage_config()
+        if config.enabled:
+            context["_datahub_config"] = config
+            try:
+                datahub_task_status_callback(context, status=InstanceRunResult.FAILURE)
+            except Exception:
+                if not config.graceful_exceptions:
+                    raise  # re-raise the active exception unchanged
+                print(f"Exception: {traceback.format_exc()}")
+        # Always chain to the user's own callback, if one was set.
+        if on_failure_callback:
+            on_failure_callback(context)
+
+    return custom_on_failure_callback
+
+
+def _wrap_on_success_callback(on_success_callback):
+    """Return a success callback that reports to DataHub, then calls the original."""
+
+    def custom_on_success_callback(context):
+        config = get_lineage_config()
+        if config.enabled:
+            context["_datahub_config"] = config
+            try:
+                datahub_task_status_callback(context, status=InstanceRunResult.SUCCESS)
+            except Exception:
+                if not config.graceful_exceptions:
+                    raise  # re-raise the active exception unchanged
+                print(f"Exception: {traceback.format_exc()}")
+        # Always chain to the user's own callback, if one was set.
+        if on_success_callback:
+            on_success_callback(context)
+
+    return custom_on_success_callback
+
+
+def task_policy(task: Union[BaseOperator, MappedOperator]) -> None:
+    task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}")
+    # task.add_inlets(["auto"])
+    # task.pre_execute = _wrap_pre_execution(task.pre_execute)
+
+    # MappedOperator's callbacks don't have setters until Airflow 2.X.X
+    # https://github.com/apache/airflow/issues/24547
+    # We can bypass this by going through partial_kwargs for now
+    if MappedOperator and isinstance(task, MappedOperator):  # type: ignore
+        on_failure_callback_prop: property = getattr(
+            MappedOperator, TASK_ON_FAILURE_CALLBACK
+        )
+        on_success_callback_prop: property = getattr(
+            MappedOperator, TASK_ON_SUCCESS_CALLBACK
+        )
+        if not on_failure_callback_prop.fset or not on_success_callback_prop.fset:
+            task.log.debug(
+                "Using MappedOperator's partial_kwargs instead of callback properties"
+            )
+            task.partial_kwargs[TASK_ON_FAILURE_CALLBACK] = _wrap_on_failure_callback(
+                task.on_failure_callback
+            )
+            task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback(
+                task.on_success_callback
+            )
+            return
+
+    task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback)  # type: ignore
+    task.on_success_callback = _wrap_on_success_callback(task.on_success_callback)  # type: ignore
+    # task.pre_execute = _wrap_pre_execution(task.pre_execute)
+
+
+def _wrap_task_policy(policy):
+    if policy and hasattr(policy, "_task_policy_patched_by"):
+        return policy
+
+    def custom_task_policy(task):
+        policy(task)
+        task_policy(task)
+
+    # Add a flag to the policy to indicate that we've patched it.
+    custom_task_policy._task_policy_patched_by = "datahub_plugin"  # type: ignore[attr-defined]
+    return custom_task_policy
+
+
+def _patch_policy(settings):
+    """Wrap `settings.task_policy` with the DataHub policy, if it is defined."""
+    if hasattr(settings, "task_policy"):
+        settings.task_policy = _wrap_task_policy(settings.task_policy)
+
+
+def _patch_datahub_policy():
+    with contextlib.suppress(ImportError):
+        import airflow_local_settings
+
+        _patch_policy(airflow_local_settings)
+
+    from airflow.models.dagbag import settings
+
+    _patch_policy(settings)
+
+    plugin_config = get_lineage_config()
+    telemetry.telemetry_instance.ping(
+        "airflow-plugin-init",
+        {
+            "airflow-version": airflow.__version__,
+            "datahub-airflow-plugin": "v1",
+            "capture_executions": plugin_config.capture_executions,
+            "capture_tags": plugin_config.capture_tags_info,
+            "capture_ownership": plugin_config.capture_ownership_info,
+        },
+    )
+
+
+_patch_datahub_policy()
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
index f40295c6bb883..0d7cdb6b6e90a 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
@@ -2,12 +2,11 @@
 
 This example demonstrates how to emit lineage to DataHub within an Airflow DAG.
 """
-
 from datetime import timedelta
 
 import datahub.emitter.mce_builder as builder
 from airflow import DAG
-from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator
+from airflow.operators.bash import BashOperator
 from airflow.utils.dates import days_ago
 
 from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator
@@ -33,23 +32,10 @@
     catchup=False,
     default_view="tree",
 ) as dag:
-    # This example shows a SnowflakeOperator followed by a lineage emission. However, the
-    # same DatahubEmitterOperator can be used to emit lineage in any context.
-
-    sql = """CREATE OR REPLACE TABLE `mydb.schema.tableC` AS
-            WITH some_table AS (
-              SELECT * FROM `mydb.schema.tableA`
-            ),
-            some_other_table AS (
-              SELECT id, some_column FROM `mydb.schema.tableB`
-            )
-            SELECT * FROM some_table
-            LEFT JOIN some_other_table ON some_table.unique_id=some_other_table.id"""
-    transformation_task = SnowflakeOperator(
-        task_id="snowflake_transformation",
+    transformation_task = BashOperator(
+        task_id="transformation_task",
         dag=dag,
-        snowflake_conn_id="snowflake_default",
-        sql=sql,
+        bash_command="echo 'This is where you might run your data tooling.'",
     )
 
     emit_lineage_task = DatahubEmitterOperator(
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py
index 8fb7363f8cad1..9604931795ccb 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py
@@ -1,7 +1,9 @@
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union
 
 from airflow.exceptions import AirflowException
 from airflow.hooks.base import BaseHook
+from datahub.emitter.generic_emitter import Emitter
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
     MetadataChangeEvent,
     MetadataChangeProposal,
@@ -11,6 +13,7 @@
     from airflow.models.connection import Connection
     from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
     from datahub.emitter.rest_emitter import DatahubRestEmitter
+    from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter
     from datahub.ingestion.sink.datahub_kafka import KafkaSinkConfig
 
 
@@ -80,17 +83,24 @@ def make_emitter(self) -> "DatahubRestEmitter":
 
         return datahub.emitter.rest_emitter.DatahubRestEmitter(*self._get_config())
 
-    def emit_mces(self, mces: List[MetadataChangeEvent]) -> None:
+    def emit(
+        self,
+        items: Sequence[
+            Union[
+                MetadataChangeEvent,
+                MetadataChangeProposal,
+                MetadataChangeProposalWrapper,
+            ]
+        ],
+    ) -> None:
         emitter = self.make_emitter()
 
-        for mce in mces:
-            emitter.emit_mce(mce)
+        for item in items:
+            emitter.emit(item)
 
-    def emit_mcps(self, mcps: List[MetadataChangeProposal]) -> None:
-        emitter = self.make_emitter()
-
-        for mce in mcps:
-            emitter.emit_mcp(mce)
+    # Retained for backwards compatibility.
+    emit_mces = emit
+    emit_mcps = emit
 
 
 class DatahubKafkaHook(BaseHook):
@@ -152,7 +162,16 @@ def make_emitter(self) -> "DatahubKafkaEmitter":
         sink_config = self._get_config()
         return datahub.emitter.kafka_emitter.DatahubKafkaEmitter(sink_config)
 
-    def emit_mces(self, mces: List[MetadataChangeEvent]) -> None:
+    def emit(
+        self,
+        items: Sequence[
+            Union[
+                MetadataChangeEvent,
+                MetadataChangeProposal,
+                MetadataChangeProposalWrapper,
+            ]
+        ],
+    ) -> None:
         emitter = self.make_emitter()
         errors = []
 
@@ -160,29 +179,50 @@ def callback(exc, msg):
             if exc:
                 errors.append(exc)
 
-        for mce in mces:
-            emitter.emit_mce_async(mce, callback)
+        for mce in items:
+            emitter.emit(mce, callback)
 
         emitter.flush()
 
         if errors:
-            raise AirflowException(f"failed to push some MCEs: {errors}")
+            raise AirflowException(f"failed to push some metadata: {errors}")
 
-    def emit_mcps(self, mcps: List[MetadataChangeProposal]) -> None:
-        emitter = self.make_emitter()
-        errors = []
+    # Retained for backwards compatibility.
+    emit_mces = emit
+    emit_mcps = emit
 
-        def callback(exc, msg):
-            if exc:
-                errors.append(exc)
 
-        for mcp in mcps:
-            emitter.emit_mcp_async(mcp, callback)
+class SynchronizedFileHook(BaseHook):
+    conn_type = "datahub-file"
 
-        emitter.flush()
+    def __init__(self, datahub_conn_id: str) -> None:
+        super().__init__()
+        self.datahub_conn_id = datahub_conn_id
 
-        if errors:
-            raise AirflowException(f"failed to push some MCPs: {errors}")
+    def make_emitter(self) -> "SynchronizedFileEmitter":
+        """Build a file emitter that writes to the connection's host path."""
+        from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter
+
+        # The Airflow connection's `host` field carries the output filename.
+        filename = self.get_connection(self.datahub_conn_id).host
+        if not filename:
+            raise AirflowException("filename parameter is required")
+        return SynchronizedFileEmitter(filename=filename)
+
+    def emit(
+        self,
+        items: Sequence[
+            Union[
+                MetadataChangeEvent,
+                MetadataChangeProposal,
+                MetadataChangeProposalWrapper,
+            ]
+        ],
+    ) -> None:
+        emitter = self.make_emitter()
+
+        for item in items:
+            emitter.emit(item)
 
 
 class DatahubGenericHook(BaseHook):
@@ -198,7 +238,9 @@ def __init__(self, datahub_conn_id: str) -> None:
         super().__init__()
         self.datahub_conn_id = datahub_conn_id
 
-    def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]:
+    def get_underlying_hook(
+        self,
+    ) -> Union[DatahubRestHook, DatahubKafkaHook, SynchronizedFileHook]:
         conn = self.get_connection(self.datahub_conn_id)
 
         # We need to figure out the underlying hook type. First check the
@@ -213,6 +255,11 @@ def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]:
             or conn.conn_type == DatahubKafkaHook.conn_type.replace("-", "_")
         ):
             return DatahubKafkaHook(self.datahub_conn_id)
+        elif (
+            conn.conn_type == SynchronizedFileHook.conn_type
+            or conn.conn_type == SynchronizedFileHook.conn_type.replace("-", "_")
+        ):
+            return SynchronizedFileHook(self.datahub_conn_id)
         elif "rest" in self.datahub_conn_id:
             return DatahubRestHook(self.datahub_conn_id)
         elif "kafka" in self.datahub_conn_id:
@@ -222,8 +269,20 @@ def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]:
                 f"DataHub cannot handle conn_type {conn.conn_type} in {conn}"
             )
 
-    def make_emitter(self) -> Union["DatahubRestEmitter", "DatahubKafkaEmitter"]:
+    def make_emitter(self) -> Emitter:
         return self.get_underlying_hook().make_emitter()
 
-    def emit_mces(self, mces: List[MetadataChangeEvent]) -> None:
-        return self.get_underlying_hook().emit_mces(mces)
+    def emit(
+        self,
+        items: Sequence[
+            Union[
+                MetadataChangeEvent,
+                MetadataChangeProposal,
+                MetadataChangeProposalWrapper,
+            ]
+        ],
+    ) -> None:
+        return self.get_underlying_hook().emit(items)
+
+    # Retained for backwards compatibility.
+    emit_mces = emit
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_lineage_core.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py
similarity index 72%
rename from metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_lineage_core.py
rename to metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py
index d91c039ffa718..f5f519fa23b11 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_lineage_core.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py
@@ -1,11 +1,10 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, Dict, List
 
-import datahub.emitter.mce_builder as builder
 from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult
-from datahub.configuration.common import ConfigModel
 from datahub.utilities.urns.dataset_urn import DatasetUrn
 
+from datahub_airflow_plugin._config import DatahubLineageConfig
 from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator
 from datahub_airflow_plugin.entities import _Entity
 
@@ -15,39 +14,14 @@
     from airflow.models.taskinstance import TaskInstance
 
     from datahub_airflow_plugin._airflow_shims import Operator
-    from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook
 
 
 def _entities_to_urn_list(iolets: List[_Entity]) -> List[DatasetUrn]:
     return [DatasetUrn.create_from_string(let.urn) for let in iolets]
 
 
-class DatahubBasicLineageConfig(ConfigModel):
-    enabled: bool = True
-
-    # DataHub hook connection ID.
-    datahub_conn_id: str
-
-    # Cluster to associate with the pipelines and tasks. Defaults to "prod".
-    cluster: str = builder.DEFAULT_FLOW_CLUSTER
-
-    # If true, the owners field of the DAG will be capture as a DataHub corpuser.
-    capture_ownership_info: bool = True
-
-    # If true, the tags field of the DAG will be captured as DataHub tags.
-    capture_tags_info: bool = True
-
-    capture_executions: bool = False
-
-    def make_emitter_hook(self) -> "DatahubGenericHook":
-        # This is necessary to avoid issues with circular imports.
-        from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook
-
-        return DatahubGenericHook(self.datahub_conn_id)
-
-
 def send_lineage_to_datahub(
-    config: DatahubBasicLineageConfig,
+    config: DatahubLineageConfig,
     operator: "Operator",
     inlets: List[_Entity],
     outlets: List[_Entity],
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py
index c41bb2b2a1e37..3ebe7831d08f9 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py
@@ -4,8 +4,8 @@
 from airflow.configuration import conf
 from airflow.lineage.backend import LineageBackend
 
-from datahub_airflow_plugin._lineage_core import (
-    DatahubBasicLineageConfig,
+from datahub_airflow_plugin.lineage._lineage_core import (
+    DatahubLineageConfig,
     send_lineage_to_datahub,
 )
 
@@ -13,14 +13,7 @@
     from airflow.models.baseoperator import BaseOperator
 
 
-class DatahubLineageConfig(DatahubBasicLineageConfig):
-    # If set to true, most runtime errors in the lineage backend will be
-    # suppressed and will not cause the overall task to fail. Note that
-    # configuration issues will still throw exceptions.
-    graceful_exceptions: bool = True
-
-
-def get_lineage_config() -> DatahubLineageConfig:
+def get_lineage_backend_config() -> DatahubLineageConfig:
     """Load the lineage config from airflow.cfg."""
 
     # The kwargs pattern is also used for secret backends.
@@ -51,8 +44,7 @@ class DatahubLineageBackend(LineageBackend):
         datahub_kwargs = {
             "datahub_conn_id": "datahub_rest_default",
             "capture_ownership_info": true,
-            "capture_tags_info": true,
-            "graceful_exceptions": true }
+            "capture_tags_info": true }
         # The above indentation is important!
     """
 
@@ -61,7 +53,7 @@ def __init__(self) -> None:
 
         # By attempting to get and parse the config, we can detect configuration errors
         # ahead of time. The init method is only called in Airflow 2.x.
-        _ = get_lineage_config()
+        _ = get_lineage_backend_config()
 
     # With Airflow 2.0, this can be an instance method. However, with Airflow 1.10.x, this
     # method is used statically, even though LineageBackend declares it as an instance variable.
@@ -72,7 +64,7 @@ def send_lineage(
         outlets: Optional[List] = None,  # unused
         context: Optional[Dict] = None,
     ) -> None:
-        config = get_lineage_config()
+        config = get_lineage_backend_config()
         if not config.enabled:
             return
 
@@ -82,10 +74,4 @@ def send_lineage(
                 config, operator, operator.inlets, operator.outlets, context
             )
         except Exception as e:
-            if config.graceful_exceptions:
-                operator.log.error(e)
-                operator.log.info(
-                    "Suppressing error because graceful_exceptions is set"
-                )
-            else:
-                raise
+            operator.log.error(e)
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py
index 109e7ddfe4dfa..15b50c51a561d 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py
@@ -57,7 +57,7 @@ def __init__(  # type: ignore[no-untyped-def]
             datahub_conn_id=datahub_conn_id,
             **kwargs,
         )
-        self.mces = mces
+        self.metadata = mces
 
     def execute(self, context):
-        self.generic_hook.get_underlying_hook().emit_mces(self.mces)
+        self.generic_hook.get_underlying_hook().emit(self.metadata)
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/conftest.py b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py
new file mode 100644
index 0000000000000..d2c45e723f1b0
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py
@@ -0,0 +1,6 @@
+def pytest_addoption(parser):
+    parser.addoption(
+        "--update-golden-files",
+        action="store_true",
+        default=False,
+    )
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py
new file mode 100644
index 0000000000000..8b0803ab98422
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py
@@ -0,0 +1,43 @@
+# Integration-test DAG with one BashOperator task whose inlets/outlets are
+# declared with the DataHub plugin's Dataset and Urn entities.
+from datetime import datetime
+
+from airflow import DAG
+from airflow.operators.bash import BashOperator
+
+from datahub_airflow_plugin.entities import Dataset, Urn
+
+with DAG(
+    "basic_iolets",
+    start_date=datetime(2023, 1, 1),
+    schedule_interval=None,
+    catchup=False,
+) as dag:
+    task = BashOperator(
+        task_id="run_data_task",
+        dag=dag,
+        bash_command="echo 'This is where you might run your data tooling.'",
+        inlets=[
+            # Only platform + name given; the v1 golden file records this
+            # as a PROD dataset URN.
+            Dataset(platform="snowflake", name="mydb.schema.tableA"),
+            # Explicit env override (DEV in the golden URN).
+            Dataset(platform="snowflake", name="mydb.schema.tableB", env="DEV"),
+            # platform_instance shows up as a "cloud." prefix on the dataset
+            # name in the golden URN.
+            Dataset(
+                platform="snowflake",
+                name="mydb.schema.tableC",
+                platform_instance="cloud",
+            ),
+            # A pre-built URN string; the golden file records it unchanged.
+            Urn(
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ),
+        ],
+        outlets=[
+            # Same entity type, using positional (platform, name) arguments.
+            Dataset("snowflake", "mydb.schema.tableD"),
+            Dataset("snowflake", "mydb.schema.tableE"),
+        ],
+    )
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py
new file mode 100644
index 0000000000000..1dd047f0a6dcc
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py
@@ -0,0 +1,38 @@
+# Integration-test DAG with two BashOperator tasks: task_1 declares DataHub
+# inlets/outlets, and run_another_data_task runs downstream of it.
+from datetime import datetime
+
+from airflow import DAG
+from airflow.operators.bash import BashOperator
+
+from datahub_airflow_plugin.entities import Dataset, Urn
+
+with DAG(
+    "simple_dag",
+    start_date=datetime(2023, 1, 1),
+    schedule_interval=None,
+    catchup=False,
+    description="A simple DAG that runs a few fake data tasks.",
+) as dag:
+    task1 = BashOperator(
+        task_id="task_1",
+        dag=dag,
+        bash_command="echo 'task 1'",
+        inlets=[
+            Dataset(platform="snowflake", name="mydb.schema.tableA"),
+            Urn(
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ),
+        ],
+        outlets=[Dataset("snowflake", "mydb.schema.tableD")],
+    )
+
+    # This task declares no inlets or outlets.
+    task2 = BashOperator(
+        task_id="run_another_data_task",
+        dag=dag,
+        bash_command="echo 'task 2'",
+    )
+
+    # task2 is set downstream of task1.
+    task1 >> task2
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py
new file mode 100644
index 0000000000000..347d0f88b0cd0
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py
@@ -0,0 +1,37 @@
+# Integration-test DAG with a single SnowflakeOperator task that runs a
+# templated CREATE OR REPLACE TABLE ... AS SELECT, deriving the
+# "processed_costs" table (adding a cost_per_area column) from "costs".
+from datetime import datetime
+
+from airflow import DAG
+from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator
+
+# Table names fed into the task's params below.
+SNOWFLAKE_COST_TABLE = "costs"
+SNOWFLAKE_PROCESSED_TABLE = "processed_costs"
+
+with DAG(
+    "snowflake_operator",
+    start_date=datetime(2023, 1, 1),
+    schedule_interval=None,
+    catchup=False,
+) as dag:
+    transform_cost_table = SnowflakeOperator(
+        snowflake_conn_id="my_snowflake",
+        task_id="transform_cost_table",
+        sql="""
+        CREATE OR REPLACE TABLE {{ params.out_table_name }} AS
+        SELECT
+            id,
+            month,
+            total_cost,
+            area,
+            total_cost / area as cost_per_area
+        FROM {{ params.in_table_name }}
+        """,
+        # Values for the {{ params.* }} placeholders used in `sql`.
+        params={
+            "in_table_name": SNOWFLAKE_COST_TABLE,
+            "out_table_name": SNOWFLAKE_PROCESSED_TABLE,
+        },
+    )
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py
new file mode 100644
index 0000000000000..77faec3c8935a
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py
@@ -0,0 +1,82 @@
+# Integration-test DAG built from SqliteOperator tasks: create the "costs"
+# table, insert three rows, derive "processed_costs" from it, then drop
+# both tables.
+from datetime import datetime
+
+from airflow import DAG
+from airflow.providers.sqlite.operators.sqlite import SqliteOperator
+
+# Connection id passed as sqlite_conn_id to every task in this DAG.
+CONN_ID = "my_sqlite"
+
+COST_TABLE = "costs"
+PROCESSED_TABLE = "processed_costs"
+
+with DAG(
+    "sqlite_operator",
+    start_date=datetime(2023, 1, 1),
+    schedule_interval=None,
+    catchup=False,
+) as dag:
+    create_cost_table = SqliteOperator(
+        sqlite_conn_id=CONN_ID,
+        task_id="create_cost_table",
+        sql="""
+        CREATE TABLE IF NOT EXISTS {{ params.table_name }} (
+            id INTEGER PRIMARY KEY,
+            month TEXT NOT NULL,
+            total_cost REAL NOT NULL,
+            area REAL NOT NULL
+        )
+        """,
+        params={"table_name": COST_TABLE},
+    )
+
+    populate_cost_table = SqliteOperator(
+        sqlite_conn_id=CONN_ID,
+        task_id="populate_cost_table",
+        sql="""
+        INSERT INTO {{ params.table_name }} (id, month, total_cost, area)
+        VALUES
+            (1, '2021-01', 100, 10),
+            (2, '2021-02', 200, 20),
+            (3, '2021-03', 300, 30)
+        """,
+        params={"table_name": COST_TABLE},
+    )
+
+    transform_cost_table = SqliteOperator(
+        sqlite_conn_id=CONN_ID,
+        task_id="transform_cost_table",
+        sql="""
+        CREATE TABLE IF NOT EXISTS {{ params.out_table_name }} AS
+        SELECT
+            id,
+            month,
+            total_cost,
+            area,
+            total_cost / area as cost_per_area
+        FROM {{ params.in_table_name }}
+        """,
+        params={
+            "in_table_name": COST_TABLE,
+            "out_table_name": PROCESSED_TABLE,
+        },
+    )
+
+    # One DROP TABLE task per table, with task_id cleanup_<table_name>.
+    cleanup_tables = []
+    for table_name in [COST_TABLE, PROCESSED_TABLE]:
+        cleanup_table = SqliteOperator(
+            sqlite_conn_id=CONN_ID,
+            task_id=f"cleanup_{table_name}",
+            sql="""
+            DROP TABLE {{ params.table_name }}
+            """,
+            params={"table_name": table_name},
+        )
+        cleanup_tables.append(cleanup_table)
+
+    # Run the three steps in order; cleanup_tables is a list, so every
+    # cleanup task is set downstream of transform_cost_table.
+    create_cost_table >> populate_cost_table >> transform_cost_table >> cleanup_tables
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json
new file mode 100644
index 0000000000000..26aa2afaa831a
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json
@@ -0,0 +1,533 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "None",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets",
+            "name": "basic_iolets"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_data_task'",
+                "trigger_rule": "'all_success'",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task",
+            "name": "run_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_data_task'",
+                "trigger_rule": "'all_success'",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task",
+            "name": "run_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "0.176536",
+                "start_date": "2023-09-30 00:49:56.670239+00:00",
+                "end_date": "2023-09-30 00:49:56.846775+00:00",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "1",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "success",
+                "operator": "BashOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets",
+            "name": "basic_iolets_run_data_task_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696034996670,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696034996670,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 2
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696034996846,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json
new file mode 100644
index 0000000000000..b2e3a1fe47da7
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json
@@ -0,0 +1,718 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "None",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag",
+            "name": "simple_dag",
+            "description": "A simple DAG that runs a few fake data tasks."
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'task_1'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'task_1'",
+                "trigger_rule": "'all_success'",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['run_another_data_task']",
+                "inlets": "[]",
+                "outlets": "[]"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
+            "name": "task_1",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'task_1'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'task_1'",
+                "trigger_rule": "'all_success'",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['run_another_data_task']",
+                "inlets": "[]",
+                "outlets": "[]"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
+            "name": "task_1",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "0.175983",
+                "start_date": "2023-09-30 00:48:58.943850+00:00",
+                "end_date": "2023-09-30 00:48:59.119833+00:00",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "1",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "success",
+                "operator": "BashOperator",
+                "priority_weight": "2",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag",
+            "name": "simple_dag_task_1_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696034938943,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696034938943,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 2
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696034939119,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "None",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag",
+            "name": "simple_dag",
+            "description": "A simple DAG that runs a few fake data tasks."
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_another_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_another_data_task'",
+                "trigger_rule": "'all_success'",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
+            "name": "run_another_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_another_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_another_data_task'",
+                "trigger_rule": "'all_success'",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
+            "name": "run_another_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "0.129888",
+                "start_date": "2023-09-30 00:49:02.158752+00:00",
+                "end_date": "2023-09-30 00:49:02.288640+00:00",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "1",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "success",
+                "operator": "BashOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag",
+            "name": "simple_dag_run_another_data_task_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696034942158,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696034942158,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 2
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696034942288,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json
new file mode 100644
index 0000000000000..2e733c2ad40a9
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json
@@ -0,0 +1,535 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets",
+            "name": "basic_iolets"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_lock_for_execution\": true, \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], 
\"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task",
+            "name": "run_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 01:13:14.266272+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "BashOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1",
+            "name": "basic_iolets_run_data_task_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696036394266,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696036394266,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_lock_for_execution\": true, \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], 
\"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task",
+            "name": "run_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696036394833,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json
new file mode 100644
index 0000000000000..44b288efda954
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json
@@ -0,0 +1,535 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets",
+            "name": "basic_iolets"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", 
\"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task",
+            "name": "run_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:59:52.401211+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "BashOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1",
+            "name": "basic_iolets_run_data_task_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057192401,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057192401,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", 
\"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task",
+            "name": "run_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057192982,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json
new file mode 100644
index 0000000000000..454c509279e11
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json
@@ -0,0 +1,666 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag",
+            "name": "simple_dag",
+            "description": "A simple DAG that runs a few fake data tasks."
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'task_1'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'task_1'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['run_another_data_task']",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_lock_for_execution\": true, \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": 
\"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
+            "name": "task_1",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:53:58.219003+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "BashOperator",
+                "priority_weight": "2",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
+            "name": "simple_dag_task_1_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056838219,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056838219,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'task_1'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'task_1'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['run_another_data_task']",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_lock_for_execution\": true, \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": 
\"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
+            "name": "task_1",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056838648,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_another_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_another_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<<non-serializable: DAG>>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_lock_for_execution\": true, \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
+            "name": "run_another_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:54:02.407515+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "BashOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
+            "name": "simple_dag_run_another_data_task_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056842407,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056842407,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_another_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_another_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<<non-serializable: DAG>>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_lock_for_execution\": true, \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
+            "name": "run_another_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056842831,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json
new file mode 100644
index 0000000000000..73b5765e96b7d
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json
@@ -0,0 +1,722 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag",
+            "name": "simple_dag",
+            "description": "A simple DAG that runs a few fake data tasks."
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'task_1'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'task_1'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['run_another_data_task']",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"task_1\", \"trigger_rule\": 
\"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
+            "name": "task_1",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:58:56.105026+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "BashOperator",
+                "priority_weight": "2",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
+            "name": "simple_dag_task_1_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057136105,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057136105,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'task_1'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'task_1'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['run_another_data_task']",
+                "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]",
+                "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<<non-serializable: DAG>>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"task_1\", \"trigger_rule\": 
\"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
+            "name": "task_1",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057136612,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag",
+            "name": "simple_dag",
+            "description": "A simple DAG that runs a few fake data tasks."
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_another_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_another_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<<non-serializable: DAG>>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
+            "name": "run_another_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:58:59.567004+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "BashOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
+            "name": "simple_dag_run_another_data_task_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057139567,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057139567,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'run_another_data_task'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "task_id": "'run_another_data_task'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<<non-serializable: DAG>>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<<non-serializable: DAG>>\", \"_log\": \"<<non-serializable: Logger>>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<<non-serializable: ParamsDict>>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<<non-serializable: timedelta>>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<<non-serializable: DateTime>>\", \"task_group\": \"<<non-serializable: TaskGroup>>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
+            "name": "run_another_data_task",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057140164,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json
new file mode 100644
index 0000000000000..affc395d421da
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json
@@ -0,0 +1,507 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,snowflake_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=snowflake_operator",
+            "name": "snowflake_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,snowflake_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,snowflake_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'transform_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE OR REPLACE TABLE processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        '",
+                "task_id": "'transform_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE OR REPLACE TABLE processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=snowflake_operator&_flt_3_task_id=transform_cost_table",
+            "name": "transform_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:55:36.844976+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SnowflakeOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1",
+            "name": "snowflake_operator_transform_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056936844,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056936844,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'transform_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE OR REPLACE TABLE processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        '",
+                "task_id": "'transform_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE OR REPLACE TABLE processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=snowflake_operator&_flt_3_task_id=transform_cost_table",
+            "name": "transform_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056938096,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "FAILURE",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
new file mode 100644
index 0000000000000..1a32b38ce055d
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
@@ -0,0 +1,1735 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator",
+            "name": "sqlite_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'create_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        '",
+                "task_id": "'create_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['populate_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table",
+            "name": "create_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:56:24.632190+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "5",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_create_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056984632,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056984632,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'create_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        '",
+                "task_id": "'create_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['populate_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table",
+            "name": "create_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056984947,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'populate_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "\"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"",
+                "task_id": "'populate_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['transform_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table",
+            "name": "populate_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:56:28.605901+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "4",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_populate_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056988605,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056988605,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'populate_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "\"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"",
+                "task_id": "'populate_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['transform_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table",
+            "name": "populate_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056989098,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'transform_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        '",
+                "task_id": "'transform_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table",
+            "name": "transform_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)"
+            ],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:56:32.888165+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "3",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_transform_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056992888,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056992888,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'transform_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        '",
+                "task_id": "'transform_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table",
+            "name": "transform_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)"
+            ],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056993744,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE costs\\n            '",
+                "task_id": "'cleanup_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs",
+            "name": "cleanup_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:56:37.745717+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_cleanup_costs_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696056997745,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056997745,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE costs\\n            '",
+                "task_id": "'cleanup_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs",
+            "name": "cleanup_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696056998672,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_processed_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE processed_costs\\n            '",
+                "task_id": "'cleanup_processed_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE processed_costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs",
+            "name": "cleanup_processed_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 06:56:42.645806+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_cleanup_processed_costs_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057002645,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057002645,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_processed_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE processed_costs\\n            '",
+                "task_id": "'cleanup_processed_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE processed_costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs",
+            "name": "cleanup_processed_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057003759,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
new file mode 100644
index 0000000000000..c082be693e30c
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
@@ -0,0 +1,1955 @@
+[
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator",
+            "name": "sqlite_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'create_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        '",
+                "task_id": "'create_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['populate_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table",
+            "name": "create_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 07:00:45.832554+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "5",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_create_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057245832,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057245832,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'create_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        '",
+                "task_id": "'create_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['populate_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS costs (\\n            id INTEGER PRIMARY KEY,\\n            month TEXT NOT NULL,\\n            total_cost REAL NOT NULL,\\n            area REAL NOT NULL\\n        )\\n        \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Create'> (outer statement type: <class 'sqlglot.expressions.Create'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table",
+            "name": "create_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "inputDatajobs": [],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057246734,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator",
+            "name": "sqlite_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'populate_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "\"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"",
+                "task_id": "'populate_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['transform_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table",
+            "name": "populate_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 07:00:49.653938+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "4",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_populate_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057249653,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057249653,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'populate_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "\"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"",
+                "task_id": "'populate_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['transform_cost_table']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        INSERT INTO costs (id, month, total_cost, area)\\n        VALUES\\n            (1, '2021-01', 100, 10),\\n            (2, '2021-02', 200, 20),\\n            (3, '2021-03', 300, 30)\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table",
+            "name": "populate_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057250831,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator",
+            "name": "sqlite_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'transform_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        '",
+                "task_id": "'transform_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table",
+            "name": "transform_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)"
+            ],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 07:00:53.989264+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "3",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_transform_cost_table_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057253989,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceOutput",
+    "aspect": {
+        "json": {
+            "outputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057253989,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'transform_cost_table'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        '",
+                "task_id": "'transform_cost_table'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']",
+                "inlets": "[]",
+                "outlets": "[]",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n        CREATE TABLE IF NOT EXISTS processed_costs AS\\n        SELECT\\n            id,\\n            month,\\n            total_cost,\\n            area,\\n            total_cost / area as cost_per_area\\n        FROM costs\\n        \"}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table",
+            "name": "transform_cost_table",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)"
+            ],
+            "fineGrainedLineages": [
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)"
+                    ],
+                    "confidenceScore": 1.0
+                },
+                {
+                    "upstreamType": "FIELD_SET",
+                    "upstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)",
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)"
+                    ],
+                    "downstreamType": "FIELD",
+                    "downstreams": [
+                        "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)"
+                    ],
+                    "confidenceScore": 1.0
+                }
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057255628,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator",
+            "name": "sqlite_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE costs\\n            '",
+                "task_id": "'cleanup_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs",
+            "name": "cleanup_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 07:01:00.421177+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_cleanup_costs_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057260421,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057260421,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE costs\\n            '",
+                "task_id": "'cleanup_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs",
+            "name": "cleanup_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057262258,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "dataFlowInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "_access_control": "None",
+                "catchup": "False",
+                "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
+                "is_paused_upon_creation": "None",
+                "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
+                "tags": "[]",
+                "timezone": "Timezone('UTC')"
+            },
+            "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator",
+            "name": "sqlite_operator"
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataFlow",
+    "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_processed_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE processed_costs\\n            '",
+                "task_id": "'cleanup_processed_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE processed_costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs",
+            "name": "cleanup_processed_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "run_id": "manual_run_test",
+                "duration": "None",
+                "start_date": "2023-09-30 07:01:05.540192+00:00",
+                "end_date": "None",
+                "execution_date": "2023-09-27 21:34:38+00:00",
+                "try_number": "0",
+                "max_tries": "0",
+                "external_executor_id": "None",
+                "state": "running",
+                "operator": "SqliteOperator",
+                "priority_weight": "1",
+                "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1"
+            },
+            "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1",
+            "name": "sqlite_operator_cleanup_processed_costs_manual_run_test",
+            "type": "BATCH_AD_HOC",
+            "created": {
+                "time": 1696057265540,
+                "actor": "urn:li:corpuser:datahub"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRelationships",
+    "aspect": {
+        "json": {
+            "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+            "upstreamInstances": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceInput",
+    "aspect": {
+        "json": {
+            "inputs": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ]
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057265540,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "STARTED",
+            "attempt": 1
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInfo",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "depends_on_past": "False",
+                "email": "None",
+                "label": "'cleanup_processed_costs'",
+                "execution_timeout": "None",
+                "sla": "None",
+                "sql": "'\\n            DROP TABLE processed_costs\\n            '",
+                "task_id": "'cleanup_processed_costs'",
+                "trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
+                "wait_for_downstream": "False",
+                "downstream_task_ids": "[]",
+                "inlets": "[]",
+                "outlets": "[]",
+                "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)",
+                "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n            DROP TABLE processed_costs\\n            \"}",
+                "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not <class 'sqlglot.expressions.Drop'> (outer statement type: <class 'sqlglot.expressions.Drop'>)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}"
+            },
+            "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs",
+            "name": "cleanup_processed_costs",
+            "type": {
+                "string": "COMMAND"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "dataJobInputOutput",
+    "aspect": {
+        "json": {
+            "inputDatasets": [
+                "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)"
+            ],
+            "outputDatasets": [],
+            "inputDatajobs": [
+                "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)"
+            ],
+            "fineGrainedLineages": []
+        }
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "ownership",
+    "aspect": {
+        "json": {
+            "owners": [
+                {
+                    "owner": "urn:li:corpuser:airflow",
+                    "type": "DEVELOPER",
+                    "source": {
+                        "type": "SERVICE"
+                    }
+                }
+            ],
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:airflow"
+            }
+        }
+    }
+},
+{
+    "entityType": "dataJob",
+    "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": []
+        }
+    }
+},
+{
+    "entityType": "dataProcessInstance",
+    "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372",
+    "changeType": "UPSERT",
+    "aspectName": "dataProcessInstanceRunEvent",
+    "aspect": {
+        "json": {
+            "timestampMillis": 1696057267631,
+            "partitionSpec": {
+                "type": "FULL_TABLE",
+                "partition": "FULL_TABLE_SNAPSHOT"
+            },
+            "status": "COMPLETE",
+            "result": {
+                "type": "SUCCESS",
+                "nativeResultType": "airflow"
+            }
+        }
+    }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py
deleted file mode 100644
index 10cf3ad0a608a..0000000000000
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_dummy():
-    pass
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py
new file mode 100644
index 0000000000000..a2b7fd151a1e4
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py
@@ -0,0 +1,392 @@
+import contextlib
+import dataclasses
+import functools
+import logging
+import os
+import pathlib
+import random
+import signal
+import subprocess
+import time
+from typing import Iterator, Sequence
+
+import pytest
+import requests
+import tenacity
+from airflow.models.connection import Connection
+from datahub.testing.compare_metadata_json import assert_metadata_files_equal
+
+from datahub_airflow_plugin._airflow_shims import (
+    HAS_AIRFLOW_DAG_LISTENER_API,
+    HAS_AIRFLOW_LISTENER_API,
+    HAS_AIRFLOW_STANDALONE_CMD,
+)
+
+pytestmark = pytest.mark.integration
+
+logger = logging.getLogger(__name__)
+IS_LOCAL = os.environ.get("CI", "false") == "false"
+
+DAGS_FOLDER = pathlib.Path(__file__).parent / "dags"
+GOLDENS_FOLDER = pathlib.Path(__file__).parent / "goldens"
+
+
+@dataclasses.dataclass
+class AirflowInstance:
+    """Details of the Airflow subprocess that `_run_airflow` yields to a test."""
+
+    airflow_home: pathlib.Path
+    airflow_port: int
+    pid: int
+    env_vars: dict
+    username: str  # Sent with `password` as basic auth by `session`.
+    password: str
+    metadata_file: pathlib.Path
+
+    @property
+    def airflow_url(self) -> str:
+        return f"http://localhost:{self.airflow_port}"
+
+    @functools.cached_property
+    def session(self) -> requests.Session:
+        http = requests.Session()
+        http.auth = (self.username, self.password)
+        return http
+
+
+@tenacity.retry(
+    reraise=True,
+    wait=tenacity.wait_fixed(1),
+    stop=tenacity.stop_after_delay(60),
+    retry=tenacity.retry_if_exception_type(
+        (AssertionError, requests.exceptions.RequestException)
+    ),
+)
+def _wait_for_airflow_healthy(airflow_port: int) -> None:
+    """Probe /health once; the decorator retries assertion and request errors."""
+    print("Checking if Airflow is ready...")
+    health = requests.get(f"http://localhost:{airflow_port}/health", timeout=5)
+    health.raise_for_status()
+    components = health.json()
+    for component in ("metadatabase", "scheduler"):
+        assert components[component]["status"] == "healthy"
+
+
+class NotReadyError(Exception):
+    """Raised when the polled DAG has no runs yet or its run has not finished."""
+
+
+@tenacity.retry(
+    reraise=True,
+    wait=tenacity.wait_fixed(1),
+    stop=tenacity.stop_after_delay(90),
+    retry=tenacity.retry_if_exception_type(NotReadyError),
+)
+def _wait_for_dag_finish(
+    airflow_instance: AirflowInstance, dag_id: str, require_success: bool
+) -> None:
+    """Check the DAG's run once; raise NotReadyError while it is unfinished."""
+    print("Checking if DAG is finished")
+    runs_url = f"{airflow_instance.airflow_url}/api/v1/dags/{dag_id}/dagRuns"
+    response = airflow_instance.session.get(runs_url, timeout=5)
+    response.raise_for_status()
+
+    runs = response.json()["dag_runs"]
+    if not runs:
+        raise NotReadyError("No DAG runs found")
+
+    # Only the first run in the response is inspected.
+    state = runs[0]["state"]
+    if state == "success":
+        return
+    if state != "failed":
+        raise NotReadyError(f"DAG has not finished yet: {state}")
+    if require_success:
+        raise ValueError("DAG failed")
+
+
+@contextlib.contextmanager
+def _run_airflow(
+    tmp_path: pathlib.Path, dags_folder: pathlib.Path, is_v1: bool
+) -> Iterator[AirflowInstance]:
+    """Start `airflow standalone` in a subprocess, yield its details, then stop it."""
+    airflow_home = tmp_path / "airflow_home"
+    print(f"Using airflow home: {airflow_home}")
+
+    if IS_LOCAL:
+        airflow_port = 11792
+    else:
+        airflow_port = random.randint(10000, 12000)
+    print(f"Using airflow port: {airflow_port}")
+
+    datahub_connection_name = "datahub_file_default"
+    meta_file = tmp_path / "datahub_metadata.json"
+
+    environment = {
+        **os.environ,
+        "AIRFLOW_HOME": str(airflow_home),
+        "AIRFLOW__WEBSERVER__WEB_SERVER_PORT": str(airflow_port),
+        "AIRFLOW__WEBSERVER__BASE_URL": "http://airflow.example.com",
+        # Point airflow to the DAGs folder.
+        "AIRFLOW__CORE__LOAD_EXAMPLES": "False",
+        "AIRFLOW__CORE__DAGS_FOLDER": str(dags_folder),
+        "AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION": "False",
+        # Have the Airflow API use username/password authentication.
+        "AIRFLOW__API__AUTH_BACKEND": "airflow.api.auth.backend.basic_auth",
+        # Configure the datahub plugin and have it write the MCPs to a file.
+        "AIRFLOW__CORE__LAZY_LOAD_PLUGINS": "False" if is_v1 else "True",
+        "AIRFLOW__DATAHUB__CONN_ID": datahub_connection_name,
+        f"AIRFLOW_CONN_{datahub_connection_name.upper()}": Connection(
+            conn_id="datahub_file_default",
+            conn_type="datahub-file",
+            host=str(meta_file),
+        ).get_uri(),
+        # Configure fake credentials for the Snowflake connection.
+        "AIRFLOW_CONN_MY_SNOWFLAKE": Connection(
+            conn_id="my_snowflake",
+            conn_type="snowflake",
+            login="fake_username",
+            password="fake_password",
+            schema="DATAHUB_TEST_SCHEMA",
+            extra={
+                "account": "fake_account",
+                "database": "DATAHUB_TEST_DATABASE",
+                "warehouse": "fake_warehouse",
+                "role": "fake_role",
+                "insecure_mode": "true",
+            },
+        ).get_uri(),
+        "AIRFLOW_CONN_MY_SQLITE": Connection(
+            conn_id="my_sqlite",
+            conn_type="sqlite",
+            host=str(tmp_path / "my_sqlite.db"),
+        ).get_uri(),
+        # Convenience settings.
+        "AIRFLOW__DATAHUB__LOG_LEVEL": "DEBUG",
+        "AIRFLOW__DATAHUB__DEBUG_EMITTER": "True",
+        "SQLALCHEMY_SILENCE_UBER_WARNING": "1",
+    }
+
+    if not HAS_AIRFLOW_STANDALONE_CMD:
+        raise pytest.skip("Airflow standalone command is not available")
+
+    # Start airflow in a background subprocess.
+    airflow_process = subprocess.Popen(
+        ["airflow", "standalone"],
+        env=environment,
+    )
+
+    try:
+        _wait_for_airflow_healthy(airflow_port)
+        print("Airflow is ready!")
+
+        # Sleep for a few seconds to make sure the other Airflow processes are ready.
+        time.sleep(3)
+
+        # Create an extra "airflow" user for easy testing.
+        if IS_LOCAL:
+            print("Creating an extra test user...")
+            subprocess.check_call(
+                [
+                    # fmt: off
+                    "airflow", "users", "create",
+                    "--username", "airflow",
+                    "--password", "airflow",
+                    "--firstname", "admin",
+                    "--lastname", "admin",
+                    "--role", "Admin",
+                    "--email", "airflow@example.com",
+                    # fmt: on
+                ],
+                env=environment,
+            )
+
+        # Sanity check that the plugin got loaded.
+        if not is_v1:
+            print("[debug] Listing loaded plugins")
+            subprocess.check_call(
+                ["airflow", "plugins", "-v"],
+                env=environment,
+            )
+
+        # Load the admin password generated by `airflow standalone`; this is a
+        # different account from the "airflow" user created for local runs.
+        airflow_username = "admin"
+        airflow_password = (airflow_home / "standalone_admin_password.txt").read_text()
+
+        airflow_instance = AirflowInstance(
+            airflow_home=airflow_home,
+            airflow_port=airflow_port,
+            pid=airflow_process.pid,
+            env_vars=environment,
+            username=airflow_username,
+            password=airflow_password,
+            metadata_file=meta_file,
+        )
+
+        yield airflow_instance
+    finally:
+        try:
+            # Attempt a graceful shutdown.
+            print("Shutting down airflow...")
+            airflow_process.send_signal(signal.SIGINT)
+            airflow_process.wait(timeout=30)
+        except subprocess.TimeoutExpired:
+            # If the graceful shutdown failed, kill the process.
+            print("Hard shutting down airflow...")
+            airflow_process.kill()
+            airflow_process.wait(timeout=3)
+
+
+def check_golden_file(
+    pytestconfig: pytest.Config,
+    output_path: pathlib.Path,
+    golden_path: pathlib.Path,
+    ignore_paths: Sequence[str] = (),
+) -> None:
+    """Compare the output file to the golden, passing on --update-golden-files."""
+    should_update = pytestconfig.getoption("--update-golden-files")
+    assert_metadata_files_equal(
+        golden_path=golden_path,
+        output_path=output_path,
+        ignore_paths=ignore_paths,
+        update_golden=should_update,
+        copy_output=False,
+        ignore_order=False,
+    )
+
+
+@dataclasses.dataclass
+class DagTestCase:
+    """A DAG under test: its id, expected outcome, and whether it is v2-only."""
+    dag_id: str
+    success: bool = True
+    v2_only: bool = False
+
+
+test_cases = [
+    DagTestCase("simple_dag"),
+    DagTestCase("basic_iolets"),
+    DagTestCase("snowflake_operator", success=False, v2_only=True),
+    DagTestCase("sqlite_operator", v2_only=True),
+]
+
+
+@pytest.mark.parametrize(
+    ["golden_filename", "test_case", "is_v1"],
+    [
+        # On Airflow <= 2.2, test plugin v1.
+        *[
+            pytest.param(
+                f"v1_{test_case.dag_id}",
+                test_case,
+                True,
+                id=f"v1_{test_case.dag_id}",
+                marks=pytest.mark.skipif(
+                    HAS_AIRFLOW_LISTENER_API,
+                    reason="Not testing plugin v1 on newer Airflow versions",
+                ),
+            )
+            for test_case in test_cases
+            if not test_case.v2_only
+        ],
+        *[
+            pytest.param(
+                # On Airflow 2.3-2.4, test plugin v2 without dataFlows.
+                f"v2_{test_case.dag_id}"
+                if HAS_AIRFLOW_DAG_LISTENER_API
+                else f"v2_{test_case.dag_id}_no_dag_listener",
+                test_case,
+                False,
+                id=f"v2_{test_case.dag_id}"
+                if HAS_AIRFLOW_DAG_LISTENER_API
+                else f"v2_{test_case.dag_id}_no_dag_listener",
+                marks=pytest.mark.skipif(
+                    not HAS_AIRFLOW_LISTENER_API,
+                    reason="Cannot test plugin v2 without the Airflow plugin listener API",
+                ),
+            )
+            for test_case in test_cases
+        ],
+    ],
+)
+def test_airflow_plugin(
+    pytestconfig: pytest.Config,
+    tmp_path: pathlib.Path,
+    golden_filename: str,
+    test_case: DagTestCase,
+    is_v1: bool,
+) -> None:
+    """Run one DAG in a local Airflow instance and validate the emitted metadata.
+
+    The test configures the plugin, starts a local airflow instance in a
+    subprocess, runs a DAG that uses an operator supported by the extractor,
+    waits for the DAG to complete, and compares the metadata to a golden file.
+    """
+
+    if not is_v1 and not test_case.success and not HAS_AIRFLOW_DAG_LISTENER_API:
+        # Saw a number of issues in CI where this would fail to emit the last events
+        # due to an error in the SQLAlchemy listener. This never happened locally for me.
+        pytest.skip("Cannot test failure cases without the Airflow DAG listener API")
+
+    golden_path = GOLDENS_FOLDER / f"{golden_filename}.json"
+    dag_id = test_case.dag_id
+
+    with _run_airflow(
+        tmp_path, dags_folder=DAGS_FOLDER, is_v1=is_v1
+    ) as airflow_instance:
+        print(f"Running DAG {dag_id}...")
+        subprocess.check_call(
+            [
+                "airflow",
+                "dags",
+                "trigger",
+                "--exec-date",
+                "2023-09-27T21:34:38+00:00",
+                "-r",
+                "manual_run_test",
+                dag_id,
+            ],
+            env=airflow_instance.env_vars,
+        )
+
+        print("Waiting for DAG to finish...")
+        _wait_for_dag_finish(
+            airflow_instance, dag_id, require_success=test_case.success
+        )
+
+        print("Sleeping for a few seconds to let the plugin finish...")
+        time.sleep(10)
+
+    check_golden_file(
+        pytestconfig=pytestconfig,
+        output_path=airflow_instance.metadata_file,
+        golden_path=golden_path,
+        ignore_paths=[
+            # Timing-related items.
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['start_date'\]",
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['end_date'\]",
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['duration'\]",
+            # Host-specific items.
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['pid'\]",
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['hostname'\]",
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['unixname'\]",
+            # TODO: If we switched to Git urls, maybe we could get this to work consistently.
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['fileloc'\]",
+            r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]",
+        ],
+    )
+
+
+if __name__ == "__main__":
+    # When run directly, just set up a local airflow instance.
+    import tempfile
+
+    with _run_airflow(
+        # mkdtemp's first positional argument is the suffix, so name the prefix.
+        tmp_path=pathlib.Path(tempfile.mkdtemp(prefix="airflow-plugin-test-")),
+        dags_folder=DAGS_FOLDER,
+        is_v1=not HAS_AIRFLOW_LISTENER_API,
+    ) as airflow_instance:
+        breakpoint()  # Keeps Airflow running until the debugger session continues.
+        print("quitting airflow")
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py
index 9aa901171cfa6..d8620e74d7e30 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py
+++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py
@@ -14,18 +14,21 @@
 import pytest
 from airflow.lineage import apply_lineage, prepare_lineage
 from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance
-from datahub_provider import get_provider_info
-from datahub_provider._airflow_shims import AIRFLOW_PATCHED, EmptyOperator
-from datahub_provider.entities import Dataset, Urn
-from datahub_provider.hooks.datahub import DatahubKafkaHook, DatahubRestHook
-from datahub_provider.operators.datahub import DatahubEmitterOperator
+
+from datahub_airflow_plugin import get_provider_info
+from datahub_airflow_plugin._airflow_shims import (
+    AIRFLOW_PATCHED,
+    AIRFLOW_VERSION,
+    EmptyOperator,
+)
+from datahub_airflow_plugin.entities import Dataset, Urn
+from datahub_airflow_plugin.hooks.datahub import DatahubKafkaHook, DatahubRestHook
+from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator
 
 assert AIRFLOW_PATCHED
 
 # TODO: Remove default_view="tree" arg. Figure out why is default_view being picked as "grid" and how to fix it ?
 
-# Approach suggested by https://stackoverflow.com/a/11887885/5004662.
-AIRFLOW_VERSION = packaging.version.parse(airflow.version.version)
 
 lineage_mce = builder.make_lineage_mce(
     [
@@ -105,7 +108,7 @@ def test_datahub_rest_hook(mock_emitter):
 
         mock_emitter.assert_called_once_with(config.host, None, None)
         instance = mock_emitter.return_value
-        instance.emit_mce.assert_called_with(lineage_mce)
+        instance.emit.assert_called_with(lineage_mce)
 
 
 @mock.patch("datahub.emitter.rest_emitter.DatahubRestEmitter", autospec=True)
@@ -119,7 +122,7 @@ def test_datahub_rest_hook_with_timeout(mock_emitter):
 
         mock_emitter.assert_called_once_with(config.host, None, 5)
         instance = mock_emitter.return_value
-        instance.emit_mce.assert_called_with(lineage_mce)
+        instance.emit.assert_called_with(lineage_mce)
 
 
 @mock.patch("datahub.emitter.kafka_emitter.DatahubKafkaEmitter", autospec=True)
@@ -131,11 +134,11 @@ def test_datahub_kafka_hook(mock_emitter):
 
         mock_emitter.assert_called_once()
         instance = mock_emitter.return_value
-        instance.emit_mce_async.assert_called()
+        instance.emit.assert_called()
         instance.flush.assert_called_once()
 
 
-@mock.patch("datahub_provider.hooks.datahub.DatahubRestHook.emit_mces")
+@mock.patch("datahub_airflow_plugin.hooks.datahub.DatahubRestHook.emit")
 def test_datahub_lineage_operator(mock_emit):
     with patch_airflow_connection(datahub_rest_connection_config) as config:
         assert config.conn_id
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py
deleted file mode 100644
index 10cf3ad0a608a..0000000000000
--- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_dummy():
-    pass
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py
new file mode 100644
index 0000000000000..1d0ce5835f958
--- /dev/null
+++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py
@@ -0,0 +1,8 @@
+import setuptools
+
+
+def test_package_list_match_inits():
+    """Check that find_packages and find_namespace_packages agree for ./src."""
+    regular = set(setuptools.find_packages("./src"))
+    with_namespaces = set(setuptools.find_namespace_packages("./src"))
+    assert regular == with_namespaces, "are you missing a package init file?"
diff --git a/metadata-ingestion-modules/airflow-plugin/tox.ini b/metadata-ingestion-modules/airflow-plugin/tox.ini
index 6a1c06aed8cdd..2f05854940d10 100644
--- a/metadata-ingestion-modules/airflow-plugin/tox.ini
+++ b/metadata-ingestion-modules/airflow-plugin/tox.ini
@@ -4,32 +4,23 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py3-quick,py3-full
-
-[gh-actions]
-python =
-    3.6: py3-full
-    3.9: py3-full
-
-# Providing optional features that add dependencies from setup.py as deps here 
-# allows tox to recreate testenv when new dependencies are added to setup.py.
-# Previous approach of using the tox global setting extras is not recommended  
-# as extras is only called when the testenv is created for the first time!
-# see more here -> https://github.com/tox-dev/tox/issues/1105#issuecomment-448596282
+envlist = py38-airflow21, py38-airflow22, py310-airflow24, py310-airflow26, py310-airflow27
 
 [testenv]
-deps = 
-    -e ../../metadata-ingestion/[.dev]
+use_develop = true
+extras = dev,integration-tests,plugin-v1
+deps =
+    -e ../../metadata-ingestion/
+    # Airflow version
+    airflow21: apache-airflow~=2.1.0
+    airflow22: apache-airflow~=2.2.0
+    airflow24: apache-airflow~=2.4.0
+    airflow26: apache-airflow~=2.6.0
+    airflow27: apache-airflow~=2.7.0
 commands =
-    pytest --cov={envsitepackagesdir}/datahub --cov={envsitepackagesdir}/datahub_provider \
-        py3-quick: -m 'not integration and not slow_integration' --junit-xml=junit.quick.xml \
-        py3-full: --cov-fail-under 65 --junit-xml=junit.full.xml \
-        --continue-on-collection-errors \
-        -vv
+    pytest --cov-append {posargs}
 
-setenv =
-    AIRFLOW_HOME = /tmp/airflow/thisshouldnotexist-{envname}
+# For Airflow 2.4+, use the plugin-v2 extra instead of plugin-v1.
+[testenv:py310-airflow{24,26,27}]
+extras = dev,integration-tests,plugin-v2
 
-[testenv:py3-full]
-deps =
-    ../../metadata-ingestion/.[dev]
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 8fb7b5f29cc22..34afa8cdb39a4 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -1,4 +1,3 @@
-import os
 import sys
 from typing import Dict, Set
 
@@ -9,16 +8,9 @@
     exec(fp.read(), package_metadata)
 
 
-def get_long_description():
-    root = os.path.dirname(__file__)
-    with open(os.path.join(root, "README.md")) as f:
-        description = f.read()
-
-    return description
-
-
 base_requirements = {
-    "typing_extensions>=3.10.0.2",
+    # typing_extensions should ideally be >=3.10.0.2, but we can't require that due to an Airflow 2.1 dependency conflict.
+    "typing_extensions>=3.7.4.3",
     "mypy_extensions>=0.4.3",
     # Actual dependencies.
     "typing-inspect",
@@ -270,6 +262,7 @@ def get_long_description():
     # Sink plugins.
     "datahub-kafka": kafka_common,
     "datahub-rest": rest_common,
+    "sync-file-emitter": {"filelock"},
     "datahub-lite": {
         "duckdb",
         "fastapi",
@@ -670,7 +663,12 @@ def get_long_description():
     },
     license="Apache License 2.0",
     description="A CLI to work with DataHub metadata",
-    long_description=get_long_description(),
+    long_description="""\
+The `acryl-datahub` package contains a CLI and SDK for interacting with DataHub,
+as well as an integration framework for pulling/pushing metadata from external systems.
+
+See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).
+""",
     long_description_content_type="text/markdown",
     classifiers=[
         "Development Status :: 5 - Production/Stable",
diff --git a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py
index 796786beba21b..a898e35bb810e 100644
--- a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py
+++ b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py
@@ -2,7 +2,7 @@
 
 import logging
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Union
+from typing import Callable, Iterable, List, Optional, Union
 
 import pydantic
 from pydantic import BaseModel
@@ -11,9 +11,10 @@
 from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
 from datahub.configuration.common import ConfigurationError
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import DatahubRestEmitter
-from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
+from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.schema_classes import (
     CorpGroupEditableInfoClass,
     CorpGroupInfoClass,
@@ -25,9 +26,6 @@
     _Aspect,
 )
 
-if TYPE_CHECKING:
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-
 logger = logging.getLogger(__name__)
 
 
@@ -194,30 +192,9 @@ def generate_mcp(
                 entityUrn=urn, aspect=StatusClass(removed=False)
             )
 
-    @staticmethod
-    def _datahub_graph_from_datahub_rest_emitter(
-        rest_emitter: DatahubRestEmitter,
-    ) -> DataHubGraph:
-        """
-        Create a datahub graph instance from a REST Emitter.
-        A stop-gap implementation which is expected to be removed after PATCH support is implemented
-        for membership updates for users <-> groups
-        """
-        graph = DataHubGraph(
-            config=DatahubClientConfig(
-                server=rest_emitter._gms_server,
-                token=rest_emitter._token,
-                timeout_sec=rest_emitter._connect_timeout_sec,
-                retry_status_codes=rest_emitter._retry_status_codes,
-                extra_headers=rest_emitter._session.headers,
-                disable_ssl_verification=rest_emitter._session.verify is False,
-            )
-        )
-        return graph
-
     def emit(
         self,
-        emitter: Union[DatahubRestEmitter, "DatahubKafkaEmitter"],
+        emitter: Emitter,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
         """
@@ -235,7 +212,7 @@ def emit(
                 # who are passing in a DataHubRestEmitter today
                 # we won't need this in the future once PATCH support is implemented as all emitters
                 # will work
-                datahub_graph = self._datahub_graph_from_datahub_rest_emitter(emitter)
+                datahub_graph = emitter.to_graph()
         for mcp in self.generate_mcp(
             generation_config=CorpGroupGenerationConfig(
                 override_editable=self.overrideEditable, datahub_graph=datahub_graph
diff --git a/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py b/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py
index c67eb02a870a5..9fe1ebedafca7 100644
--- a/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py
+++ b/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py
@@ -1,14 +1,14 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Union
+from typing import Callable, Iterable, List, Optional
 
 import pydantic
 
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import ConfigModel
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.metadata.schema_classes import (
     CorpUserEditableInfoClass,
     CorpUserInfoClass,
@@ -16,9 +16,6 @@
     StatusClass,
 )
 
-if TYPE_CHECKING:
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-
 
 @dataclass
 class CorpUserGenerationConfig:
@@ -144,7 +141,7 @@ def generate_mcp(
 
     def emit(
         self,
-        emitter: Union[DatahubRestEmitter, "DatahubKafkaEmitter"],
+        emitter: Emitter,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
         """
diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py
index 8a04768bc0a72..acd708ee81a5c 100644
--- a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py
+++ b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py
@@ -1,18 +1,9 @@
 import logging
 from dataclasses import dataclass, field
-from typing import (
-    TYPE_CHECKING,
-    Callable,
-    Dict,
-    Iterable,
-    List,
-    Optional,
-    Set,
-    Union,
-    cast,
-)
+from typing import Callable, Dict, Iterable, List, Optional, Set, cast
 
 import datahub.emitter.mce_builder as builder
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.schema_classes import (
     AuditStampClass,
@@ -29,10 +20,6 @@
 )
 from datahub.utilities.urns.data_flow_urn import DataFlowUrn
 
-if TYPE_CHECKING:
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-    from datahub.emitter.rest_emitter import DatahubRestEmitter
-
 logger = logging.getLogger(__name__)
 
 
@@ -170,7 +157,7 @@ def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
 
     def emit(
         self,
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
         """
diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py
index 7eb6fc8c8d1a9..0face6415bacc 100644
--- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py
+++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py
@@ -1,16 +1,16 @@
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Set, Union
+from typing import Callable, Dict, Iterable, List, Optional, Set
 
 import datahub.emitter.mce_builder as builder
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.schema_classes import (
     AuditStampClass,
     AzkabanJobTypeClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
-    DataJobSnapshotClass,
+    FineGrainedLineageClass,
     GlobalTagsClass,
-    MetadataChangeEventClass,
     OwnerClass,
     OwnershipClass,
     OwnershipSourceClass,
@@ -23,10 +23,6 @@
 from datahub.utilities.urns.data_job_urn import DataJobUrn
 from datahub.utilities.urns.dataset_urn import DatasetUrn
 
-if TYPE_CHECKING:
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-    from datahub.emitter.rest_emitter import DatahubRestEmitter
-
 
 @dataclass
 class DataJob:
@@ -59,6 +55,7 @@ class DataJob:
     group_owners: Set[str] = field(default_factory=set)
     inlets: List[DatasetUrn] = field(default_factory=list)
     outlets: List[DatasetUrn] = field(default_factory=list)
+    fine_grained_lineages: List[FineGrainedLineageClass] = field(default_factory=list)
     upstream_urns: List[DataJobUrn] = field(default_factory=list)
 
     def __post_init__(self):
@@ -103,31 +100,6 @@ def generate_tags_aspect(self) -> Iterable[GlobalTagsClass]:
         )
         return [tags]
 
-    def generate_mce(self) -> MetadataChangeEventClass:
-        job_mce = MetadataChangeEventClass(
-            proposedSnapshot=DataJobSnapshotClass(
-                urn=str(self.urn),
-                aspects=[
-                    DataJobInfoClass(
-                        name=self.name if self.name is not None else self.id,
-                        type=AzkabanJobTypeClass.COMMAND,
-                        description=self.description,
-                        customProperties=self.properties,
-                        externalUrl=self.url,
-                    ),
-                    DataJobInputOutputClass(
-                        inputDatasets=[str(urn) for urn in self.inlets],
-                        outputDatasets=[str(urn) for urn in self.outlets],
-                        inputDatajobs=[str(urn) for urn in self.upstream_urns],
-                    ),
-                    *self.generate_ownership_aspect(),
-                    *self.generate_tags_aspect(),
-                ],
-            )
-        )
-
-        return job_mce
-
     def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
         mcp = MetadataChangeProposalWrapper(
             entityUrn=str(self.urn),
@@ -159,7 +131,7 @@ def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
 
     def emit(
         self,
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
         """
@@ -179,6 +151,7 @@ def generate_data_input_output_mcp(self) -> Iterable[MetadataChangeProposalWrapp
                 inputDatasets=[str(urn) for urn in self.inlets],
                 outputDatasets=[str(urn) for urn in self.outlets],
                 inputDatajobs=[str(urn) for urn in self.upstream_urns],
+                fineGrainedLineages=self.fine_grained_lineages,
             ),
         )
         yield mcp
diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py
index 9ec389c3a0989..cf6080c7072e6 100644
--- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py
+++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py
@@ -1,9 +1,10 @@
 import time
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Union, cast
+from typing import Callable, Dict, Iterable, List, Optional, Union, cast
 
 from datahub.api.entities.datajob import DataFlow, DataJob
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import DatahubKey
 from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import (
@@ -26,10 +27,6 @@
 from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn
 from datahub.utilities.urns.dataset_urn import DatasetUrn
 
-if TYPE_CHECKING:
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-    from datahub.emitter.rest_emitter import DatahubRestEmitter
-
 
 class DataProcessInstanceKey(DatahubKey):
     cluster: str
@@ -106,7 +103,7 @@ def start_event_mcp(
 
     def emit_process_start(
         self,
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         start_timestamp_millis: int,
         attempt: Optional[int] = None,
         emit_template: bool = True,
@@ -197,7 +194,7 @@ def end_event_mcp(
 
     def emit_process_end(
         self,
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         end_timestamp_millis: int,
         result: InstanceRunResult,
         result_type: Optional[str] = None,
@@ -207,7 +204,7 @@ def emit_process_end(
         """
         Generate an DataProcessInstance finish event and emits is
 
-        :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps
+        :param emitter: (Emitter) the datahub emitter to emit generated mcps
         :param end_timestamp_millis: (int) the end time of the execution in milliseconds
         :param result: (InstanceRunResult) The result of the run
         :param result_type: (string) It identifies the system where the native result comes from like Airflow, Azkaban
@@ -261,24 +258,24 @@ def generate_mcp(
     @staticmethod
     def _emit_mcp(
         mcp: MetadataChangeProposalWrapper,
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
         """
 
-        :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps
+        :param emitter: (Emitter) the datahub emitter to emit generated mcps
         :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used
         """
         emitter.emit(mcp, callback)
 
     def emit(
         self,
-        emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"],
+        emitter: Emitter,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
         """
 
-        :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps
+        :param emitter: (Emitter) the datahub emitter to emit generated mcps
         :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used
         """
         for mcp in self.generate_mcp():
diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
index 04f12b4f61d1e..2d9b14ceb2d06 100644
--- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
+++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
@@ -2,25 +2,15 @@
 
 import time
 from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    Iterable,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import pydantic
 from ruamel.yaml import YAML
 
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import ConfigModel
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.schema_classes import (
     AuditStampClass,
@@ -43,9 +33,6 @@
 from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.urns.urn import Urn
 
-if TYPE_CHECKING:
-    from datahub.emitter.kafka_emitter import DatahubKafkaEmitter
-
 
 def patch_list(
     orig_list: Optional[list],
@@ -225,7 +212,6 @@ def _generate_properties_mcp(
     def generate_mcp(
         self, upsert: bool
     ) -> Iterable[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]:
-
         if self._resolved_domain_urn is None:
             raise Exception(
                 f"Unable to generate MCP-s because we were unable to resolve the domain {self.domain} to an urn."
@@ -282,7 +268,7 @@ def generate_mcp(
 
     def emit(
         self,
-        emitter: Union[DatahubRestEmitter, "DatahubKafkaEmitter"],
+        emitter: Emitter,
         upsert: bool,
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> None:
@@ -440,7 +426,6 @@ def patch_yaml(
         original_dataproduct: DataProduct,
         output_file: Path,
     ) -> bool:
-
         update_needed = False
         if not original_dataproduct._original_yaml_dict:
             raise Exception("Original Data Product was not loaded from yaml")
@@ -523,7 +508,6 @@ def to_yaml(
         self,
         file: Path,
     ) -> None:
-
         with open(file, "w") as fp:
             yaml = YAML(typ="rt")  # default, if not specfied, is 'rt' (round-trip)
             yaml.indent(mapping=2, sequence=4, offset=2)
diff --git a/metadata-ingestion/src/datahub/emitter/generic_emitter.py b/metadata-ingestion/src/datahub/emitter/generic_emitter.py
new file mode 100644
index 0000000000000..28138c6182758
--- /dev/null
+++ b/metadata-ingestion/src/datahub/emitter/generic_emitter.py
@@ -0,0 +1,31 @@
+from typing import Any, Callable, Optional, Union
+
+from typing_extensions import Protocol
+
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
+    MetadataChangeEvent,
+    MetadataChangeProposal,
+)
+
+
+class Emitter(Protocol):
+    def emit(
+        self,
+        item: Union[
+            MetadataChangeEvent,
+            MetadataChangeProposal,
+            MetadataChangeProposalWrapper,
+        ],
+        # NOTE: This signature should have the exception be optional rather than
+        #      required. However, this would be a breaking change that may need
+        #      more careful consideration.
+        callback: Optional[Callable[[Exception, str], None]] = None,
+        # TODO: The rest emitter's emit() returns a tuple of timestamps. For now
+        # we smooth over that detail using Any, but eventually we should
+        # standardize on a return type.
+    ) -> Any:
+        raise NotImplementedError
+
+    def flush(self) -> None:
+        pass
diff --git a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py
index ec0c8f3418a4a..781930011b78f 100644
--- a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py
+++ b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py
@@ -10,6 +10,7 @@
 from datahub.configuration.common import ConfigModel
 from datahub.configuration.kafka import KafkaProducerConnectionConfig
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.closeable import Closeable
 from datahub.metadata.schema_classes import (
@@ -55,7 +56,7 @@ def validate_topic_routes(cls, v: Dict[str, str]) -> Dict[str, str]:
         return v
 
 
-class DatahubKafkaEmitter(Closeable):
+class DatahubKafkaEmitter(Closeable, Emitter):
     def __init__(self, config: KafkaEmitterConfig):
         self.config = config
         schema_registry_conf = {
diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py
index 937e0902d6d8c..afb19df9791af 100644
--- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py
+++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py
@@ -4,7 +4,7 @@
 import logging
 import os
 from json.decoder import JSONDecodeError
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
 import requests
 from deprecated import deprecated
@@ -13,6 +13,7 @@
 
 from datahub.cli.cli_utils import get_system_auth
 from datahub.configuration.common import ConfigurationError, OperationalError
+from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.request_helper import make_curl_command
 from datahub.emitter.serialization_helper import pre_json_transform
@@ -23,6 +24,9 @@
 )
 from datahub.metadata.com.linkedin.pegasus2avro.usage import UsageAggregation
 
+if TYPE_CHECKING:
+    from datahub.ingestion.graph.client import DataHubGraph
+
 logger = logging.getLogger(__name__)
 
 _DEFAULT_CONNECT_TIMEOUT_SEC = 30  # 30 seconds should be plenty to connect
@@ -42,7 +46,7 @@
 )
 
 
-class DataHubRestEmitter(Closeable):
+class DataHubRestEmitter(Closeable, Emitter):
     _gms_server: str
     _token: Optional[str]
     _session: requests.Session
@@ -190,6 +194,11 @@ def test_connection(self) -> dict:
             message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
             raise ConfigurationError(message)
 
+    def to_graph(self) -> "DataHubGraph":
+        from datahub.ingestion.graph.client import DataHubGraph
+
+        return DataHubGraph.from_emitter(self)
+
     def emit(
         self,
         item: Union[
@@ -198,9 +207,6 @@ def emit(
             MetadataChangeProposalWrapper,
             UsageAggregation,
         ],
-        # NOTE: This signature should have the exception be optional rather than
-        #      required. However, this would be a breaking change that may need
-        #      more careful consideration.
         callback: Optional[Callable[[Exception, str], None]] = None,
     ) -> Tuple[datetime.datetime, datetime.datetime]:
         start_time = datetime.datetime.now()
diff --git a/metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py b/metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py
new file mode 100644
index 0000000000000..f82882f1a87cc
--- /dev/null
+++ b/metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py
@@ -0,0 +1,60 @@
+import logging
+import pathlib
+from typing import Callable, Optional, Union
+
+import filelock
+
+from datahub.emitter.generic_emitter import Emitter
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.closeable import Closeable
+from datahub.ingestion.sink.file import write_metadata_file
+from datahub.ingestion.source.file import read_metadata_file
+from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
+    MetadataChangeEvent,
+    MetadataChangeProposal,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class SynchronizedFileEmitter(Closeable, Emitter):
+    """
+    A multiprocessing-safe emitter that writes to a file.
+
+    This emitter is intended for testing purposes only. It is not performant
+    because it reads and writes the full file on every emit call to ensure
+    that the file is always valid JSON.
+    """
+
+    def __init__(self, filename: str) -> None:
+        self._filename = pathlib.Path(filename)
+        self._lock = filelock.FileLock(self._filename.with_suffix(".lock"))
+
+    def emit(
+        self,
+        item: Union[
+            MetadataChangeEvent, MetadataChangeProposal, MetadataChangeProposalWrapper
+        ],
+        callback: Optional[Callable[[Exception, str], None]] = None,
+    ) -> None:
+        with self._lock:
+            if self._filename.exists():
+                metadata = list(read_metadata_file(self._filename))
+            else:
+                metadata = []
+
+            logger.debug("Emitting metadata: %s", item)
+            metadata.append(item)
+
+            write_metadata_file(self._filename, metadata)
+
+    def __repr__(self) -> str:
+        return f"SynchronizedFileEmitter('{self._filename}')"
+
+    def flush(self) -> None:
+        # No-op.
+        pass
+
+    def close(self) -> None:
+        # No-op.
+        pass
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index 673ada4f73051..5120d4f643c94 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -138,6 +138,23 @@ def __init__(self, config: DatahubClientConfig) -> None:
             self.server_id = "missing"
             logger.debug(f"Failed to get server id due to {e}")
 
+    @classmethod
+    def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph":
+        return cls(
+            DatahubClientConfig(
+                server=emitter._gms_server,
+                token=emitter._token,
+                timeout_sec=emitter._read_timeout_sec,
+                retry_status_codes=emitter._retry_status_codes,
+                retry_max_times=emitter._retry_max_times,
+                extra_headers=emitter._session.headers,
+                disable_ssl_verification=emitter._session.verify is False,
+                # TODO: Support these certificate options.
+                # ca_certificate_path=emitter._ca_certificate_path,
+                # client_certificate_path=emitter._client_certificate_path,
+            )
+        )
+
     def _send_restli_request(self, method: str, url: str, **kwargs: Any) -> Dict:
         try:
             response = self._session.request(method, url, **kwargs)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py
index f3344782917ab..5fae0ee5215a3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py
@@ -28,7 +28,9 @@
 )
 from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.sql.sql_common import get_platform_from_sqlalchemy_uri
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
+    get_platform_from_sqlalchemy_uri,
+)
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
     StaleEntityRemovalSourceReport,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index 112defe76d957..056be6c2e50ac 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -1,12 +1,10 @@
 import datetime
 import logging
 import traceback
-from collections import OrderedDict
 from dataclasses import dataclass, field
 from typing import (
     TYPE_CHECKING,
     Any,
-    Callable,
     Dict,
     Iterable,
     List,
@@ -103,52 +101,6 @@
 MISSING_COLUMN_INFO = "missing column information"
 
 
-def _platform_alchemy_uri_tester_gen(
-    platform: str, opt_starts_with: Optional[str] = None
-) -> Tuple[str, Callable[[str], bool]]:
-    return platform, lambda x: x.startswith(
-        platform if not opt_starts_with else opt_starts_with
-    )
-
-
-PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP: Dict[str, Callable[[str], bool]] = OrderedDict(
-    [
-        _platform_alchemy_uri_tester_gen("athena", "awsathena"),
-        _platform_alchemy_uri_tester_gen("bigquery"),
-        _platform_alchemy_uri_tester_gen("clickhouse"),
-        _platform_alchemy_uri_tester_gen("druid"),
-        _platform_alchemy_uri_tester_gen("hana"),
-        _platform_alchemy_uri_tester_gen("hive"),
-        _platform_alchemy_uri_tester_gen("mongodb"),
-        _platform_alchemy_uri_tester_gen("mssql"),
-        _platform_alchemy_uri_tester_gen("mysql"),
-        _platform_alchemy_uri_tester_gen("oracle"),
-        _platform_alchemy_uri_tester_gen("pinot"),
-        _platform_alchemy_uri_tester_gen("presto"),
-        (
-            "redshift",
-            lambda x: (
-                x.startswith(("jdbc:postgres:", "postgresql"))
-                and x.find("redshift.amazonaws") > 0
-            )
-            or x.startswith("redshift"),
-        ),
-        # Don't move this before redshift.
-        _platform_alchemy_uri_tester_gen("postgres", "postgresql"),
-        _platform_alchemy_uri_tester_gen("snowflake"),
-        _platform_alchemy_uri_tester_gen("trino"),
-        _platform_alchemy_uri_tester_gen("vertica"),
-    ]
-)
-
-
-def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str:
-    for platform, tester in PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP.items():
-        if tester(sqlalchemy_uri):
-            return platform
-    return "external"
-
-
 @dataclass
 class SQLSourceReport(StaleEntityRemovalSourceReport):
     tables_scanned: int = 0
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py
new file mode 100644
index 0000000000000..b6a463837228d
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py
@@ -0,0 +1,47 @@
+from collections import OrderedDict
+from typing import Callable, Dict, Optional, Tuple
+
+
+def _platform_alchemy_uri_tester_gen(
+    platform: str, opt_starts_with: Optional[str] = None
+) -> Tuple[str, Callable[[str], bool]]:
+    return platform, lambda x: x.startswith(opt_starts_with or platform)
+
+
+PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP: Dict[str, Callable[[str], bool]] = OrderedDict(
+    [
+        _platform_alchemy_uri_tester_gen("athena", "awsathena"),
+        _platform_alchemy_uri_tester_gen("bigquery"),
+        _platform_alchemy_uri_tester_gen("clickhouse"),
+        _platform_alchemy_uri_tester_gen("druid"),
+        _platform_alchemy_uri_tester_gen("hana"),
+        _platform_alchemy_uri_tester_gen("hive"),
+        _platform_alchemy_uri_tester_gen("mongodb"),
+        _platform_alchemy_uri_tester_gen("mssql"),
+        _platform_alchemy_uri_tester_gen("mysql"),
+        _platform_alchemy_uri_tester_gen("oracle"),
+        _platform_alchemy_uri_tester_gen("pinot"),
+        _platform_alchemy_uri_tester_gen("presto"),
+        (
+            "redshift",
+            lambda x: (
+                x.startswith(("jdbc:postgres:", "postgresql"))
+                and x.find("redshift.amazonaws") > 0
+            )
+            or x.startswith("redshift"),
+        ),
+        # Keep this after redshift: redshift URIs can also start with "postgresql".
+        _platform_alchemy_uri_tester_gen("postgres", "postgresql"),
+        _platform_alchemy_uri_tester_gen("snowflake"),
+        _platform_alchemy_uri_tester_gen("sqlite"),
+        _platform_alchemy_uri_tester_gen("trino"),
+        _platform_alchemy_uri_tester_gen("vertica"),
+    ]
+)
+
+
+def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str:
+    for platform, tester in PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP.items():
+        if tester(sqlalchemy_uri):
+            return platform
+    return "external"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py
index 2a4563439b6ba..14bc4242d2a91 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/superset.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py
@@ -21,7 +21,9 @@
 )
 from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.sql import sql_common
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
+    get_platform_from_sqlalchemy_uri,
+)
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
     StaleEntityRemovalSourceReport,
@@ -202,7 +204,7 @@ def get_platform_from_database_id(self, database_id):
         sqlalchemy_uri = database_response.get("result", {}).get("sqlalchemy_uri")
         if sqlalchemy_uri is None:
             return database_response.get("result", {}).get("backend", "external")
-        return sql_common.get_platform_from_sqlalchemy_uri(sqlalchemy_uri)
+        return get_platform_from_sqlalchemy_uri(sqlalchemy_uri)
 
     @lru_cache(maxsize=None)
     def get_datasource_urn_from_id(self, datasource_id):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
index 4cc00a66116e9..6214cba342622 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
@@ -1179,8 +1179,6 @@ def get_upstream_fields_of_field_in_datasource(
     def get_upstream_fields_from_custom_sql(
         self, datasource: dict, datasource_urn: str
     ) -> List[FineGrainedLineage]:
-        fine_grained_lineages: List[FineGrainedLineage] = []
-
         parsed_result = self.parse_custom_sql(
             datasource=datasource,
             datasource_urn=datasource_urn,
@@ -1194,13 +1192,20 @@ def get_upstream_fields_from_custom_sql(
             logger.info(
                 f"Failed to extract column level lineage from datasource {datasource_urn}"
             )
-            return fine_grained_lineages
+            return []
+        if parsed_result.debug_info.error:
+            logger.info(
+                f"Failed to extract column level lineage from datasource {datasource_urn}: {parsed_result.debug_info.error}"
+            )
+            return []
 
         cll: List[ColumnLineageInfo] = (
             parsed_result.column_lineage
             if parsed_result.column_lineage is not None
             else []
         )
+
+        fine_grained_lineages: List[FineGrainedLineage] = []
         for cll_info in cll:
             downstream = (
                 [
diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py
index eabf62a4cda2b..f116550328819 100644
--- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py
+++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py
@@ -35,7 +35,9 @@
 from datahub.cli.cli_utils import get_boolean_env_variable
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.rest_emitter import DatahubRestEmitter
-from datahub.ingestion.source.sql.sql_common import get_platform_from_sqlalchemy_uri
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
+    get_platform_from_sqlalchemy_uri,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.assertion import (
     AssertionInfo,
     AssertionResult,
diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
index 5c52e1ab4f0b3..54f6a6e984c00 100644
--- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
+++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
@@ -40,6 +40,7 @@ def assert_metadata_files_equal(
     update_golden: bool,
     copy_output: bool,
     ignore_paths: Sequence[str] = (),
+    ignore_order: bool = True,
 ) -> None:
     golden_exists = os.path.isfile(golden_path)
 
@@ -65,7 +66,7 @@ def assert_metadata_files_equal(
             write_metadata_file(pathlib.Path(temp.name), golden_metadata)
             golden = load_json_file(temp.name)
 
-    diff = diff_metadata_json(output, golden, ignore_paths)
+    diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
     if diff and update_golden:
         if isinstance(diff, MCPDiff):
             diff.apply_delta(golden)
@@ -91,16 +92,19 @@ def diff_metadata_json(
     output: MetadataJson,
     golden: MetadataJson,
     ignore_paths: Sequence[str] = (),
+    ignore_order: bool = True,
 ) -> Union[DeepDiff, MCPDiff]:
     ignore_paths = (*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info")
     try:
-        golden_map = get_aspects_by_urn(golden)
-        output_map = get_aspects_by_urn(output)
-        return MCPDiff.create(
-            golden=golden_map,
-            output=output_map,
-            ignore_paths=ignore_paths,
-        )
+        if ignore_order:
+            golden_map = get_aspects_by_urn(golden)
+            output_map = get_aspects_by_urn(output)
+            return MCPDiff.create(
+                golden=golden_map,
+                output=output_map,
+                ignore_paths=ignore_paths,
+            )
+        # If ignore_order is False, skip MCPDiff and fall through to DeepDiff below.
     except CannotCompareMCPs as e:
         logger.info(f"{e}, falling back to MCE diff")
     except AssertionError as e:
@@ -111,5 +115,5 @@ def diff_metadata_json(
         golden,
         output,
         exclude_regex_paths=ignore_paths,
-        ignore_order=True,
+        ignore_order=ignore_order,
     )
diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
index f18235af3d1fd..4b3090eaaad31 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
@@ -231,6 +231,13 @@ def _table_level_lineage(
         # In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)",
         # the `this` on the INSERT part isn't a table.
         if isinstance(expr.this, sqlglot.exp.Table)
+    } | {
+        # For CREATE DDL statements, the table name is nested inside
+        # a Schema object.
+        _TableName.from_sqlglot_table(expr.this.this)
+        for expr in statement.find_all(sqlglot.exp.Create)
+        if isinstance(expr.this, sqlglot.exp.Schema)
+        and isinstance(expr.this.this, sqlglot.exp.Table)
     }
 
     tables = (
@@ -242,7 +249,7 @@ def _table_level_lineage(
         - modified
         # ignore CTEs created in this statement
         - {
-            _TableName(database=None, schema=None, table=cte.alias_or_name)
+            _TableName(database=None, db_schema=None, table=cte.alias_or_name)
             for cte in statement.find_all(sqlglot.exp.CTE)
         }
     )
@@ -906,32 +913,39 @@ def create_lineage_sql_parsed_result(
     env: str,
     schema: Optional[str] = None,
     graph: Optional[DataHubGraph] = None,
-) -> Optional["SqlParsingResult"]:
-    parsed_result: Optional["SqlParsingResult"] = None
+) -> SqlParsingResult:
+    needs_close = False
     try:
-        schema_resolver = (
-            graph._make_schema_resolver(
+        if graph:
+            schema_resolver = graph._make_schema_resolver(
                 platform=platform,
                 platform_instance=platform_instance,
                 env=env,
             )
-            if graph is not None
-            else SchemaResolver(
+        else:
+            schema_resolver = SchemaResolver(
                 platform=platform,
                 platform_instance=platform_instance,
                 env=env,
                 graph=None,
             )
-        )
+            needs_close = True  # set only after the resolver is constructed
 
-        parsed_result = sqlglot_lineage(
+        return sqlglot_lineage(
             query,
             schema_resolver=schema_resolver,
             default_db=database,
             default_schema=schema,
         )
     except Exception as e:
-        logger.debug(f"Fail to prase query {query}", exc_info=e)
-        logger.warning("Fail to parse custom SQL")
-
-    return parsed_result
+        return SqlParsingResult(
+            in_tables=[],
+            out_tables=[],
+            column_lineage=None,
+            debug_info=SqlParsingDebugInfo(
+                table_error=e,
+            ),
+        )
+    finally:
+        if needs_close:
+            schema_resolver.close()
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json
new file mode 100644
index 0000000000000..4773974545bfa
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json
@@ -0,0 +1,8 @@
+{
+    "query_type": "CREATE",
+    "in_tables": [],
+    "out_tables": [
+        "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)"
+    ],
+    "column_lineage": null
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
index 483c1ac4cc7f9..2a965a9bb1e61 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
@@ -274,6 +274,21 @@ def test_expand_select_star_basic():
     )
 
 
+def test_create_table_ddl():
+    assert_sql_result(
+        """
+CREATE TABLE IF NOT EXISTS costs (
+    id INTEGER PRIMARY KEY,
+    month TEXT NOT NULL,
+    total_cost REAL NOT NULL,
+    area REAL NOT NULL
+)
+""",
+        dialect="sqlite",
+        expected_file=RESOURCE_DIR / "test_create_table_ddl.json",
+    )
+
+
 def test_snowflake_column_normalization():
     # Technically speaking this is incorrect since the column names are different and both quoted.
 
diff --git a/metadata-ingestion/tests/unit/test_sql_common.py b/metadata-ingestion/tests/unit/test_sql_common.py
index 95af0e623e991..808b38192411d 100644
--- a/metadata-ingestion/tests/unit/test_sql_common.py
+++ b/metadata-ingestion/tests/unit/test_sql_common.py
@@ -4,12 +4,11 @@
 import pytest
 from sqlalchemy.engine.reflection import Inspector
 
-from datahub.ingestion.source.sql.sql_common import (
-    PipelineContext,
-    SQLAlchemySource,
+from datahub.ingestion.source.sql.sql_common import PipelineContext, SQLAlchemySource
+from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
+from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import (
     get_platform_from_sqlalchemy_uri,
 )
-from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
 
 
 class _TestSQLAlchemyConfig(SQLCommonConfig):

From e3780c2d75e4dc4dc95e83476d103a4454ee2aae Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Wed, 4 Oct 2023 16:23:31 +0530
Subject: [PATCH 25/25] =?UTF-8?q?feat(ingest/snowflake):=20initialize=20sc?=
 =?UTF-8?q?hema=20resolver=20from=20datahub=20for=20l=E2=80=A6=20(#8903)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/datahub/ingestion/graph/client.py     |  8 ++---
 .../ingestion/source/bigquery_v2/bigquery.py  |  2 +-
 .../source/snowflake/snowflake_config.py      |  4 +--
 .../source/snowflake/snowflake_v2.py          | 33 ++++++++++++-------
 .../datahub/ingestion/source/sql_queries.py   |  5 ++-
 .../src/datahub/utilities/sqlglot_lineage.py  |  5 +--
 6 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index 5120d4f643c94..ccff677c3a471 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -7,7 +7,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from json.decoder import JSONDecodeError
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Type
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type
 
 from avro.schema import RecordSchema
 from deprecated import deprecated
@@ -1010,14 +1010,13 @@ def _make_schema_resolver(
 
     def initialize_schema_resolver_from_datahub(
         self, platform: str, platform_instance: Optional[str], env: str
-    ) -> Tuple["SchemaResolver", Set[str]]:
+    ) -> "SchemaResolver":
         logger.info("Initializing schema resolver")
         schema_resolver = self._make_schema_resolver(
             platform, platform_instance, env, include_graph=False
         )
 
         logger.info(f"Fetching schemas for platform {platform}, env {env}")
-        urns = []
         count = 0
         with PerfTimer() as timer:
             for urn, schema_info in self._bulk_fetch_schema_info_by_filter(
@@ -1026,7 +1025,6 @@ def initialize_schema_resolver_from_datahub(
                 env=env,
             ):
                 try:
-                    urns.append(urn)
                     schema_resolver.add_graphql_schema_metadata(urn, schema_info)
                     count += 1
                 except Exception:
@@ -1041,7 +1039,7 @@ def initialize_schema_resolver_from_datahub(
             )
 
         logger.info("Finished initializing schema resolver")
-        return schema_resolver, set(urns)
+        return schema_resolver
 
     def parse_sql_lineage(
         self,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
index 8a16b1a4a5f6b..f6adbcf033bcc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
@@ -458,7 +458,7 @@ def _init_schema_resolver(self) -> SchemaResolver:
                     platform=self.platform,
                     platform_instance=self.config.platform_instance,
                     env=self.config.env,
-                )[0]
+                )
             else:
                 logger.warning(
                     "Failed to load schema info from DataHub as DataHubGraph is missing.",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
index 95f6444384408..032bdef178fdf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
@@ -101,8 +101,8 @@ class SnowflakeV2Config(
     )
 
     include_view_column_lineage: bool = Field(
-        default=False,
-        description="Populates view->view and table->view column lineage.",
+        default=True,
+        description="Populates view->view and table->view column lineage using DataHub's sql parser.",
     )
 
     _check_role_grants_removed = pydantic_removed_field("check_role_grants")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
index 240e0ffa1a0b6..215116b4c33fb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
@@ -301,14 +301,11 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config):
         # Caches tables for a single database. Consider moving to disk or S3 when possible.
         self.db_tables: Dict[str, List[SnowflakeTable]] = {}
 
-        self.sql_parser_schema_resolver = SchemaResolver(
-            platform=self.platform,
-            platform_instance=self.config.platform_instance,
-            env=self.config.env,
-        )
         self.view_definitions: FileBackedDict[str] = FileBackedDict()
         self.add_config_to_report()
 
+        self.sql_parser_schema_resolver = self._init_schema_resolver()
+
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
         config = SnowflakeV2Config.parse_obj(config_dict)
@@ -493,6 +490,24 @@ def query(query):
 
         return _report
 
+    def _init_schema_resolver(self) -> SchemaResolver:
+        if not self.config.include_technical_schema and self.config.parse_view_ddl:
+            if self.ctx.graph:
+                return self.ctx.graph.initialize_schema_resolver_from_datahub(
+                    platform=self.platform,
+                    platform_instance=self.config.platform_instance,
+                    env=self.config.env,
+                )
+            else:
+                logger.warning(
+                    "Failed to load schema info from DataHub as DataHubGraph is missing.",
+                )
+        return SchemaResolver(
+            platform=self.platform,
+            platform_instance=self.config.platform_instance,
+            env=self.config.env,
+        )
+
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),
@@ -764,7 +779,7 @@ def _process_schema(
             )
             self.db_tables[schema_name] = tables
 
-            if self.config.include_technical_schema or self.config.parse_view_ddl:
+            if self.config.include_technical_schema:
                 for table in tables:
                     yield from self._process_table(table, schema_name, db_name)
 
@@ -776,7 +791,7 @@ def _process_schema(
                     if view.view_definition:
                         self.view_definitions[key] = view.view_definition
 
-            if self.config.include_technical_schema or self.config.parse_view_ddl:
+            if self.config.include_technical_schema:
                 for view in views:
                     yield from self._process_view(view, schema_name, db_name)
 
@@ -892,8 +907,6 @@ def _process_table(
                     yield from self._process_tag(tag)
 
             yield from self.gen_dataset_workunits(table, schema_name, db_name)
-        elif self.config.parse_view_ddl:
-            self.gen_schema_metadata(table, schema_name, db_name)
 
     def fetch_sample_data_for_classification(
         self, table: SnowflakeTable, schema_name: str, db_name: str, dataset_name: str
@@ -1004,8 +1017,6 @@ def _process_view(
                     yield from self._process_tag(tag)
 
             yield from self.gen_dataset_workunits(view, schema_name, db_name)
-        elif self.config.parse_view_ddl:
-            self.gen_schema_metadata(view, schema_name, db_name)
 
     def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
         tag_identifier = tag.identifier()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py
index 2fcc93292c2ef..bce4d1ec76e6e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py
@@ -103,13 +103,12 @@ def __init__(self, ctx: PipelineContext, config: SqlQueriesSourceConfig):
         self.builder = SqlParsingBuilder(usage_config=self.config.usage)
 
         if self.config.use_schema_resolver:
-            schema_resolver, urns = self.graph.initialize_schema_resolver_from_datahub(
+            self.schema_resolver = self.graph.initialize_schema_resolver_from_datahub(
                 platform=self.config.platform,
                 platform_instance=self.config.platform_instance,
                 env=self.config.env,
             )
-            self.schema_resolver = schema_resolver
-            self.urns = urns
+            self.urns = self.schema_resolver.get_urns()
         else:
             self.schema_resolver = self.graph._make_schema_resolver(
                 platform=self.config.platform,
diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
index 4b3090eaaad31..81c43884fdf7d 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
@@ -283,6 +283,9 @@ def __init__(
             shared_connection=shared_conn,
         )
 
+    def get_urns(self) -> Set[str]:
+        return set(self._schema_cache.keys())
+
     def get_urn_for_table(self, table: _TableName, lower: bool = False) -> str:
         # TODO: Validate that this is the correct 2/3 layer hierarchy for the platform.
 
@@ -397,8 +400,6 @@ def convert_graphql_schema_metadata_to_info(
             )
         }
 
-    # TODO add a method to load all from graphql
-
     def close(self) -> None:
         self._schema_cache.close()