diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index a1bfc4d39a370..e910449ed5870 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -7,6 +7,7 @@ on: paths: - "metadata-ingestion/**" - "metadata-models/**" + - "docs/**" - "docs-website/**" push: branches: @@ -14,6 +15,7 @@ on: paths: - "metadata-ingestion/**" - "metadata-models/**" + - "docs/**" - "docs-website/**" # release: # types: [published, edited] diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml index fc6bdb856816f..4fdb4a5b92e4d 100644 --- a/.github/workflows/pr-labeler.yml +++ b/.github/workflows/pr-labeler.yml @@ -43,7 +43,9 @@ jobs: "gaurav2733", "dushayntAW", "AvaniSiddhapuraAPT", - "akarsh991" + "akarsh991", + "shubhamjagtap639", + "mayurinehate" ]'), github.actor ) diff --git a/build.gradle b/build.gradle index 833dbaeb21d94..9c18ca62fb3c1 100644 --- a/build.gradle +++ b/build.gradle @@ -107,7 +107,7 @@ project.ext.externalDependency = [ 'avro': 'org.apache.avro:avro:1.11.3', 'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3', 'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.17', - 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.9', + 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:2.0.3', 'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.13', 'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:1.0.2', 'awsRds':'software.amazon.awssdk:rds:2.18.24', diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 782f9a05dfb25..8ee07db0fde30 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -69,9 +69,7 @@ dependencies { runtimeOnly externalDependency.mysqlConnector runtimeOnly externalDependency.postgresql - implementation(externalDependency.awsMskIamAuth) { - exclude group: 'software.amazon.awssdk', module: 'third-party-jackson-core' - } + implementation externalDependency.awsMskIamAuth annotationProcessor externalDependency.lombok annotationProcessor externalDependency.picocli diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx index 26228e8c44515..2d93f3cc73470 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx @@ -106,6 +106,8 @@ type Props = { shouldRefetch?: boolean; resetShouldRefetch?: () => void; applyView?: boolean; + onLineageClick?: () => void; + isLineageTab?: boolean; }; export const EmbeddedListSearch = ({ @@ -134,6 +136,8 @@ export const EmbeddedListSearch = ({ shouldRefetch, resetShouldRefetch, applyView = false, + onLineageClick, + isLineageTab = false, }: Props) => { const { shouldRefetchEmbeddedListSearch, setShouldRefetchEmbeddedListSearch } = useEntityContext(); // Adjust query based on props @@ -143,7 +147,6 @@ export const EmbeddedListSearch = ({ unionType, filters, }; - const finalFilters = (fixedFilters && mergeFilterSets(fixedFilters, baseFilters)) || generateOrFilters(unionType, filters); @@ -191,6 +194,12 @@ export const EmbeddedListSearch = ({ fetchPolicy: 'cache-first', }); + const [serverError, setServerError] = useState(undefined); + + useEffect(() => { + setServerError(error); + }, [error]); + useEffect(() => { if (shouldRefetch && resetShouldRefetch) { 
refetch({ @@ -282,9 +291,18 @@ export const EmbeddedListSearch = ({ }); } + const isServerOverloadError = [503, 500, 504].includes(serverError?.networkError?.response?.status); + + const onClickLessHops = () => { + setServerError(undefined); + onChangeFilters(defaultFilters); + }; + + const ErrorMessage = () => ; + return ( - {error && } + {!isLineageTab ? error && : serverError && !isServerOverloadError && } onChangeQuery(addFixedQuery(q, fixedQuery as string, emptySearchQuery as string))} placeholderText={placeholderText} @@ -303,6 +321,10 @@ export const EmbeddedListSearch = ({ /> void; entityAction?: React.FC; applyView?: boolean; + isServerOverloadError?: any; + onClickLessHops?: () => void; + onLineageClick?: () => void; + isLineageTab?: boolean; } export const EmbeddedListSearchResults = ({ @@ -104,6 +122,10 @@ export const EmbeddedListSearchResults = ({ setNumResultsPerPage, entityAction, applyView, + isServerOverloadError, + onClickLessHops, + onLineageClick, + isLineageTab = false, }: Props) => { const pageStart = searchResponse?.start || 0; const pageSize = searchResponse?.count || 0; @@ -131,7 +153,19 @@ export const EmbeddedListSearchResults = ({ } /> )} - {!loading && ( + {isLineageTab && !loading && isServerOverloadError && ( + + Data is too large. Please use + + visualize lineage + + or see less hops by clicking + + here + + + )} + {!loading && !isServerOverloadError && ( void; applyView?: boolean; + onLineageClick?: () => void; + isLineageTab?: boolean; }; export const EmbeddedListSearchSection = ({ @@ -69,6 +71,8 @@ export const EmbeddedListSearchSection = ({ shouldRefetch, resetShouldRefetch, applyView, + onLineageClick, + isLineageTab }: Props) => { const history = useHistory(); const location = useLocation(); @@ -155,6 +159,8 @@ export const EmbeddedListSearchSection = ({ shouldRefetch={shouldRefetch} resetShouldRefetch={resetShouldRefetch} applyView={applyView} + onLineageClick={onLineageClick} + isLineageTab={isLineageTab} /> ); }; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx b/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx index ce5a1598a00ec..4f1c5bb98807d 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx @@ -13,6 +13,8 @@ type Props = { skipCache?: boolean; setSkipCache?: (skipCache: boolean) => void; resetShouldRefetch?: () => void; + onLineageClick?: () => void; + isLineageTab?: boolean; }; export const ImpactAnalysis = ({ @@ -24,6 +26,8 @@ export const ImpactAnalysis = ({ skipCache, setSkipCache, resetShouldRefetch, + onLineageClick, + isLineageTab }: Props) => { const finalStartTimeMillis = startTimeMillis || undefined; const finalEndTimeMillis = endTimeMillis || undefined; @@ -49,6 +53,8 @@ export const ImpactAnalysis = ({ defaultFilters={[{ field: 'degree', values: ['1'] }]} shouldRefetch={shouldRefetch} resetShouldRefetch={resetShouldRefetch} + onLineageClick={onLineageClick} + isLineageTab={isLineageTab} /> ); }; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx index bbc86d49404a6..a5debe43d4f0f 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx @@ -181,6 +181,8 @@ export const LineageTab = ({ ', }, { - href: "https://docs-website-1gv2yzn9d-acryldata.vercel.app/docs/", - label: 
"0.10.5", + type: 'html', + value: '', + }, + { + value: ` + 0.11.0 + + + `, + type: "html", + }, + { + value: ` + 0.10.5 + + + `, + type: "html", }, ], }, diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 13bda5d735f3e..2b60906b794a2 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -173,6 +173,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_2_16", "docs/managed-datahub/release-notes/v_0_2_15", "docs/managed-datahub/release-notes/v_0_2_14", "docs/managed-datahub/release-notes/v_0_2_13", diff --git a/docs-website/src/pages/docs/_components/FeatureCard/featurecard.module.scss b/docs-website/src/pages/docs/_components/FeatureCard/featurecard.module.scss index 61739d5b6922c..69558d986ada9 100644 --- a/docs-website/src/pages/docs/_components/FeatureCard/featurecard.module.scss +++ b/docs-website/src/pages/docs/_components/FeatureCard/featurecard.module.scss @@ -1,11 +1,15 @@ +@media (min-width: 997px) and (max-width: 1465px) { + .feature { + min-height: 20rem !important; + max-height: 30rem !important; + } +} .feature { flex-direction: row; - padding: 1.75rem; color: var(--ifm-hero-text-color); margin: 0rem 2rem 1rem 0rem; - min-height: 14rem; - max-height: 15rem; - overflow: hidden; + min-height: 15rem; + max-height: 15rem; text-decoration: none !important; img { @@ -36,3 +40,4 @@ border-color: var(--ifm-color-primary); } } + diff --git a/docs-website/src/pages/docs/_components/FeatureCard/index.jsx b/docs-website/src/pages/docs/_components/FeatureCard/index.jsx index 407e8eb401987..8fb24493e50e9 100644 --- a/docs-website/src/pages/docs/_components/FeatureCard/index.jsx +++ b/docs-website/src/pages/docs/_components/FeatureCard/index.jsx @@ -8,7 +8,7 @@ const FeatureCard = ({icon, title, description, to}) => { return (
-
+
{icon} {title} → {description} diff --git a/docs-website/src/pages/docs/_components/QuickstartCard/index.jsx b/docs-website/src/pages/docs/_components/QuickstartCard/index.jsx index b4e3895fa40e7..d23901506dcce 100644 --- a/docs-website/src/pages/docs/_components/QuickstartCard/index.jsx +++ b/docs-website/src/pages/docs/_components/QuickstartCard/index.jsx @@ -9,9 +9,13 @@ const QuickstartCard = ({ icon, title, to, color, fontColor }) => { return (
- -
- {title} → +
+ +
+
+ {title} → +
+
diff --git a/docs-website/src/pages/docs/_components/QuickstartCard/quickstartcard.module.scss b/docs-website/src/pages/docs/_components/QuickstartCard/quickstartcard.module.scss index fd35a4b777c99..70515919060e6 100644 --- a/docs-website/src/pages/docs/_components/QuickstartCard/quickstartcard.module.scss +++ b/docs-website/src/pages/docs/_components/QuickstartCard/quickstartcard.module.scss @@ -2,47 +2,34 @@ flex-direction: row; height: 10rem; flex-shrink: 0; - padding: 3rem; color: var(--ifm-text-color); margin: 0rem 2rem 1rem 0rem; min-height: calc(100% - 1rem); text-decoration: none !important; - img { width: 3rem; height: 3rem; - margin: auto 1rem; + margin: auto; } svg { width: 1.5rem; height: 1.5rem; margin-right: 0.75rem; } - strong, - span { - display: block; - margin-bottom: 0.25rem; - } - strong { - font-weight: 600; - padding: auto 0; - } - span { - font-size: 0.875rem; - line-height: 1.25em; - } &:hover { border-color: var(--ifm-color-primary); } - .quickstart-text { - margin: auto 0; - } - } -.quickstart-text { - margin: auto 0; +.card_content { + display: flex; + margin: 0 auto; +} + +.card_title { + padding-left: 1rem; + font-weight: 600; } \ No newline at end of file diff --git a/docs-website/src/pages/docs/_components/QuickstartCards/quickstartcards.module.scss b/docs-website/src/pages/docs/_components/QuickstartCards/quickstartcards.module.scss index 4fbbc4583d662..833ec97b15ca3 100644 --- a/docs-website/src/pages/docs/_components/QuickstartCards/quickstartcards.module.scss +++ b/docs-website/src/pages/docs/_components/QuickstartCards/quickstartcards.module.scss @@ -15,12 +15,9 @@ height: 1.5rem; margin-right: 0.75rem; } - strong, - span { + strong { display: block; margin-bottom: 0.25rem; - } - strong { font-weight: 600; } diff --git a/docs-website/src/pages/docs/index.js b/docs-website/src/pages/docs/index.js index 11f1b3344a3d8..3f123e7b488ba 100644 --- a/docs-website/src/pages/docs/index.js +++ b/docs-website/src/pages/docs/index.js @@ -1,288 +1,8 @@ -import React from "react"; -import Layout from "@theme/Layout"; -import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; -import SearchBar from "./_components/SearchBar"; -import QuickLinkCards from "./_components/QuickLinkCards"; -import GuideList from "./_components/GuideList"; +import React from 'react'; +import { Redirect } from '@docusaurus/router'; -import { - FolderTwoTone, - BookTwoTone, - TagsTwoTone, - ApiTwoTone, - SearchOutlined, - CompassTwoTone, - NodeExpandOutlined, - CheckCircleTwoTone, - SafetyCertificateTwoTone, - LockTwoTone, - SlackOutlined, - HistoryOutlined, - InteractionOutlined, - GlobalOutlined, - FileTextOutlined, -} from "@ant-design/icons"; +const Home = () => { + return ; +}; -//quickLinkCards -import { - ThunderboltTwoTone, - DeploymentUnitOutlined, - SyncOutlined, - CodeTwoTone, - QuestionCircleTwoTone, - SlidersTwoTone, - HeartTwoTone, -} from "@ant-design/icons"; - -const deploymentGuideContent = [ - { - title: "Managed DataHub", - platformIcon: "acryl", - to: "docs/managed-datahub/managed-datahub-overview", - }, - { - title: "Docker", - platformIcon: "docker", - to: "docs/docker", - }, - // { - // title: "AWS ECS", - // platformIcon: "amazon-ecs", - // to: "docs/deploy/aws", - // }, - { - title: "AWS", - platformIcon: "amazon-eks", - to: "docs/deploy/aws", - }, - { - title: "GCP", - platformIcon: "google-cloud", - to: "docs/deploy/gcp", - }, -]; - -const ingestionGuideContent = [ - { - title: "Snowflake", - platformIcon: "snowflake", - to: 
"docs/generated/ingestion/sources/snowflake", - }, - { - title: "Looker", - platformIcon: "looker", - to: "docs/generated/ingestion/sources/looker", - }, - { - title: "Redshift", - platformIcon: "redshift", - to: "docs/generated/ingestion/sources/redshift", - }, - { - title: "Hive", - platformIcon: "hive", - to: "docs/generated/ingestion/sources/hive", - }, - { - title: "BigQuery", - platformIcon: "bigquery", - to: "docs/generated/ingestion/sources/bigquery", - }, - { - title: "dbt", - platformIcon: "dbt", - to: "docs/generated/ingestion/sources/dbt", - }, - { - title: "Athena", - platformIcon: "athena", - to: "docs/generated/ingestion/sources/athena", - }, - { - title: "PostgreSQL", - platformIcon: "postgres", - to: "docs/generated/ingestion/sources/postgres", - }, -]; - -const featureGuideContent = [ - { title: "Domains", icon: , to: "docs/domains" }, - { - title: "Glossary Terms", - icon: , - to: "docs/glossary/business-glossary", - }, - { title: "Tags", icon: , to: "docs/tags" }, - { - title: "Ingestion", - icon: , - to: "docs/ui-ingestion", - }, - { title: "Search", icon: , to: "docs/how/search" }, - // { title: "Browse", icon: , to: "/docs/quickstart" }, - { - title: "Lineage Impact Analysis", - icon: , - to: "docs/act-on-metadata/impact-analysis", - }, - { - title: "Metadata Tests", - icon: , - to: "docs/tests/metadata-tests", - }, - { - title: "Approval Flows", - icon: , - to: "docs/managed-datahub/approval-workflows", - }, - { - title: "Personal Access Tokens", - icon: , - to: "docs/authentication/personal-access-tokens", - }, - { - title: "Slack Notifications", - icon: , - to: "docs/managed-datahub/saas-slack-setup", - }, - { - title: "Schema History", - icon: , - to: "docs/schema-history", - }, -]; - -const quickLinkContent = [ - { - title: "Get Started", - icon: , - description: "Details on how to get DataHub up and running", - to: "/docs/quickstart", - }, - { - title: "Ingest Metadata", - icon: , - description: "Details on how to get Metadata loaded into DataHub", - to: "/docs/metadata-ingestion", - }, - { - title: "API", - icon: , - description: "Details on how to utilize Metadata programmatically", - to: "docs/api/datahub-apis", - }, - { - title: "Act on Metadata", - icon: , - description: "Step-by-step guides for acting on Metadata Events", - to: "docs/act-on-metadata", - }, - { - title: "Developer Guides", - icon: , - description: "Interact with DataHub programmatically", - to: "/docs/api/datahub-apis", - }, - { - title: "Feature Guides", - icon: , - description: "Step-by-step guides for making the most of DataHub", - to: "/docs/how/search", - }, - { - title: "Deployment Guides", - icon: , - description: "Step-by-step guides for deploying DataHub to production", - to: "/docs/deploy/aws", - }, - { - title: "Join the Community", - icon: , - description: "Collaborate, learn, and grow with us", - to: "/docs/slack", - }, -]; - -const gitLinkContent = [ - { - title: "datahub", - icon: , - to: "https://github.com/datahub-project/datahub", - }, - { - title: "datahub-actions", - icon: , - to: "https://github.com/acryldata/datahub-actions", - }, - { - title: "datahub-helm", - icon: , - to: "https://github.com/acryldata/datahub-helm", - }, - { - title: "meta-world", - icon: , - to: "https://github.com/acryldata/meta-world", - }, - { - title: "business-glossary-sync-action", - icon: , - to: "https://github.com/acryldata/business-glossary-sync-action", - }, - { - title: "dbt-impact-action", - icon: , - to: "https://github.com/acryldata/dbt-impact-action", - }, -]; - -function Docs() { 
- const context = useDocusaurusContext(); - const { siteConfig = {} } = context; - - return ( - -
-
-
-
-

Documentation

-

- Guides and tutorials for everything DataHub. -

- -
-
- - - - - -
-
-
- ); -} - -export default Docs; +export default Home; \ No newline at end of file diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index e9942cb19a4c9..252c96cab56c3 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -59,33 +59,61 @@ Get started with our Rest.li API DataHub supports several APIs, each with its own unique usage and format. Here's an overview of what each API can do. -> Last Updated : Apr 8 2023 - -| Feature | GraphQL | Python SDK | OpenAPI | -| ------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | ------- | -| Create a dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | -| Delete a dataset (Soft delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Delete a dataset (Hard delele) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Search a dataset | ✅ | ✅ | ✅ | -| Create a tag | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ | -| Read a tag | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ | -| Add tags to a dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ | -| Add tags to a column of a dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ | -| Remove tags from a dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags) | ✅ | -| Create glossary terms | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ | -| Read terms from a dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ | -| Add terms to a column of a dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ | -| Add terms to a dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ [[Guide]](/docs/api/tutorials/terms.md) | ✅ | -| Create domains | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ | -| Read domains | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ | -| Add domains to a dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ | -| Remove domains from a dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ [[Guide]](/docs/api/tutorials/domains.md) | ✅ | -| Crate users and groups | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | -| Read owners of a dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | -| Add owner to a dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | -| Remove owner from a dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ [[Guide]](/docs/api/tutorials/owners.md) | ✅ | -| Add lineage | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ | -| Add column level(Fine Grained) lineage | 🚫 | ✅ | ✅ | -| Add documentation(description) to a column of a dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ 
[[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ | -| Add documentation(description) to a dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ | -| Add / Remove / Replace custom properties on a dataset | 🚫 [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ | +> Last Updated : Feb 16 2024 + +| Feature | GraphQL | Python SDK | OpenAPI | +|------------------------------------|------------------------------------------------------------------------------|------------------------------------------------------------------------------|---------| +| Create a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | +| Delete a Dataset (Soft Delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | +| Delete a Dataset (Hard Delete) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | +| Search a Dataset | ✅ | ✅ | ✅ | +| Read a Dataset Deprecation | ✅ | ✅ | ✅ | +| Read Dataset Entities (V2) | ✅ | ✅ | ✅ | +| Create a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ | +| Read a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#read-tags) | ✅ | +| Add Tags to a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-dataset) | ✅ | +| Add Tags to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags-to-a-column-of-a-dataset) | ✅ | +| Remove Tags from a Dataset | ✅ [[Guide]](/docs/api/tutorials/tags.md#remove-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#add-tags#remove-tags) | ✅ | +| Create Glossary Terms | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#create-terms) | ✅ | +| Read Terms from a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ [[Guide]](/docs/api/tutorials/terms.md#read-terms) | ✅ | +| Add Terms to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-column-of-a-dataset) | ✅ | +| Add Terms to a Dataset | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ [[Guide]](/docs/api/tutorials/terms.md#add-terms-to-a-dataset) | ✅ | +| Create Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ [[Guide]](/docs/api/tutorials/domains.md#create-domain) | ✅ | +| Read Domains | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#read-domains) | ✅ | +| Add Domains to a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#add-domains) | ✅ | +| Remove Domains from a Dataset | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ [[Guide]](/docs/api/tutorials/domains.md#remove-domains) | ✅ | +| Create / Upsert Users | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-users) | ✅ | +| Create / Upsert Group | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ [[Guide]](/docs/api/tutorials/owners.md#upsert-group) | ✅ | +| Read Owners of 
a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#read-owners) | ✅ | +| Add Owner to a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#add-owners) | ✅ | +| Remove Owner from a Dataset | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ [[Guide]](/docs/api/tutorials/owners.md#remove-owners) | ✅ | +| Add Lineage | ✅ [[Guide]](/docs/api/tutorials/lineage.md) | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-lineage) | ✅ | +| Add Column Level (Fine Grained) Lineage | 🚫 | ✅ [[Guide]](/docs/api/tutorials/lineage.md#add-column-level-lineage) | ✅ | +| Add Documentation (Description) to a Column of a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-column) | ✅ | +| Add Documentation (Description) to a Dataset | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ [[Guide]](/docs/api/tutorials/descriptions.md#add-description-on-dataset) | ✅ | +| Add / Remove / Replace Custom Properties on a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/custom-properties.md) | ✅ | +| Add ML Feature to ML Feature Table | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlfeaturetable) | ✅ | +| Add ML Feature to MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlfeature-to-mlmodel) | ✅ | +| Add ML Group to MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#add-mlgroup-to-mlfeaturetable) | ✅ | +| Create MLFeature | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeature) | ✅ | +| Create MLFeatureTable | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlfeaturetable) | ✅ | +| Create MLModel | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodel) | ✅ | +| Create MLModelGroup | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlmodelgroup) | ✅ | +| Create MLPrimaryKey | 🚫 | ✅ [[Guide]](/docs/api/tutorials/ml.md#create-mlprimarykey) | ✅ | +| Read MLFeature | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeature) | ✅ | +| Read MLFeatureTable | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlfeaturetable) | ✅ | +| Read MLModel | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodel) | ✅ | +| Read MLModelGroup | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlmodelgroup) | ✅ | +| Read MLPrimaryKey | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ [[Guide]](/docs/api/tutorials/ml.md#read-mlprimarykey) | ✅ | +| Create Data Product | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_dataproduct.py) | ✅ | +| Create Lineage Between Chart and Dashboard | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_chart_dashboard.py) | ✅ | +| Create Lineage Between Dataset and Chart | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_chart.py) | ✅ | +| Create Lineage Between Dataset and DataJob | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_dataset_job_dataset.py) | ✅ 
| +| Create Finegrained Lineage as DataJob for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_datajob_finegrained.py) | ✅ | +| Create Finegrained Lineage for Dataset | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py) | ✅ | +| Create Dataset Lineage with Kafka | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_kafka.py) | ✅ | +| Create Dataset Lineage with MCPW & Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) | ✅ | +| Create Dataset Lineage with Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py) | ✅ | +| Create DataJob with Dataflow | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow.py) [[Simple]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py) [[Verbose]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py) | ✅ | +| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | \ No newline at end of file diff --git a/docs/features.md b/docs/features.md index 9ce85d83ee54a..cac822aba06c5 100644 --- a/docs/features.md +++ b/docs/features.md @@ -1,3 +1,9 @@ +--- +hide_title: true +slug: / +--- + + import QuickstartCards from '@site/src/pages/docs/_components/QuickstartCards'; import FeatureCardSection from '@site/src/pages/docs/_components/FeatureCardSection'; diff --git a/docs/managed-datahub/release-notes/v_0_2_16.md b/docs/managed-datahub/release-notes/v_0_2_16.md new file mode 100644 index 0000000000000..29d7aa762ec00 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_2_16.md @@ -0,0 +1,16 @@ +# v0.2.16 +--- + +Release Availability Date +--- +18-Mar-2024 + +Recommended CLI/SDK +--- +- `v0.13.1` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.13.1 + +If you are using an older CLI/SDK version, please upgrade it. This applies to all CLI/SDK usage, whether through your terminal, GitHub Actions, Airflow, the Python SDK, the Java SDK, etc. We strongly recommend upgrading, as we keep pushing fixes to the CLI and it helps us support you better. + +## Release Changelog +--- +- Since `v0.2.15` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/92850ac55625f3fbee6cdd8699970b43c18a6f58...55bc955304c4c192c04a0393a47355a295f5770a have been pulled in. 
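For quick reference alongside the feature matrix above, the following is a minimal, hypothetical sketch of the Python SDK path for "Add Tags to a Dataset" (it is not part of this diff). The server URL, dataset coordinates, and tag name are placeholder assumptions; the linked tags tutorial remains the authoritative guide.

```python
# Illustrative sketch only: attach a tag to a dataset via the DataHub Python SDK.
# The GMS URL, dataset identity, and tag name below are assumptions for the example.
from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass

emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
dataset_urn = make_dataset_urn(platform="hive", name="SampleHiveDataset", env="PROD")

# Note: emitting GlobalTags this way upserts the whole aspect, replacing any existing tags.
tags_aspect = GlobalTagsClass(tags=[TagAssociationClass(tag=make_tag_urn("pii"))])
emitter.emit(MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=tags_aspect))
```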
diff --git a/metadata-ingestion/docs/dev_guides/classification.md b/metadata-ingestion/docs/dev_guides/classification.md index 04318d06bca71..be7e1e5013318 100644 --- a/metadata-ingestion/docs/dev_guides/classification.md +++ b/metadata-ingestion/docs/dev_guides/classification.md @@ -28,25 +28,25 @@ DataHub Classifier is the default classifier implementation, which uses [acryl-d ### Config Details -| Field | Required | Type | Description | Default | -| ------------------------------------------------------ | ----------------------------------------------------- | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| confidence_level_threshold | | number | | 0.68 | -| strip_exclusion_formatting | | bool | A flag that determines whether the exclusion list uses exact matching or format stripping (case-insensitivity, punctuation removal, and special character removal). | True | -| info_types | | list[string] | List of infotypes to be predicted. By default, all supported infotypes are considered, along with any custom infotypes configured in `info_types_config`. | None | -| info_types_config | Configuration details for infotypes | Dict[str, InfoTypeConfig] | | See [reference_input.py](https://github.com/acryldata/datahub-classify/blob/main/datahub-classify/src/datahub_classify/reference_input.py) for default configuration. | -| info_types_config.`key`.prediction_factors_and_weights | ❓ (required if info_types_config.`key` is set) | Dict[str,number] | Factors and their weights to consider when predicting info types | | -| info_types_config.`key`.exclude_name | | list[string] | Optional list of names to exclude from classification. | None | -| info_types_config.`key`.name | | NameFactorConfig (see below for fields) | | | -| info_types_config.`key`.name.regex | | Array of string | List of regex patterns the column name follows for the info type | ['.*'] | -| info_types_config.`key`.description | | DescriptionFactorConfig (see below for fields) | | | -| info_types_config.`key`.description.regex | | Array of string | List of regex patterns the column description follows for the info type | ['.*'] | -| info_types_config.`key`.datatype | | DataTypeFactorConfig (see below for fields) | | | -| info_types_config.`key`.datatype.type | | Array of string | List of data types for the info type | ['.*'] | -| info_types_config.`key`.values | | ValuesFactorConfig (see below for fields) | | | -| info_types_config.`key`.values.prediction_type | ❓ (required if info_types_config.`key`.values is set) | string | | None | -| info_types_config.`key`.values.regex | | Array of string | List of regex patterns the column value follows for the info type | None | -| info_types_config.`key`.values.library | | Array of string | Library used for prediction | None | -| minimum_values_threshold | | number | Minimum number of non-null column values required to process `values` prediction factor. 
| 50 | +| Field | Required | Type | Description | Default | +| ------------------------------------------------------ | ----------------------------------------------------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| confidence_level_threshold | | number | | 0.68 | +| strip_exclusion_formatting | | bool | A flag that determines whether the exclusion list uses exact matching or format stripping (case-insensitivity, punctuation removal, and special character removal). | True | +| info_types | | list[string] | List of infotypes to be predicted. By default, all supported infotypes are considered, along with any custom infotypes configured in `info_types_config`. | None | +| info_types_config | Configuration details for infotypes | Dict[str, InfoTypeConfig] | | See [reference_input.py](https://github.com/acryldata/datahub-classify/blob/main/datahub-classify/src/datahub_classify/reference_input.py) for default configuration. | +| info_types_config.`key`.prediction_factors_and_weights | ❓ (required if info_types_config.`key` is set) | Dict[str,number] | Factors and their weights to consider when predicting info types | | +| info_types_config.`key`.exclude_name | | list[string] | Optional list of names to exclude from classification. | None | +| info_types_config.`key`.name | | NameFactorConfig (see below for fields) | | | +| info_types_config.`key`.name.regex | | Array of string | List of regex patterns the column name follows for the info type | ['.*'] | +| info_types_config.`key`.description | | DescriptionFactorConfig (see below for fields) | | | +| info_types_config.`key`.description.regex | | Array of string | List of regex patterns the column description follows for the info type | ['.*'] | +| info_types_config.`key`.datatype | | DataTypeFactorConfig (see below for fields) | | | +| info_types_config.`key`.datatype.type | | Array of string | List of data types for the info type | ['.*'] | +| info_types_config.`key`.values | | ValuesFactorConfig (see below for fields) | | | +| info_types_config.`key`.values.prediction_type | ❓ (required if info_types_config.`key`.values is set) | string | | None | +| info_types_config.`key`.values.regex | | Array of string | List of regex patterns the column value follows for the info type | None | +| info_types_config.`key`.values.library | | Array of string | Library used for prediction | None | +| minimum_values_threshold | | number | Minimum number of non-null column values required to process `values` prediction factor. 
| 50 | | | ### Supported infotypes - `Email_Address` @@ -63,12 +63,20 @@ DataHub Classifier is the default classifier implementation, which uses [acryl-d - `IP_Address_v6` - `US_Driving_License_Number` - `Swift_Code` +- Regex based Custom InfoTypes -### Supported sources +## Supported sources -* snowflake +- All SQL sources -#### Example +## Future Work + +- Classification for nested columns (struct, array type) + + +## Examples + +### Basic ```yml source: @@ -94,7 +102,7 @@ source: - type: datahub ``` -#### Example with Advanced Configuration: Customizing configuration for supported info types +### Advanced Configuration: Customizing configuration for supported info types ```yml source: @@ -399,7 +407,7 @@ source: ``` -#### Example with Advanced Configuration: Specifying custom info type +### Advanced Configuration: Specifying Custom InfoType ```yml source: @@ -438,3 +446,10 @@ source: regex: - "(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+" library: [] +``` + +## Additional Resources + +### DataHub Blog + +* [PII Classification just got easier with DataHub](https://blog.datahubproject.io/pii-classification-just-got-easier-with-datahub-6bab2b63abcb) \ No newline at end of file diff --git a/metadata-ingestion/examples/recipes/file_to_datahub-jobs-golden.dhub.yaml b/metadata-ingestion/examples/recipes/file_to_datahub-jobs-golden.dhub.yaml new file mode 100644 index 0000000000000..bdad337b607de --- /dev/null +++ b/metadata-ingestion/examples/recipes/file_to_datahub-jobs-golden.dhub.yaml @@ -0,0 +1,11 @@ +--- +# see https://datahubproject.io/docs/generated/ingestion/sources/file for complete documentation +source: + type: "file" + config: + filename: ./examples/test_examples/via_node_test_example_fivetran.json +# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation +sink: + type: "datahub-rest" + config: + server: "http://localhost:8080" diff --git a/metadata-ingestion/examples/test_examples/via_node_test_example_fivetran.json b/metadata-ingestion/examples/test_examples/via_node_test_example_fivetran.json new file mode 100644 index 0000000000000..886ad2e2005ca --- /dev/null +++ b/metadata-ingestion/examples/test_examples/via_node_test_example_fivetran.json @@ -0,0 +1,731 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": {}, + "name": "postgres" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + 
"changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "paused": "False", + "sync_frequency": "1440", + "destination_id": "'interval_unconstitutional'" + }, + "name": "postgres", + "type": { + "string": "COMMAND" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, + { + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [{ + "fieldPath": "id", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(asdecimal=False)", + "recursive": false, + "isPartOfKey": true + }] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00" + } + }, + { + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [{ + "fieldPath": "id", + "nullable": true, + "description": "mock comment for column", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(asdecimal=False)", + "recursive": false, + "isPartOfKey": true + }] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00" + } + }, + { + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD),field_bar)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } + }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + 
"urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD),field_foo_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD),field_bar)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, + +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham Jagtap", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "4c9a03d6-eded-4422-a46a-163266e58243", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1695191853000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": 
"dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1695191853000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1695191885000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "f773d1e9-c791-48f4-894f-8cf9b3dfc834", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1696343730000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + 
"entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343730000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343732000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SKIPPED", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "63c2fc85-600b-455f-9ba0-f576522465be", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1696343755000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343755000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343790000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "FAILURE", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5873b7ac25c09..5570893b7d1df 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -99,7 +99,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. 
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==22.3.1.dev3", + "acryl-sqlglot==22.4.1.dev4", } classification_lib = { @@ -302,7 +302,8 @@ | { *sqlglot_lib, "google-cloud-datacatalog-lineage==0.2.2", - }, + } + | classification_lib, "clickhouse": sql_common | clickhouse_common, "clickhouse-usage": sql_common | usage_common | clickhouse_common, "datahub-lineage-file": set(), @@ -370,6 +371,8 @@ | redshift_common | usage_common | sqlglot_lib + | classification_lib + | {"db-dtypes"} # Pandas extension data types | {"cachetools"}, "s3": {*s3_base, *data_lake_profiling}, "gcs": {*s3_base, *data_lake_profiling}, diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py index 419ae5668292d..6c5db13608414 100644 --- a/metadata-ingestion/src/datahub/cli/check_cli.py +++ b/metadata-ingestion/src/datahub/cli/check_cli.py @@ -1,4 +1,7 @@ +import dataclasses +import json import logging +import pathlib import pprint import shutil import tempfile @@ -17,6 +20,7 @@ from datahub.ingestion.source.source_registry import source_registry from datahub.ingestion.transformer.transform_registry import transform_registry from datahub.telemetry import telemetry +from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList logger = logging.getLogger(__name__) @@ -339,3 +343,28 @@ def test_path_spec(config: str, input: str, path_spec_key: str) -> None: f"Failed to validate pattern {pattern_dicts} in path {path_spec_key}" ) raise e + + +@check.command() +@click.argument("query-log-file", type=click.Path(exists=True, dir_okay=False)) +@click.option("--output", type=click.Path()) +def extract_sql_agg_log(query_log_file: str, output: Optional[str]) -> None: + """Convert a sqlite db generated by the SqlParsingAggregator into a JSON.""" + + from datahub.sql_parsing.sql_parsing_aggregator import LoggedQuery + + assert dataclasses.is_dataclass(LoggedQuery) + + shared_connection = ConnectionWrapper(pathlib.Path(query_log_file)) + query_log = FileBackedList[LoggedQuery]( + shared_connection=shared_connection, tablename="stored_queries" + ) + logger.info(f"Extracting {len(query_log)} queries from {query_log_file}") + queries = [dataclasses.asdict(query) for query in query_log] + + if output: + with open(output, "w") as f: + json.dump(queries, f, indent=2, default=str) + logger.info(f"Extracted {len(queries)} queries to {output}") + else: + click.echo(json.dumps(queries, indent=2)) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 906a431666e17..d299f1009d51a 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -57,6 +57,7 @@ class SourceCapability(Enum): TAGS = "Extract Tags" SCHEMA_METADATA = "Schema Metadata" CONTAINERS = "Asset Containers" + CLASSIFICATION = "Classification" @dataclass diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py b/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py index c6c95e76d196f..c0de827b21131 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py @@ -1,16 +1,20 @@ import concurrent.futures import logging from dataclasses import dataclass, field +from functools import partial from math import ceil -from typing import Dict, Iterable, List, 
Optional +from typing import Callable, Dict, Iterable, List, Optional, Union from datahub_classify.helper_classes import ColumnInfo, Metadata from pydantic import Field from datahub.configuration.common import ConfigModel, ConfigurationError from datahub.emitter.mce_builder import get_sys_time, make_term_urn, make_user_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classifier import ClassificationConfig, Classifier from datahub.ingestion.glossary.classifier_registry import classifier_registry +from datahub.ingestion.source.sql.data_reader import DataReader from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, GlossaryTermAssociation, @@ -25,9 +29,12 @@ @dataclass class ClassificationReportMixin: + + num_tables_fetch_sample_values_failed: int = 0 + num_tables_classification_attempted: int = 0 num_tables_classification_failed: int = 0 - num_tables_classified: int = 0 + num_tables_classification_found: int = 0 info_types_detected: LossyDict[str, LossyList[str]] = field( default_factory=LossyDict @@ -99,8 +106,22 @@ def classify_schema_fields( self, dataset_name: str, schema_metadata: SchemaMetadata, - sample_data: Dict[str, list], + sample_data: Union[Dict[str, list], Callable[[], Dict[str, list]]], ) -> None: + + if not isinstance(sample_data, Dict): + try: + # TODO: In future, the sample_data fetcher can be called lazily only if classification + # requires values as a prediction factor + sample_data = sample_data() + except Exception as e: + self.report.num_tables_fetch_sample_values_failed += 1 + logger.warning( + f"Failed to get sample values for dataset {dataset_name}. Make sure SELECT permission is granted on the dataset.", + ) + sample_data = dict() + logger.debug("Error", exc_info=e) + column_infos = self.get_columns_to_classify( dataset_name, schema_metadata, sample_data ) @@ -137,7 +158,7 @@ def classify_schema_fields( ) if field_terms: - self.report.num_tables_classified += 1 + self.report.num_tables_classification_found += 1 self.populate_terms_in_schema_metadata(schema_metadata, field_terms) def update_field_terms( @@ -234,8 +255,11 @@ def get_columns_to_classify( ) continue - # TODO: Let's auto-skip passing sample_data for complex(array/struct) columns - # for initial rollout + # As a result of the custom field path specification, e.g. [version=2.0].[type=struct].[type=struct].service, + # sample values for a nested field (an array, union, or struct) are not read or passed to the classifier correctly. + # TODO: Fix this behavior for nested fields. This would probably involve: + # 1. Preprocessing field path spec v2 back to the native field representation (without [*] constructs). + # 2. Preprocessing the retrieved structured sample data to pass sample values correctly for nested fields.
column_infos.append( ColumnInfo( @@ -256,3 +280,47 @@ def get_columns_to_classify( ) return column_infos + + +def classification_workunit_processor( + table_wu_generator: Iterable[MetadataWorkUnit], + classification_handler: ClassificationHandler, + data_reader: Optional[DataReader], + table_id: List[str], + data_reader_kwargs: dict = {}, +) -> Iterable[MetadataWorkUnit]: + table_name = ".".join(table_id) + if not classification_handler.is_classification_enabled_for_table(table_name): + yield from table_wu_generator + for wu in table_wu_generator: + maybe_schema_metadata = wu.get_aspect_of_type(SchemaMetadata) + if maybe_schema_metadata: + try: + classification_handler.classify_schema_fields( + table_name, + maybe_schema_metadata, + ( + partial( + data_reader.get_sample_data_for_table, + table_id, + classification_handler.config.classification.sample_size + * 1.2, + **data_reader_kwargs, + ) + if data_reader + else dict() + ), + ) + yield MetadataChangeProposalWrapper( + aspect=maybe_schema_metadata, entityUrn=wu.get_urn() + ).as_workunit( + is_primary_source=wu.is_primary_source, + ) + except Exception as e: + logger.debug( + f"Failed to classify table columns for {table_name} due to error -> {e}", + exc_info=e, + ) + yield wu + else: + yield wu diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index bcc0aa50ed22e..8452399bddf5d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -35,11 +35,16 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.glossary.classification_mixin import ( + ClassificationHandler, + classification_workunit_processor, +) from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( BigqueryTableIdentifier, BigQueryTableRef, ) from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config +from datahub.ingestion.source.bigquery_v2.bigquery_data_reader import BigQueryDataReader from datahub.ingestion.source.bigquery_v2.bigquery_helper import ( unquote_and_decode_unicode_escape_seq, ) @@ -167,6 +172,11 @@ def cleanup(config: BigQueryV2Config) -> None: "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", supported=True, ) +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource): # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types BIGQUERY_FIELD_TYPE_MAPPINGS: Dict[ @@ -214,6 +224,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): super(BigqueryV2Source, self).__init__(config, ctx) self.config: BigQueryV2Config = config self.report: BigQueryV2Report = BigQueryV2Report() + self.classification_handler = ClassificationHandler(self.config, self.report) self.platform: str = "bigquery" BigqueryTableIdentifier._BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = ( @@ -227,6 +238,12 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): ) self.sql_parser_schema_resolver = self._init_schema_resolver() + self.data_reader: Optional[BigQueryDataReader] = None + if self.classification_handler.is_classification_enabled(): + self.data_reader = BigQueryDataReader.create( + self.config.get_bigquery_client() + ) + redundant_lineage_run_skip_handler: Optional[ RedundantLineageRunSkipHandler ] 
= None @@ -713,6 +730,7 @@ def _process_schema( ) columns = None + if ( self.config.include_tables or self.config.include_views @@ -732,12 +750,27 @@ def _process_schema( for table in db_tables[dataset_name]: table_columns = columns.get(table.name, []) if columns else [] - yield from self._process_table( + table_wu_generator = self._process_table( table=table, columns=table_columns, project_id=project_id, dataset_name=dataset_name, ) + yield from classification_workunit_processor( + table_wu_generator, + self.classification_handler, + self.data_reader, + [project_id, dataset_name, table.name], + data_reader_kwargs=dict( + sample_size_percent=( + self.config.classification.sample_size + * 1.2 + / table.rows_count + if table.rows_count + else None + ) + ), + ) elif self.store_table_refs: # Need table_refs to calculate lineage and usage for table_item in self.bigquery_data_dictionary.list_tables( @@ -1071,14 +1104,16 @@ def gen_dataset_workunits( ) yield self.gen_schema_metadata( - dataset_urn, table, columns, str(datahub_dataset_name) + dataset_urn, table, columns, datahub_dataset_name ) dataset_properties = DatasetProperties( name=datahub_dataset_name.get_table_display_name(), - description=unquote_and_decode_unicode_escape_seq(table.comment) - if table.comment - else "", + description=( + unquote_and_decode_unicode_escape_seq(table.comment) + if table.comment + else "" + ), qualifiedName=str(datahub_dataset_name), created=( TimeStamp(time=int(table.created.timestamp() * 1000)) @@ -1238,10 +1273,10 @@ def gen_schema_metadata( dataset_urn: str, table: Union[BigqueryTable, BigqueryView, BigqueryTableSnapshot], columns: List[BigqueryColumn], - dataset_name: str, + dataset_name: BigqueryTableIdentifier, ) -> MetadataWorkUnit: schema_metadata = SchemaMetadata( - schemaName=dataset_name, + schemaName=str(dataset_name), platform=make_data_platform_urn(self.platform), version=0, hash="", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 2f4978d49e687..28f0be2c38033 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -10,6 +10,9 @@ from datahub.configuration.common import AllowDenyPattern, ConfigModel from datahub.configuration.validate_field_removal import pydantic_removed_field +from datahub.ingestion.glossary.classification_mixin import ( + ClassificationSourceConfigMixin, +) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulLineageConfigMixin, @@ -64,9 +67,9 @@ def __init__(self, **data: Any): ) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._credentials_path - def get_bigquery_client(config) -> bigquery.Client: - client_options = config.extra_client_options - return bigquery.Client(config.project_on_behalf, **client_options) + def get_bigquery_client(self) -> bigquery.Client: + client_options = self.extra_client_options + return bigquery.Client(self.project_on_behalf, **client_options) def make_gcp_logging_client( self, project_id: Optional[str] = None @@ -96,6 +99,7 @@ class BigQueryV2Config( StatefulUsageConfigMixin, StatefulLineageConfigMixin, StatefulProfilingConfigMixin, + ClassificationSourceConfigMixin, ): project_id_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py new file mode 100644 index 0000000000000..37dfd14ce125e --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py @@ -0,0 +1,72 @@ +import logging +from collections import defaultdict +from typing import Dict, List, Optional + +from google.cloud import bigquery + +from datahub.ingestion.source.sql.data_reader import DataReader +from datahub.utilities.perf_timer import PerfTimer + +logger = logging.Logger(__name__) + + +class BigQueryDataReader(DataReader): + @staticmethod + def create( + client: bigquery.Client, + ) -> "BigQueryDataReader": + return BigQueryDataReader(client) + + def __init__( + self, + client: bigquery.Client, + ) -> None: + self.client = client + + def get_sample_data_for_table( + self, + table_id: List[str], + sample_size: int, + *, + sample_size_percent: Optional[float] = None, + filter: Optional[str] = None, + ) -> Dict[str, list]: + """ + table_id should be in the form [project, dataset, schema] + """ + + assert len(table_id) == 3 + project = table_id[0] + dataset = table_id[1] + table_name = table_id[2] + + column_values: Dict[str, list] = defaultdict(list) + if sample_size_percent is None: + return column_values + # Ideally we always know the actual row count. + # The alternative to perform limit query scans entire BQ table + # and is never a recommended option due to cost factor, unless + # additional filter clause (e.g. where condition on partition) is available. + + logger.debug( + f"Collecting sample values for table {project}.{dataset}.{table_name}" + ) + with PerfTimer() as timer: + sample_pc = sample_size_percent * 100 + # TODO: handle for sharded+compulsory partitioned tables + sql = ( + f"SELECT * FROM `{project}.{dataset}.{table_name}` " + + f"TABLESAMPLE SYSTEM ({sample_pc:.8f} percent)" + ) + # Ref: https://cloud.google.com/bigquery/docs/samples/bigquery-query-results-dataframe + df = self.client.query_and_wait(sql).to_dataframe() + time_taken = timer.elapsed_seconds() + logger.debug( + f"Finished collecting sample values for table {project}.{dataset}.{table_name};" + f"{df.shape[0]} rows; took {time_taken:.3f} seconds" + ) + + return df.to_dict(orient="list") + + def close(self) -> None: + self.client.close() diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index ad7b86219e7c1..54eca61dfe1c9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -7,6 +7,7 @@ import pydantic from datahub.ingestion.api.report import Report +from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport @@ -42,7 +43,12 @@ class BigQueryProcessingPerfReport(Report): @dataclass -class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): +class BigQueryV2Report( + ProfilingSqlReport, + IngestionStageReport, + BaseTimeWindowReport, + ClassificationReportMixin, +): num_total_lineage_entries: TopKDict[str, int] = 
field(default_factory=TopKDict) num_skipped_lineage_entries_missing_data: TopKDict[str, int] = field( default_factory=int_top_k_dict diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index 4083eb6db77c1..dbaf28fabc9d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -91,7 +91,7 @@ def generate_partition_profiler_query( ) else: logger.warning( - f"Partitioned table {table.name} without partiton column" + f"Partitioned table {table.name} without partition column" ) self.report.profiling_skipped_invalid_partition_ids[ f"{project}.{schema}.{table.name}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 6a642e6566132..27b44bafc4b4e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -9,6 +9,9 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.validate_field_removal import pydantic_removed_field +from datahub.ingestion.glossary.classification_mixin import ( + ClassificationSourceConfigMixin, +) from datahub.ingestion.source.data_lake_common.path_spec import PathSpec from datahub.ingestion.source.sql.sql_config import BasicSQLAlchemyConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( @@ -70,6 +73,7 @@ class RedshiftConfig( RedshiftUsageConfig, StatefulLineageConfigMixin, StatefulProfilingConfigMixin, + ClassificationSourceConfigMixin, ): database: str = Field(default="dev", description="database") diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index 1a38f11a52449..1c7d275c34867 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -455,49 +455,70 @@ def list_insert_create_queries_sql( db_name: str, start_time: datetime, end_time: datetime ) -> str: return """ - select - distinct cluster, - target_schema, - target_table, - username, - query as query_id, - LISTAGG(CASE WHEN LEN(RTRIM(querytxt)) = 0 THEN querytxt ELSE RTRIM(querytxt) END) WITHIN GROUP (ORDER BY sequence) as ddl, - ANY_VALUE(pid) as session_id, - starttime as timestamp - from - ( + with query_txt as + ( select - distinct tbl as target_table_id, - sti.schema as target_schema, - sti.table as target_table, - sti.database as cluster, - usename as username, - text as querytxt, - sq.query, - sequence, - si.starttime as starttime, - pid + query, + pid, + LISTAGG(case + when LEN(RTRIM(text)) = 0 then text + else RTRIM(text) + end) within group ( + order by + sequence) as ddl from - stl_insert as si - join SVV_TABLE_INFO sti on - sti.table_id = tbl - left join svl_user_info sui on - si.userid = sui.usesysid - left join STL_QUERYTEXT sq on - si.query = sq.query - left join stl_load_commits slc on - slc.query = si.query - where + ( + select + query, + pid, + text, + sequence + from + STL_QUERYTEXT + where + sequence < 320 + order by + sequence + ) + group by + query, + pid + ) + select + distinct tbl as target_table_id, + sti.schema as target_schema, + sti.table as target_table, + 
sti.database as cluster, + usename as username, + ddl, + sq.query as query_id, + min(si.starttime) as starttime, + ANY_VALUE(pid) as session_id + from + stl_insert as si + left join SVV_TABLE_INFO sti on + sti.table_id = tbl + left join svl_user_info sui on + si.userid = sui.usesysid + left join query_txt sq on + si.query = sq.query + left join stl_load_commits slc on + slc.query = si.query + where sui.usename <> 'rdsdb' - and slc.query IS NULL and cluster = '{db_name}' + and slc.query IS NULL and si.starttime >= '{start_time}' and si.starttime < '{end_time}' - and sequence < 320 - ) as target_tables - group by cluster, query_id, target_schema, target_table, username, starttime - order by cluster, query_id, target_schema, target_table, starttime asc - """.format( + group by + target_table_id, + target_schema, + target_table, + cluster, + username, + ddl, + sq.query + """.format( # We need the original database name for filtering db_name=db_name, start_time=start_time.strftime(redshift_datetime_format), @@ -551,7 +572,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\n','\\n'), '(CREATE(?:[\\n\\s\\t]+(?:temp|temporary))?(?:[\\n\\s\\t]+)table(?:[\\n\\s\\t]+)[^\\n\\s\\t()-]+)', 0, 1, 'ipe'),'[\\n\\s\\t]+',' ',1,'p') as create_command, query_text, row_number() over ( - partition by TRIM(query_text) + partition by session_id, TRIM(query_text) order by start_time desc ) rn from @@ -615,7 +636,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: ) where - rn = 1; + rn = 1 """ # Add this join to the sql query for more metrics on completed queries @@ -936,6 +957,8 @@ def list_copy_commands_sql( # also similar happens if for example table name contains special characters quoted with " i.e. "test-table1" # it is also worth noting that "query_type" field from SYS_QUERY_HISTORY could be probably used to improve many # of complicated queries in this file + # However, note that we can't really use this query fully everywhere, despite it being simpler, because + # the SYS_QUERY_TEXT.text field is truncated to 4000 characters and strips out linebreaks. 
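Context for the query_txt CTE introduced above: STL_QUERYTEXT stores each statement as a series of 200-character chunks keyed by (query, pid) and ordered by the sequence column, so the DDL has to be stitched back together; the CASE/RTRIM only trims chunks that are not entirely blank. A minimal Python sketch of the same reassembly logic, using hypothetical rows shaped like STL_QUERYTEXT output:

from collections import defaultdict

# Hypothetical STL_QUERYTEXT-style rows: one statement split into ordered chunks.
rows = [
    {"query": 42, "pid": 7, "sequence": 0, "text": "INSERT INTO public.company SELECT * FR"},
    {"query": 42, "pid": 7, "sequence": 1, "text": "OM staging.company"},
]

def reassemble(rows):
    chunks = defaultdict(list)
    for row in sorted(rows, key=lambda r: (r["query"], r["pid"], r["sequence"])):
        text = row["text"]
        # Mirror the SQL CASE: keep fully-blank chunks as-is, otherwise strip trailing padding.
        chunks[(row["query"], row["pid"])].append(text if len(text.rstrip()) == 0 else text.rstrip())
    return {key: "".join(parts) for key, parts in chunks.items()}

print(reassemble(rows))  # {(42, 7): 'INSERT INTO public.company SELECT * FROM staging.company'}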
@staticmethod def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: start_time_str: str = start_time.strftime(redshift_datetime_format) @@ -955,7 +978,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: query_text, REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\n','\\n'), '(CREATE(?:[\\n\\s\\t]+(?:temp|temporary))?(?:[\\n\\s\\t]+)table(?:[\\n\\s\\t]+)[^\\n\\s\\t()-]+)', 0, 1, 'ipe'),'[\\n\\s\\t]+',' ',1,'p') AS create_command, ROW_NUMBER() OVER ( - PARTITION BY query_text + PARTITION BY session_id, query_text ORDER BY start_time DESC ) rn FROM @@ -990,6 +1013,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: ) WHERE rn = 1 + ORDER BY start_time ASC ; """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index ef290518acd08..b893c0833954d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -35,6 +35,10 @@ ) from datahub.ingestion.api.source_helpers import create_dataset_props_patch_builder from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.glossary.classification_mixin import ( + ClassificationHandler, + classification_workunit_processor, +) from datahub.ingestion.source.common.subtypes import ( DatasetContainerSubTypes, DatasetSubTypes, @@ -43,6 +47,7 @@ from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor from datahub.ingestion.source.redshift.lineage_v2 import RedshiftSqlLineageV2 from datahub.ingestion.source.redshift.profile import RedshiftProfiler +from datahub.ingestion.source.redshift.redshift_data_reader import RedshiftDataReader from datahub.ingestion.source.redshift.redshift_schema import ( RedshiftColumn, RedshiftDataDictionary, @@ -52,6 +57,7 @@ ) from datahub.ingestion.source.redshift.report import RedshiftReport from datahub.ingestion.source.redshift.usage import RedshiftUsageExtractor +from datahub.ingestion.source.sql.data_reader import DataReader from datahub.ingestion.source.sql.sql_common import SqlWorkUnit from datahub.ingestion.source.sql.sql_types import resolve_postgres_modified_type from datahub.ingestion.source.sql.sql_utils import ( @@ -127,6 +133,11 @@ "Enabled by default, can be disabled via configuration `include_usage_statistics`", ) @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class RedshiftSource(StatefulIngestionSourceBase, TestableSource): """ This plugin extracts the following: @@ -313,6 +324,7 @@ def __init__(self, config: RedshiftConfig, ctx: PipelineContext): self.catalog_metadata: Dict = {} self.config: RedshiftConfig = config self.report: RedshiftReport = RedshiftReport() + self.classification_handler = ClassificationHandler(self.config, self.report) self.platform = "redshift" self.domain_registry = None if self.config.domain: @@ -490,6 +502,15 @@ def process_schemas(self, connection, database): self.db_schemas[database][schema.name] = schema yield from self.process_schema(connection, database, schema) + def make_data_reader( + self, + connection: redshift_connector.Connection, + ) -> Optional[DataReader]: + if self.classification_handler.is_classification_enabled(): + return RedshiftDataReader.create(connection) + + return None + def 
process_schema( self, connection: redshift_connector.Connection, @@ -529,6 +550,7 @@ def process_schema( ) if self.config.include_tables: + data_reader = self.make_data_reader(connection) logger.info(f"Process tables in schema {database}.{schema.name}") if ( self.db_tables[schema.database] @@ -536,7 +558,15 @@ def process_schema( ): for table in self.db_tables[schema.database][schema.name]: table.columns = schema_columns[schema.name].get(table.name, []) - yield from self._process_table(table, database=database) + table_wu_generator = self._process_table( + table, database=database + ) + yield from classification_workunit_processor( + table_wu_generator, + self.classification_handler, + data_reader, + [schema.database, schema.name, table.name], + ) self.report.table_processed[report_key] = ( self.report.table_processed.get( f"{database}.{schema.name}", 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_data_reader.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_data_reader.py new file mode 100644 index 0000000000000..5b92cf5c45688 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_data_reader.py @@ -0,0 +1,48 @@ +import logging +from typing import Any, Dict, List + +import redshift_connector + +from datahub.ingestion.source.sql.data_reader import DataReader +from datahub.utilities.perf_timer import PerfTimer + +logger = logging.Logger(__name__) + + +class RedshiftDataReader(DataReader): + @staticmethod + def create(conn: redshift_connector.Connection) -> "RedshiftDataReader": + return RedshiftDataReader(conn) + + def __init__(self, conn: redshift_connector.Connection) -> None: + # The lifecycle of this connection is managed externally + self.conn = conn + + def get_sample_data_for_table( + self, table_id: List[str], sample_size: int, **kwargs: Any + ) -> Dict[str, list]: + """ + For redshift, table_id should be in form (db_name, schema_name, table_name) + """ + assert len(table_id) == 3 + db_name = table_id[0] + schema_name = table_id[1] + table_name = table_id[2] + + logger.debug( + f"Collecting sample values for table {db_name}.{schema_name}.{table_name}" + ) + with PerfTimer() as timer, self.conn.cursor() as cursor: + sql = f"select * from {db_name}.{schema_name}.{table_name} limit {sample_size};" + cursor.execute(sql) + df = cursor.fetch_dataframe() + # Fetch the result set from the cursor and deliver it as the Pandas DataFrame. 
+ time_taken = timer.elapsed_seconds() + logger.debug( + f"Finished collecting sample values for table {db_name}.{schema_name}.{table_name};" + f"{df.shape[0]} rows; took {time_taken:.3f} seconds" + ) + return df.to_dict(orient="list") + + def close(self) -> None: + pass diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py index 586771c4fb7b5..f3a8dfb8f3e85 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py @@ -107,6 +107,12 @@ class AlterTableRow: start_time: datetime +def _stringy(x: Optional[int]) -> Optional[str]: + if x is None: + return None + return str(x) + + # this is a class to be a proxy to query Redshift class RedshiftDataDictionary: def __init__(self, is_serverless): @@ -419,9 +425,8 @@ def get_lineage_rows( else None ), session_id=( - str(row[field_names.index("session_id")]) + _stringy(row[field_names.index("session_id")]) if "session_id" in field_names - and row[field_names.index("session_id")] else None ), ) @@ -441,9 +446,13 @@ def get_temporary_rows( rows = cursor.fetchmany() while rows: for row in rows: + # Skipping roews with no session_id + session_id = _stringy(row[field_names.index("session_id")]) + if session_id is None: + continue yield TempTableRow( transaction_id=row[field_names.index("transaction_id")], - session_id=row[field_names.index("session_id")], + session_id=session_id, # See https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERYTEXT.html # for why we need to replace the \n with a newline. query_text=row[field_names.index("query_text")].replace( @@ -468,9 +477,12 @@ def get_alter_table_commands( rows = cursor.fetchmany() while rows: for row in rows: + session_id = _stringy(row[field_names.index("session_id")]) + if session_id is None: + continue yield AlterTableRow( transaction_id=row[field_names.index("transaction_id")], - session_id=row[field_names.index("session_id")], + session_id=session_id, query_text=row[field_names.index("query_text")], start_time=row[field_names.index("start_time")], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index 6c2a12498f2c0..e2a035091d0ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -2,6 +2,7 @@ from datetime import datetime from typing import Dict, Optional +from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport @@ -11,7 +12,12 @@ @dataclass -class RedshiftReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): +class RedshiftReport( + ProfilingSqlReport, + IngestionStageReport, + BaseTimeWindowReport, + ClassificationReportMixin, +): num_usage_workunits_emitted: Optional[int] = None num_operational_stats_workunits_emitted: Optional[int] = None upstream_lineage: LossyDict = field(default_factory=LossyDict) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_data_reader.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_data_reader.py new file 
mode 100644 index 0000000000000..afb8cca707160 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_data_reader.py @@ -0,0 +1,57 @@ +import logging +from typing import Any, Callable, Dict, List + +import pandas as pd +from snowflake.connector import SnowflakeConnection + +from datahub.ingestion.source.sql.data_reader import DataReader +from datahub.utilities.perf_timer import PerfTimer + +logger = logging.Logger(__name__) + + +class SnowflakeDataReader(DataReader): + @staticmethod + def create( + conn: SnowflakeConnection, col_name_preprocessor: Callable[[str], str] + ) -> "SnowflakeDataReader": + return SnowflakeDataReader(conn, col_name_preprocessor) + + def __init__( + self, conn: SnowflakeConnection, col_name_preprocessor: Callable[[str], str] + ) -> None: + # The lifecycle of this connection is managed externally + self.conn = conn + self.col_name_preprocessor = col_name_preprocessor + + def get_sample_data_for_table( + self, table_id: List[str], sample_size: int, **kwargs: Any + ) -> Dict[str, list]: + """ + For snowflake, table_id should be in form (db_name, schema_name, table_name) + """ + + assert len(table_id) == 3 + db_name = table_id[0] + schema_name = table_id[1] + table_name = table_id[2] + + logger.debug( + f"Collecting sample values for table {db_name}.{schema_name}.{table_name}" + ) + with PerfTimer() as timer, self.conn.cursor() as cursor: + sql = f'select * from "{db_name}"."{schema_name}"."{table_name}" sample ({sample_size} rows);' + cursor.execute(sql) + dat = cursor.fetchall() + # Fetch the result set from the cursor and deliver it as the Pandas DataFrame. + df = pd.DataFrame(dat, columns=[col.name for col in cursor.description]) + df.columns = [self.col_name_preprocessor(col) for col in df.columns] + time_taken = timer.elapsed_seconds() + logger.debug( + f"Finished collecting sample values for table {db_name}.{schema_name}.{table_name};" + f"{df.shape[0]} rows; took {time_taken:.3f} seconds" + ) + return df.to_dict(orient="list") + + def close(self) -> None: + pass diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 9526bdec4b05d..292c57494632c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -5,7 +5,6 @@ from functools import lru_cache from typing import Dict, List, Optional -import pandas as pd from snowflake.connector import SnowflakeConnection from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain @@ -84,7 +83,6 @@ class SnowflakeTable(BaseTable): foreign_keys: List[SnowflakeFK] = field(default_factory=list) tags: Optional[List[SnowflakeTag]] = None column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict) - sample_data: Optional[pd.DataFrame] = None @dataclass diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 591bdffed5819..318cec8482996 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -7,7 +7,6 @@ from functools import partial from typing import Callable, Dict, Iterable, List, Optional, Union -import pandas as pd from snowflake.connector import SnowflakeConnection from 
datahub.configuration.pattern_utils import is_schema_allowed @@ -37,7 +36,10 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.glossary.classification_mixin import ClassificationHandler +from datahub.ingestion.glossary.classification_mixin import ( + ClassificationHandler, + classification_workunit_processor, +) from datahub.ingestion.source.common.subtypes import ( DatasetContainerSubTypes, DatasetSubTypes, @@ -52,6 +54,7 @@ SnowflakeV2Config, TagOption, ) +from datahub.ingestion.source.snowflake.snowflake_data_reader import SnowflakeDataReader from datahub.ingestion.source.snowflake.snowflake_lineage_v2 import ( SnowflakeLineageExtractor, ) @@ -134,7 +137,6 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator -from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.registries.domain_registry import DomainRegistry logger: logging.Logger = logging.getLogger(__name__) @@ -212,6 +214,11 @@ "Optionally enabled via `extract_tags`", supported=True, ) +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class SnowflakeV2Source( SnowflakeQueryMixin, SnowflakeConnectionMixin, @@ -305,10 +312,7 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): config, self.report, self.profiling_state_handler ) - if self.config.classification.enabled: - self.classification_handler = ClassificationHandler( - self.config, self.report - ) + self.classification_handler = ClassificationHandler(self.config, self.report) # Caches tables for a single database. Consider moving to disk or S3 when possible. self.db_tables: Dict[str, List[SnowflakeTable]] = {} @@ -422,6 +426,9 @@ def query(query): _report[SourceCapability.DATA_PROFILING] = CapabilityReport( capable=True ) + _report[SourceCapability.CLASSIFICATION] = CapabilityReport( + capable=True + ) if privilege.object_name.startswith("SNOWFLAKE.ACCOUNT_USAGE."): # if access to "snowflake" shared database, access to all account_usage views is automatically granted @@ -459,6 +466,7 @@ def query(query): SourceCapability.SCHEMA_METADATA: "Either no tables exist or current role does not have permissions to access them", SourceCapability.DESCRIPTIONS: "Either no tables exist or current role does not have permissions to access them", SourceCapability.DATA_PROFILING: "Either no tables exist or current role does not have permissions to access them", + SourceCapability.CLASSIFICATION: "Either no tables exist or current role does not have permissions to access them", SourceCapability.CONTAINERS: "Current role does not have permissions to use any database", SourceCapability.LINEAGE_COARSE: "Current role does not have permissions to snowflake account usage views", SourceCapability.LINEAGE_FINE: "Current role does not have permissions to snowflake account usage views", @@ -472,6 +480,7 @@ def query(query): SourceCapability.SCHEMA_METADATA, SourceCapability.DESCRIPTIONS, SourceCapability.DATA_PROFILING, + SourceCapability.CLASSIFICATION, SourceCapability.LINEAGE_COARSE, SourceCapability.LINEAGE_FINE, SourceCapability.USAGE_STATS, @@ -775,8 +784,17 @@ def _process_schema( self.db_tables[schema_name] = tables if self.config.include_technical_schema: + data_reader = self.make_data_reader() for table in tables: - yield from self._process_table(table, schema_name, db_name) + table_wu_generator = self._process_table( + table, 
schema_name, db_name + ) + yield from classification_workunit_processor( + table_wu_generator, + self.classification_handler, + data_reader, + [db_name, schema_name, table.name], + ) if self.config.include_views: views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name) @@ -876,6 +894,14 @@ def fetch_tables_for_schema( ) return [] + def make_data_reader(self) -> Optional[SnowflakeDataReader]: + if self.classification_handler.is_classification_enabled() and self.connection: + return SnowflakeDataReader.create( + self.connection, self.snowflake_identifier + ) + + return None + def _process_table( self, table: SnowflakeTable, @@ -890,12 +916,6 @@ def _process_table( self.fetch_foreign_keys_for_table(table, schema_name, db_name, table_identifier) - dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) - - self.fetch_sample_data_for_classification( - table, schema_name, db_name, dataset_name - ) - if self.config.extract_tags != TagOption.skip: table.tags = self.tag_extractor.get_tags_on_object( table_name=table.name, @@ -914,36 +934,6 @@ def _process_table( yield from self.gen_dataset_workunits(table, schema_name, db_name) - def fetch_sample_data_for_classification( - self, table: SnowflakeTable, schema_name: str, db_name: str, dataset_name: str - ) -> None: - if ( - table.columns - and self.config.classification.enabled - and self.classification_handler.is_classification_enabled_for_table( - dataset_name - ) - ): - try: - table.sample_data = self.get_sample_values_for_table( - table.name, schema_name, db_name - ) - except Exception as e: - logger.debug( - f"Failed to get sample values for dataset {dataset_name} due to error {e}", - exc_info=e, - ) - if isinstance(e, SnowflakePermissionError): - self.report_warning( - "Failed to get sample values for dataset. Please grant SELECT permissions on dataset.", - dataset_name, - ) - else: - self.report_warning( - "Failed to get sample values for dataset", - dataset_name, - ) - def fetch_foreign_keys_for_table( self, table: SnowflakeTable, @@ -1073,9 +1063,7 @@ def gen_dataset_workunits( ).as_workunit() schema_metadata = self.gen_schema_metadata(table, schema_name, db_name) - # TODO: classification is only run for snowflake tables. - # Should we run classification for snowflake views as well? 
- self.classify_snowflake_table(table, dataset_name, schema_metadata) + yield MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=schema_metadata ).as_workunit() @@ -1296,47 +1284,6 @@ def build_foreign_keys( ) return foreign_keys - def classify_snowflake_table( - self, - table: Union[SnowflakeTable, SnowflakeView], - dataset_name: str, - schema_metadata: SchemaMetadata, - ) -> None: - if ( - isinstance(table, SnowflakeTable) - and self.config.classification.enabled - and self.classification_handler.is_classification_enabled_for_table( - dataset_name - ) - ): - if table.sample_data is not None: - table.sample_data.columns = [ - self.snowflake_identifier(col) for col in table.sample_data.columns - ] - - try: - self.classification_handler.classify_schema_fields( - dataset_name, - schema_metadata, - ( - table.sample_data.to_dict(orient="list") - if table.sample_data is not None - else {} - ), - ) - except Exception as e: - logger.debug( - f"Failed to classify table columns for {dataset_name} due to error -> {e}", - exc_info=e, - ) - self.report_warning( - "Failed to classify table columns", - dataset_name, - ) - finally: - # Cleaning up sample_data fetched for classification - table.sample_data = None - def get_report(self) -> SourceReport: return self.report @@ -1551,37 +1498,6 @@ def inspect_session_metadata(self) -> None: except Exception: self.report.edition = None - # Ideally we do not want null values in sample data for a column. - # However that would require separate query per column and - # that would be expensive, hence not done. To compensale for possibility - # of some null values in collected sample, we fetch extra (20% more) - # rows than configured sample_size. - def get_sample_values_for_table( - self, table_name: str, schema_name: str, db_name: str - ) -> pd.DataFrame: - # Create a cursor object. - logger.debug( - f"Collecting sample values for table {db_name}.{schema_name}.{table_name}" - ) - - actual_sample_size = self.config.classification.sample_size * 1.2 - with PerfTimer() as timer: - cur = self.get_connection().cursor() - # Execute a statement that will generate a result set. - sql = f'select * from "{db_name}"."{schema_name}"."{table_name}" sample ({actual_sample_size} rows);' - - cur.execute(sql) - # Fetch the result set from the cursor and deliver it as the Pandas DataFrame. 
- - dat = cur.fetchall() - df = pd.DataFrame(dat, columns=[col.name for col in cur.description]) - time_taken = timer.elapsed_seconds() - logger.debug( - f"Finished collecting sample values for table {db_name}.{schema_name}.{table_name};{df.shape[0]} rows; took {time_taken:.3f} seconds" - ) - - return df - # domain is either "view" or "table" def get_external_url_for_table( self, table_name: str, schema_name: str, db_name: str, domain: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index c3759875b2769..eed5b1cb6c9eb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -291,6 +291,11 @@ def get_sql_alchemy_url(self): ) @capability(SourceCapability.LINEAGE_COARSE, "Supported for S3 tables") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class AthenaSource(SQLAlchemySource): """ This plugin supports extracting the following metadata from Athena diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 84c1d3844a7b4..7d32b5a20df11 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -380,6 +380,11 @@ def get_columns(self, connection, table_name, schema=None, **kw): @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class ClickHouseSource(TwoTierSQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/data_reader.py b/metadata-ingestion/src/datahub/ingestion/source/sql/data_reader.py index 73730a9ea0ef7..75d3236a0a5ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/data_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/data_reader.py @@ -1,29 +1,62 @@ import logging from abc import abstractmethod from collections import defaultdict -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import sqlalchemy as sa from sqlalchemy.engine import Connection, Engine from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.engine.row import LegacyRow from datahub.ingestion.api.closeable import Closeable +from datahub.utilities.perf_timer import PerfTimer logger: logging.Logger = logging.getLogger(__name__) class DataReader(Closeable): - @abstractmethod def get_sample_data_for_column( - self, table_id: List[str], column_name: str, sample_size: int = 100 + self, table_id: List[str], column_name: str, sample_size: int ) -> list: - pass + raise NotImplementedError() @abstractmethod def get_sample_data_for_table( - self, table_id: List[str], sample_size: int = 100 + self, + table_id: List[str], + sample_size: int, + *, + sample_size_percent: Optional[float] = None, + filter: Optional[str] = None, ) -> Dict[str, list]: + """ + Fetches table values , approx sample_size rows + + Args: + table_id (List[str]): Table name identifier. 
One of + - [db_name, schema_name, table_name] or + - [schema_name, table_name] or + - [table_name] + sample_size (int): sample size + + Keyword Args: + sample_size_percent (float, between 0 and 1): For bigquery-like data platforms that provide only + percentage based sampling methods. If present, actual sample_size + may be ignored. + + filter (string): For bigquery-like data platforms that need a mandatory filter on the partition + column in some cases + + + Returns: + Dict[str, list]: dictionary of (column name -> list of column values) + """ + + # Ideally we do not want null values in sample data for a column. + # However that would require separate query per column and + # that would be expensive, hence not done. To compensate for possibility + # of some null values in collected sample, it's usually recommended to + # fetch extra (20% more) rows than configured sample_size. + pass @@ -36,8 +69,7 @@ def __init__( self, conn: Union[Engine, Connection], ) -> None: - # TODO: How can this use a connection pool instead ? - self.engine = conn.engine.connect() + self.connection = conn.engine.connect() def _table(self, table_id: List[str]) -> sa.Table: return sa.Table( @@ -46,91 +78,42 @@ def _table(self, table_id: List[str]) -> sa.Table: schema=table_id[-2] if len(table_id) > 1 else None, ) - def get_sample_data_for_column( - self, table_id: List[str], column_name: str, sample_size: int = 100 - ) -> list: - """ - Fetches non-null column values, upto count - Args: - table_id: Table name identifier. One of - - [db_name, schema_name, table_name] or - - [schema_name, table_name] or - - [table_name] - column: Column name - Returns: - list of column values - """ - - table = self._table(table_id) - query: Any - ignore_null_condition = sa.column(column_name).is_(None) - # limit doesn't compile properly for oracle so we will append rownum to query string later - if self.engine.dialect.name.lower() == "oracle": - raw_query = ( - sa.select([sa.column(column_name)]) - .select_from(table) - .where(sa.not_(ignore_null_condition)) - ) - - query = str( - raw_query.compile(self.engine, compile_kwargs={"literal_binds": True}) - ) - query += "\nAND ROWNUM <= %d" % sample_size - else: - query = ( - sa.select([sa.column(column_name)]) - .select_from(table) - .where(sa.not_(ignore_null_condition)) - .limit(sample_size) - ) - query_results = self.engine.execute(query) - - return [x[column_name] for x in query_results.fetchall()] - def get_sample_data_for_table( - self, table_id: List[str], sample_size: int = 100 + self, table_id: List[str], sample_size: int, **kwargs: Any ) -> Dict[str, list]: - """ - Fetches table values, upto *1.2 count - Args: - table_id: Table name identifier. One of - - [db_name, schema_name, table_name] or - - [schema_name, table_name] or - - [table_name] - Returns: - dictionary of (column name -> list of column values) - """ - column_values: Dict[str, list] = defaultdict(list) - table = self._table(table_id) - # Ideally we do not want null values in sample data for a column. - # However that would require separate query per column and - # that would be expensiv. To compensate for possibility - # of some null values in collected sample, we fetch extra (20% more) - # rows than configured sample_size.
- sample_size = int(sample_size * 1.2) + logger.debug(f"Collecting sample values for table {'.'.join(table_id)}") - query: Any + with PerfTimer() as timer: + column_values: Dict[str, list] = defaultdict(list) + table = self._table(table_id) - # limit doesn't compile properly for oracle so we will append rownum to query string later - if self.engine.dialect.name.lower() == "oracle": - raw_query = sa.select([sa.text("*")]).select_from(table) + query: Any - query = str( - raw_query.compile(self.engine, compile_kwargs={"literal_binds": True}) - ) - query += "\nAND ROWNUM <= %d" % sample_size - else: - query = sa.select([sa.text("*")]).select_from(table).limit(sample_size) - query_results = self.engine.execute(query) - - # Not ideal - creates a parallel structure in column_values. Can we use pandas here ? - for row in query_results.fetchall(): - if isinstance(row, LegacyRow): - for col, col_value in row.items(): - column_values[col].append(col_value) + # limit doesn't compile properly for oracle so we will append rownum to query string later + if self.connection.dialect.name.lower() == "oracle": + raw_query = sa.select([sa.text("*")]).select_from(table) + query = str( + raw_query.compile( + self.connection, compile_kwargs={"literal_binds": True} + ) + ) + query += "\nAND ROWNUM <= %d" % sample_size + else: + query = sa.select([sa.text("*")]).select_from(table).limit(sample_size) + query_results = self.connection.execute(query) + + # Not ideal - creates a parallel structure in column_values. Can we use pandas here ? + for row in query_results.fetchall(): + for col, col_value in row._mapping.items(): + column_values[col].append(col_value) + time_taken = timer.elapsed_seconds() + logger.debug( + f"Finished collecting sample values for table {'.'.join(table_id)};" + f"took {time_taken:.3f} seconds" + ) return column_values def close(self) -> None: - self.engine.close() + self.connection.close() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/druid.py b/metadata-ingestion/src/datahub/ingestion/source/sql/druid.py index 3f20e0a0f18b6..fdec869baa583 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/druid.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/druid.py @@ -61,6 +61,11 @@ def get_identifier(self, schema: str, table: str) -> str: @config_class(DruidConfig) @support_status(SupportStatus.INCUBATING) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class DruidSource(SQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hana.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hana.py index 5c9c8f063a1a9..40875809120de 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hana.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hana.py @@ -28,6 +28,11 @@ class HanaConfig(BasicSQLAlchemyConfig): @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class HanaSource(SQLAlchemySource): def __init__(self, config: HanaConfig, ctx: PipelineContext): super().__init__(config, ctx, "hana") diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 003732236ba80..2975bfe820d1b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -134,6 +134,11 @@ def clean_host_port(cls, v): @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class HiveSource(TwoTierSQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py index 9b482beba924f..f3e2cccb9e8d0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py @@ -66,6 +66,11 @@ def get_identifier(self, *, schema: str, table: str) -> str: @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class MySQLSource(TwoTierSQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index bcf0f26008ae3..cf7bdc982ee80 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -560,6 +560,11 @@ def __getattr__(self, item: str) -> Any: @config_class(OracleConfig) @support_status(SupportStatus.INCUBATING) @capability(SourceCapability.DOMAINS, "Enabled by default") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class OracleSource(SQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py index 5d1e37fbb68a3..20976c91f7878 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py @@ -132,6 +132,11 @@ class PostgresConfig(BasePostgresConfig): @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class PostgresSource(SQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index 9657fdab9e2e3..98e2f2ecfbd5a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -160,6 +160,11 @@ def get_sql_alchemy_url( 
@support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class PrestoOnHiveSource(SQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 9ec30d57b8f76..91736b24727c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -820,13 +820,15 @@ def _classify( dataset_name ) and data_reader + and schema_metadata.fields ): self.classification_handler.classify_schema_fields( dataset_name, schema_metadata, - data_reader.get_sample_data_for_table( - table_id=[schema, table], - sample_size=self.config.classification.sample_size, + partial( + data_reader.get_sample_data_for_table, + [schema, table], + int(self.config.classification.sample_size * 1.2), ), ) except Exception as e: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py index 345f5bd57b44c..78b0dcf9b7be8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py @@ -64,7 +64,7 @@ def get_sql_alchemy_url(self): @platform_name("SQLAlchemy", id="sqlalchemy") @config_class(SQLAlchemyGenericConfig) -@support_status(SupportStatus.CERTIFIED) +@support_status(SupportStatus.INCUBATING) @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") class SQLAlchemyGenericSource(SQLAlchemySource): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index 53b1ddfcde595..3d0bacba74a69 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -447,6 +447,11 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") @capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") @capability(SourceCapability.USAGE_STATS, "Optionally enabled via configuration") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class TeradataSource(TwoTierSQLAlchemySource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index 7668cb01f84bc..1828c5101d4f3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -226,6 +226,11 @@ def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via 
`classification.enabled`", + supported=True, +) class TrinoSource(SQLAlchemySource): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index 32f1ba5b8d563..9800660a9ad54 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -120,6 +120,11 @@ def clean_host_port(cls, v): "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", supported=True, ) +@capability( + SourceCapability.CLASSIFICATION, + "Optionally enabled via `classification.enabled`", + supported=True, +) class VerticaSource(SQLAlchemySource): def __init__(self, config: VerticaConfig, ctx: PipelineContext): # self.platform = platform diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py index ec52e839212c5..5e2e510533af1 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py +++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py @@ -36,6 +36,9 @@ class SchemaResolverInterface(Protocol): def platform(self) -> str: ... + def includes_temp_tables(self) -> bool: + ... + def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ... @@ -74,6 +77,9 @@ def __init__( def platform(self) -> str: return self._platform + def includes_temp_tables(self) -> bool: + return False + def get_urns(self) -> Set[str]: return set(k for k, v in self._schema_cache.items() if v is not None) @@ -246,6 +252,9 @@ def __init__( def platform(self) -> str: return self._base_resolver.platform + def includes_temp_tables(self) -> bool: + return True + def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: urn = self._base_resolver.get_urn_for_table( table, lower=self._base_resolver._prefers_urn_lower() diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 8edb131c23297..495f4abfce7d4 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -1,8 +1,10 @@ +import contextlib import dataclasses import enum import itertools import json import logging +import os import pathlib import tempfile import uuid @@ -15,6 +17,7 @@ from datahub.emitter.mce_builder import get_sys_time, make_ts_millis from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import compute_upstream_fields +from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.report import Report from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph @@ -53,9 +56,6 @@ QueryId = str UrnStr = str -_DEFAULT_USER_URN = CorpUserUrn("_ingestion") -_MISSING_SESSION_ID = "__MISSING_SESSION_ID" - class QueryLogSetting(enum.Enum): DISABLED = "DISABLED" @@ -63,6 +63,23 @@ class QueryLogSetting(enum.Enum): STORE_FAILED = "STORE_FAILED" +_DEFAULT_USER_URN = CorpUserUrn("_ingestion") +_MISSING_SESSION_ID = "__MISSING_SESSION_ID" +_DEFAULT_QUERY_LOG_SETTING = QueryLogSetting[ + os.getenv("DATAHUB_SQL_AGG_QUERY_LOG") or QueryLogSetting.DISABLED.name +] + + +@dataclasses.dataclass +class LoggedQuery: + query: str + session_id: Optional[str] + timestamp: Optional[datetime] + user: Optional[UrnStr] + default_db: Optional[str] + 
default_schema: Optional[str] + + @dataclasses.dataclass class ViewDefinition: view_definition: str @@ -87,6 +104,8 @@ class QueryMetadata: column_lineage: List[ColumnLineageInfo] confidence_score: float + used_temp_tables: bool = True + def make_created_audit_stamp(self) -> models.AuditStampClass: return models.AuditStampClass( time=make_ts_millis(self.latest_timestamp) or 0, @@ -149,6 +168,9 @@ class SqlAggregatorReport(Report): queries_with_temp_upstreams: LossyDict[QueryId, LossyList] = dataclasses.field( default_factory=LossyDict ) + queries_with_non_authoritative_session: LossyList[QueryId] = dataclasses.field( + default_factory=LossyList + ) # Lineage-related. schema_resolver_count: Optional[int] = None @@ -170,7 +192,7 @@ def compute_stats(self) -> None: return super().compute_stats() -class SqlParsingAggregator: +class SqlParsingAggregator(Closeable): def __init__( self, *, @@ -185,7 +207,7 @@ def __init__( usage_config: Optional[BaseUsageConfig] = None, is_temp_table: Optional[Callable[[UrnStr], bool]] = None, format_queries: bool = True, - query_log: QueryLogSetting = QueryLogSetting.DISABLED, + query_log: QueryLogSetting = _DEFAULT_QUERY_LOG_SETTING, ) -> None: self.platform = DataPlatformUrn(platform) self.platform_instance = platform_instance @@ -210,13 +232,18 @@ def __init__( self.format_queries = format_queries self.query_log = query_log + # The exit stack helps ensure that we close all the resources we open. + self._exit_stack = contextlib.ExitStack() + # Set up the schema resolver. self._schema_resolver: SchemaResolver if graph is None: - self._schema_resolver = SchemaResolver( - platform=self.platform.platform_name, - platform_instance=self.platform_instance, - env=self.env, + self._schema_resolver = self._exit_stack.enter_context( + SchemaResolver( + platform=self.platform.platform_name, + platform_instance=self.platform_instance, + env=self.env, + ) ) else: self._schema_resolver = None # type: ignore @@ -235,27 +262,33 @@ def __init__( # By providing a filename explicitly here, we also ensure that the file # is not automatically deleted on exit. - self._shared_connection = ConnectionWrapper(filename=query_log_path) + self._shared_connection = self._exit_stack.enter_context( + ConnectionWrapper(filename=query_log_path) + ) # Stores the logged queries. - self._logged_queries = FileBackedList[str]( + self._logged_queries = FileBackedList[LoggedQuery]( shared_connection=self._shared_connection, tablename="stored_queries" ) + self._exit_stack.push(self._logged_queries) # Map of query_id -> QueryMetadata self._query_map = FileBackedDict[QueryMetadata]( shared_connection=self._shared_connection, tablename="query_map" ) + self._exit_stack.push(self._query_map) # Map of downstream urn -> { query ids } self._lineage_map = FileBackedDict[OrderedSet[QueryId]]( shared_connection=self._shared_connection, tablename="lineage_map" ) + self._exit_stack.push(self._lineage_map) # Map of view urn -> view definition self._view_definitions = FileBackedDict[ViewDefinition]( shared_connection=self._shared_connection, tablename="view_definitions" ) + self._exit_stack.push(self._view_definitions) # Map of session ID -> {temp table name -> query id} # Needs to use the query_map to find the info about the query. 
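
Aside (illustrative only, not part of the change set above): the hunks in sql_parsing_aggregator.py introduce an env-driven default for the query log and make the aggregator a Closeable that drains an ExitStack of its SQLite-backed collections. The sketch below shows how calling code might rely on those two behaviours; everything beyond the names visible in the diff (QueryLogSetting, the DATAHUB_SQL_AGG_QUERY_LOG variable, SqlParsingAggregator and its `platform` argument) is an assumption made for brevity.

    # Illustrative sketch under the assumptions stated above.
    import contextlib
    import os

    # The default query-log setting is resolved at module import time, so the
    # variable must be exported before the import; when unset, it falls back
    # to DISABLED (per the diff).
    os.environ["DATAHUB_SQL_AGG_QUERY_LOG"] = "STORE_FAILED"

    from datahub.sql_parsing.sql_parsing_aggregator import (
        QueryLogSetting,
        SqlParsingAggregator,
    )

    # Enum lookup by name mirrors what the new default does internally.
    assert (
        QueryLogSetting[os.environ["DATAHUB_SQL_AGG_QUERY_LOG"]]
        is QueryLogSetting.STORE_FAILED
    )

    # close() now drains the internal ExitStack, releasing the shared SQLite
    # connection and the FileBackedDict/FileBackedList state, so wrapping the
    # aggregator in contextlib.closing is enough to guarantee cleanup.
    # Remaining constructor arguments are left at their defaults here (an
    # assumption for brevity).
    with contextlib.closing(SqlParsingAggregator(platform="snowflake")) as aggregator:
        pass  # feed queries via add_observed_query(...) and emit the results

A side effect of the same hunks is that stored log entries become structured LoggedQuery records (query text plus session ID, timestamp, user, and default db/schema) rather than bare query strings, which makes a persisted query log replayable and easier to debug.
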
@@ -263,16 +296,20 @@ def __init__( self._temp_lineage_map = FileBackedDict[Dict[UrnStr, QueryId]]( shared_connection=self._shared_connection, tablename="temp_lineage_map" ) + self._exit_stack.push(self._temp_lineage_map) # Map of query ID -> schema fields, only for query IDs that generate temp tables. self._inferred_temp_schemas = FileBackedDict[List[models.SchemaFieldClass]]( - shared_connection=self._shared_connection, tablename="inferred_temp_schemas" + shared_connection=self._shared_connection, + tablename="inferred_temp_schemas", ) + self._exit_stack.push(self._inferred_temp_schemas) # Map of table renames, from original UrnStr to new UrnStr. self._table_renames = FileBackedDict[UrnStr]( shared_connection=self._shared_connection, tablename="table_renames" ) + self._exit_stack.push(self._table_renames) # Usage aggregator. This will only be initialized if usage statistics are enabled. # TODO: Replace with FileBackedDict. @@ -281,6 +318,9 @@ def __init__( assert self.usage_config is not None self._usage_aggregator = UsageAggregator(config=self.usage_config) + def close(self) -> None: + self._exit_stack.close() + @property def _need_schemas(self) -> bool: return self.generate_lineage or self.generate_usage_statistics @@ -492,6 +532,7 @@ def add_observed_query( schema_resolver: SchemaResolverInterface = ( self._make_schema_resolver_for_session(session_id) ) + session_has_temp_tables = schema_resolver.includes_temp_tables() # Run the SQL parser. parsed = self._run_sql_parser( @@ -499,6 +540,9 @@ def add_observed_query( default_db=default_db, default_schema=default_schema, schema_resolver=schema_resolver, + session_id=session_id, + timestamp=query_timestamp, + user=user, ) if parsed.debug_info.error: self.report.observed_query_parse_failures.append( @@ -565,6 +609,7 @@ def add_observed_query( upstreams=parsed.in_tables, column_lineage=parsed.column_lineage or [], confidence_score=parsed.debug_info.confidence, + used_temp_tables=session_has_temp_tables, ) ) @@ -700,6 +745,9 @@ def _run_sql_parser( default_db: Optional[str], default_schema: Optional[str], schema_resolver: SchemaResolverInterface, + session_id: str = _MISSING_SESSION_ID, + timestamp: Optional[datetime] = None, + user: Optional[CorpUserUrn] = None, ) -> SqlParsingResult: parsed = sqlglot_lineage( query, @@ -712,7 +760,15 @@ def _run_sql_parser( if self.query_log == QueryLogSetting.STORE_ALL or ( self.query_log == QueryLogSetting.STORE_FAILED and parsed.debug_info.error ): - self._logged_queries.append(query) + query_log_entry = LoggedQuery( + query=query, + session_id=session_id if session_id != _MISSING_SESSION_ID else None, + timestamp=timestamp, + user=user.urn() if user else None, + default_db=default_db, + default_schema=default_schema, + ) + self._logged_queries.append(query_log_entry) # Also add some extra logging. if parsed.debug_info.error: @@ -734,10 +790,21 @@ def _add_to_query_map( # This assumes that queries come in order of increasing timestamps, # so the current query is more authoritative than the previous one. current.formatted_query_string = new.formatted_query_string - current.session_id = new.session_id current.latest_timestamp = new.latest_timestamp or current.latest_timestamp current.actor = new.actor or current.actor + if current.used_temp_tables and not new.used_temp_tables: + # If we see the same query again, but in a different session, + # it's possible that we didn't capture the temp tables in the newer session, + # but did in the older one. 
If that happens, we treat the older session's + # lineage as more authoritative. This isn't technically correct, but it's + # better than using the newer session's lineage, which is likely incorrect. + self.report.queries_with_non_authoritative_session.append( + query_fingerprint + ) + return + current.session_id = new.session_id + if not merge_lineage: # An invariant of the fingerprinting is that if two queries have the # same fingerprint, they must also have the same lineage. We overwrite @@ -1066,9 +1133,12 @@ def _recurse_into_query( # - Update the query text to combine the queries composite_query_id = self._composite_query_id(composed_of_queries) - self.report.queries_with_temp_upstreams.setdefault( - composite_query_id, LossyList() - ).extend(composed_of_queries) + composed_of_queries_truncated: LossyList[str] = LossyList() + for query_id in composed_of_queries: + composed_of_queries_truncated.append(query_id) + self.report.queries_with_temp_upstreams[ + composite_query_id + ] = composed_of_queries_truncated merged_query_text = ";\n\n".join( [ diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 54f6a6e984c00..91f5d6f914676 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -62,9 +62,13 @@ def assert_metadata_files_equal( # We have to "normalize" the golden file by reading and writing it back out. # This will clean up nulls, double serialization, and other formatting issues. with tempfile.NamedTemporaryFile() as temp: - golden_metadata = read_metadata_file(pathlib.Path(golden_path)) - write_metadata_file(pathlib.Path(temp.name), golden_metadata) - golden = load_json_file(temp.name) + try: + golden_metadata = read_metadata_file(pathlib.Path(golden_path)) + write_metadata_file(pathlib.Path(temp.name), golden_metadata) + golden = load_json_file(temp.name) + except (ValueError, AssertionError) as e: + logger.info(f"Error reformatting golden file as MCP/MCEs: {e}") + golden = load_json_file(golden_path) diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: @@ -107,7 +111,7 @@ def diff_metadata_json( # if ignore_order is False, always use DeepDiff except CannotCompareMCPs as e: logger.info(f"{e}, falling back to MCE diff") - except AssertionError as e: + except (AssertionError, ValueError) as e: logger.warning(f"Reverting to old diff method: {e}") logger.debug("Error with new diff method", exc_info=True) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index 821b69c968ee4..d264a3970fdde 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -126,6 +126,7 @@ def executemany( def close(self) -> None: for obj in self._dependent_objects: obj.close() + self._dependent_objects.clear() with self.conn_lock: self.conn.close() if self._temp_directory: @@ -440,7 +441,7 @@ def __del__(self) -> None: self.close() -class FileBackedList(Generic[_VT]): +class FileBackedList(Generic[_VT], Closeable): """An append-only, list-like object that stores its contents in a SQLite database.""" _len: int = field(default=0) @@ -456,7 +457,6 @@ def __init__( cache_max_size: Optional[int] = None, cache_eviction_batch_size: Optional[int] = None, ) -> None: - self._len = 0 
self._dict = FileBackedDict[_VT]( shared_connection=shared_connection, tablename=tablename, @@ -468,6 +468,12 @@ def __init__( or _DEFAULT_MEMORY_CACHE_EVICTION_BATCH_SIZE, ) + if shared_connection: + shared_connection._dependent_objects.append(self) + + # In case we're reusing an existing list, we need to run a query to get the length. + self._len = len(self._dict) + @property def tablename(self) -> str: return self._dict.tablename diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json index da9589d2195ac..f8763d48d35ef 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json @@ -236,7 +236,62 @@ "tableSchema": "" } }, - "fields": [] + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [] + }, + "glossaryTerms": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:Age" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:datahub" + } + }, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "glossaryTerms": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:Email_Address" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:datahub" + } + }, + "isPartOfKey": false + } + ] } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 602401134dcd3..e79bbbe995aae 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -1,11 +1,20 @@ +import random +import string from typing import Any, Dict from unittest.mock import patch from freezegun import freeze_time from google.cloud.bigquery.table import TableListItem +from datahub.ingestion.glossary.classifier import ( + ClassificationConfig, + DynamicTypedClassifierConfig, +) +from datahub.ingestion.glossary.datahub_classifier import DataHubClassifierConfig from datahub.ingestion.source.bigquery_v2.bigquery import BigqueryV2Source +from datahub.ingestion.source.bigquery_v2.bigquery_data_reader import BigQueryDataReader from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( + BigqueryColumn, BigqueryDataset, BigQuerySchemaApi, BigqueryTable, @@ -16,13 +25,29 @@ FROZEN_TIME = "2022-02-03 07:00:00" +def random_email(): + return ( + "".join( + [ + random.choice(string.ascii_lowercase) + for i in range(random.randint(10, 15)) + ] + ) + + "@xyz.com" + ) + + @freeze_time(FROZEN_TIME) @patch.object(BigQuerySchemaApi, "get_tables_for_dataset") @patch.object(BigqueryV2Source, "get_core_table_details") @patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") +@patch.object(BigQuerySchemaApi, "get_columns_for_dataset") +@patch.object(BigQueryDataReader, "get_sample_data_for_table") @patch("google.cloud.bigquery.Client") def test_bigquery_v2_ingest( client, + get_sample_data_for_table, + get_columns_for_dataset, get_datasets_for_project_id, get_core_table_details, 
get_tables_for_dataset, @@ -42,6 +67,34 @@ def test_bigquery_v2_ingest( ) table_name = "table-1" get_core_table_details.return_value = {table_name: table_list_item} + get_columns_for_dataset.return_value = { + table_name: [ + BigqueryColumn( + name="age", + ordinal_position=1, + is_nullable=False, + field_path="col_1", + data_type="INT", + comment="comment", + is_partition_column=False, + cluster_column_position=None, + ), + BigqueryColumn( + name="email", + ordinal_position=1, + is_nullable=False, + field_path="col_2", + data_type="STRING", + comment="comment", + is_partition_column=False, + cluster_column_position=None, + ), + ] + } + get_sample_data_for_table.return_value = { + "age": [random.randint(1, 80) for i in range(20)], + "email": [random_email() for i in range(20)], + } bigquery_table = BigqueryTable( name=table_name, @@ -58,6 +111,18 @@ def test_bigquery_v2_ingest( "include_usage_statistics": False, "include_table_lineage": False, "include_data_platform_instance": True, + "classification": ClassificationConfig( + enabled=True, + classifiers=[ + DynamicTypedClassifierConfig( + type="datahub", + config=DataHubClassifierConfig( + minimum_values_threshold=1, + ), + ) + ], + max_workers=1, + ).dict(), } pipeline_config_dict: Dict[str, Any] = { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index d59fce788c95e..3c5b0027ea8ad 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -12,7 +12,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -63,19 +65,19 @@ }, "fields": [ { - "fieldPath": "2", + "fieldPath": "Sampling Date", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "string", + "nativeDataType": "date", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3", + "fieldPath": "Site ID", "nullable": false, "type": { "type": { @@ -87,7 +89,7 @@ "isPartOfKey": false }, { - "fieldPath": "Br \n(mg/L)", + "fieldPath": "Park ID", "nullable": false, "type": { "type": { @@ -99,7 +101,7 @@ "isPartOfKey": false }, { - "fieldPath": "Ca \n(mg/L)", + "fieldPath": "Lat (\u00b0N)", "nullable": false, "type": { "type": { @@ -111,7 +113,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cl \n(mg/L)", + "fieldPath": "Long (\u00b0W)", "nullable": false, "type": { "type": { @@ -123,7 +125,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cond (\u00b5S/cm)", + "fieldPath": "Water Temp (\u00b0C)", "nullable": false, "type": { "type": { @@ -135,31 +137,31 @@ "isPartOfKey": false }, { - "fieldPath": "DO (mg/L)", + "fieldPath": "Cond (\u00b5S/cm)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "DOC [mg/L C]", + "fieldPath": "pH", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + 
"com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "F \n(mg/L)", + "fieldPath": "DO (mg/L)", "nullable": false, "type": { "type": { @@ -171,19 +173,19 @@ "isPartOfKey": false }, { - "fieldPath": "K \n(mg/L)", + "fieldPath": "Secchi Depth (m)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Lat (\u00b0N)", + "fieldPath": "UV Absorbance, 254nm", "nullable": false, "type": { "type": { @@ -195,7 +197,7 @@ "isPartOfKey": false }, { - "fieldPath": "Long (\u00b0W)", + "fieldPath": "DOC [mg/L C]", "nullable": false, "type": { "type": { @@ -207,7 +209,7 @@ "isPartOfKey": false }, { - "fieldPath": "Mg \n(mg/L)", + "fieldPath": "SUVA, 254nm", "nullable": false, "type": { "type": { @@ -243,31 +245,31 @@ "isPartOfKey": false }, { - "fieldPath": "Na \n(mg/L)", + "fieldPath": "PO4-P \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "PO4-P \n(mg P/L)", + "fieldPath": "TDN \n(mg N/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Park ID", + "fieldPath": "TDP \n(mg P/L)", "nullable": false, "type": { "type": { @@ -279,7 +281,7 @@ "isPartOfKey": false }, { - "fieldPath": "SO4-S \n(mg/L)", + "fieldPath": "Cl \n(mg/L)", "nullable": false, "type": { "type": { @@ -291,7 +293,7 @@ "isPartOfKey": false }, { - "fieldPath": "SUVA, 254nm", + "fieldPath": "SO4-S \n(mg/L)", "nullable": false, "type": { "type": { @@ -303,7 +305,7 @@ "isPartOfKey": false }, { - "fieldPath": "Sampling Date", + "fieldPath": "F \n(mg/L)", "nullable": false, "type": { "type": { @@ -315,7 +317,7 @@ "isPartOfKey": false }, { - "fieldPath": "Secchi Depth (m)", + "fieldPath": "Br \n(mg/L)", "nullable": false, "type": { "type": { @@ -327,19 +329,19 @@ "isPartOfKey": false }, { - "fieldPath": "Site ID", + "fieldPath": "Na \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDN \n(mg N/L)", + "fieldPath": "K \n(mg/L)", "nullable": false, "type": { "type": { @@ -351,19 +353,19 @@ "isPartOfKey": false }, { - "fieldPath": "TDP \n(mg P/L)", + "fieldPath": "Ca \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "UV Absorbance, 254nm", + "fieldPath": "Mg \n(mg/L)", "nullable": false, "type": { "type": { @@ -375,19 +377,19 @@ "isPartOfKey": false }, { - "fieldPath": "Water Temp (\u00b0C)", + "fieldPath": "d18O", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": 
"d18O", + "fieldPath": "dD", "nullable": false, "type": { "type": { @@ -399,7 +401,7 @@ "isPartOfKey": false }, { - "fieldPath": "dD", + "fieldPath": "field29", "nullable": false, "type": { "type": { @@ -411,7 +413,7 @@ "isPartOfKey": false }, { - "fieldPath": "field29", + "fieldPath": "2", "nullable": false, "type": { "type": { @@ -423,7 +425,7 @@ "isPartOfKey": false }, { - "fieldPath": "pH", + "fieldPath": "3", "nullable": false, "type": { "type": { @@ -439,7 +441,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -460,7 +463,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -481,7 +485,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -496,7 +501,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -512,7 +518,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -529,7 +536,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -549,7 +557,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +579,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -585,7 +595,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -601,7 +612,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -618,7 +630,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -633,7 +646,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +671,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +693,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +709,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -709,7 +726,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -726,7 +744,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + 
"lastRunId": "no-run-id-provided" } }, { @@ -741,7 +760,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +789,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -790,7 +811,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -805,7 +827,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -821,7 +844,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -838,7 +862,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -853,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -885,7 +911,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -906,7 +933,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -921,7 +949,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -937,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -954,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -969,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1005,7 +1037,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1026,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1041,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1057,7 +1092,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1074,7 +1110,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1089,7 +1126,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ 
-1129,7 +1167,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1150,7 +1189,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1165,7 +1205,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1222,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1198,7 +1240,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1213,7 +1256,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1257,7 +1301,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1278,7 +1323,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1293,7 +1339,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1309,7 +1356,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1326,7 +1374,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1341,7 +1390,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1389,7 +1439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1404,7 +1455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2701,7 +2753,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2753,7 +2806,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2769,7 +2823,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2791,7 +2846,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2884,7 +2940,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2905,7 +2962,8 @@ 
}, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2920,7 +2978,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3270,7 +3329,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3322,7 +3382,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3338,7 +3399,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3360,7 +3422,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3453,7 +3516,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3474,7 +3538,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3489,7 +3554,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3839,7 +3905,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3891,7 +3958,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3913,7 +3981,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3929,7 +3998,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -3998,7 +4068,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4019,7 +4090,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4034,7 +4106,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4067,7 +4140,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4119,7 +4193,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4141,7 +4216,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4234,7 +4310,8 @@ }, "systemMetadata": 
{ "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4255,7 +4332,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4271,7 +4349,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4286,7 +4365,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4590,7 +4670,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4642,7 +4723,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4664,7 +4746,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4680,7 +4763,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4773,7 +4857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4794,7 +4879,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4809,7 +4895,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -4972,7 +5059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5024,7 +5112,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5046,7 +5135,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5451,7 +5541,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5472,7 +5563,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -5487,7 +5579,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7647,7 +7740,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7699,7 +7793,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7715,7 +7810,8 @@ }, "systemMetadata": { "lastObserved": 
1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7730,7 +7826,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7745,7 +7842,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7760,7 +7858,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7775,7 +7874,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7790,7 +7890,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7805,7 +7906,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -7820,7 +7922,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 58c225e1ec4c9..d7a9bca716fd6 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -12,7 +12,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -63,19 +65,19 @@ }, "fields": [ { - "fieldPath": "2", + "fieldPath": "Sampling Date", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "string", + "nativeDataType": "date", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3", + "fieldPath": "Site ID", "nullable": false, "type": { "type": { @@ -87,7 +89,7 @@ "isPartOfKey": false }, { - "fieldPath": "Br \n(mg/L)", + "fieldPath": "Park ID", "nullable": false, "type": { "type": { @@ -99,7 +101,7 @@ "isPartOfKey": false }, { - "fieldPath": "Ca \n(mg/L)", + "fieldPath": "Lat (\u00b0N)", "nullable": false, "type": { "type": { @@ -111,7 +113,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cl \n(mg/L)", + "fieldPath": "Long (\u00b0W)", "nullable": false, "type": { "type": { @@ -123,7 +125,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cond (\u00b5S/cm)", + "fieldPath": "Water Temp (\u00b0C)", "nullable": false, "type": { "type": { @@ -135,31 +137,31 @@ "isPartOfKey": false }, { - "fieldPath": "DO (mg/L)", + "fieldPath": "Cond (\u00b5S/cm)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, 
"isPartOfKey": false }, { - "fieldPath": "DOC [mg/L C]", + "fieldPath": "pH", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "F \n(mg/L)", + "fieldPath": "DO (mg/L)", "nullable": false, "type": { "type": { @@ -171,19 +173,19 @@ "isPartOfKey": false }, { - "fieldPath": "K \n(mg/L)", + "fieldPath": "Secchi Depth (m)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Lat (\u00b0N)", + "fieldPath": "UV Absorbance, 254nm", "nullable": false, "type": { "type": { @@ -195,7 +197,7 @@ "isPartOfKey": false }, { - "fieldPath": "Long (\u00b0W)", + "fieldPath": "DOC [mg/L C]", "nullable": false, "type": { "type": { @@ -207,7 +209,7 @@ "isPartOfKey": false }, { - "fieldPath": "Mg \n(mg/L)", + "fieldPath": "SUVA, 254nm", "nullable": false, "type": { "type": { @@ -243,31 +245,31 @@ "isPartOfKey": false }, { - "fieldPath": "Na \n(mg/L)", + "fieldPath": "PO4-P \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "PO4-P \n(mg P/L)", + "fieldPath": "TDN \n(mg N/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Park ID", + "fieldPath": "TDP \n(mg P/L)", "nullable": false, "type": { "type": { @@ -279,7 +281,7 @@ "isPartOfKey": false }, { - "fieldPath": "SO4-S \n(mg/L)", + "fieldPath": "Cl \n(mg/L)", "nullable": false, "type": { "type": { @@ -291,7 +293,7 @@ "isPartOfKey": false }, { - "fieldPath": "SUVA, 254nm", + "fieldPath": "SO4-S \n(mg/L)", "nullable": false, "type": { "type": { @@ -303,7 +305,7 @@ "isPartOfKey": false }, { - "fieldPath": "Sampling Date", + "fieldPath": "F \n(mg/L)", "nullable": false, "type": { "type": { @@ -315,7 +317,7 @@ "isPartOfKey": false }, { - "fieldPath": "Secchi Depth (m)", + "fieldPath": "Br \n(mg/L)", "nullable": false, "type": { "type": { @@ -327,19 +329,19 @@ "isPartOfKey": false }, { - "fieldPath": "Site ID", + "fieldPath": "Na \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDN \n(mg N/L)", + "fieldPath": "K \n(mg/L)", "nullable": false, "type": { "type": { @@ -351,19 +353,19 @@ "isPartOfKey": false }, { - "fieldPath": "TDP \n(mg P/L)", + "fieldPath": "Ca \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "UV Absorbance, 254nm", + "fieldPath": "Mg \n(mg/L)", "nullable": false, "type": { "type": { @@ -375,19 +377,19 @@ "isPartOfKey": false }, { - "fieldPath": "Water Temp (\u00b0C)", + "fieldPath": "d18O", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + 
"com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "d18O", + "fieldPath": "dD", "nullable": false, "type": { "type": { @@ -399,7 +401,7 @@ "isPartOfKey": false }, { - "fieldPath": "dD", + "fieldPath": "field29", "nullable": false, "type": { "type": { @@ -411,7 +413,7 @@ "isPartOfKey": false }, { - "fieldPath": "field29", + "fieldPath": "2", "nullable": false, "type": { "type": { @@ -423,7 +425,7 @@ "isPartOfKey": false }, { - "fieldPath": "pH", + "fieldPath": "3", "nullable": false, "type": { "type": { @@ -439,7 +441,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -460,7 +463,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -481,7 +485,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -496,7 +501,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -512,7 +518,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -529,7 +536,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -549,7 +557,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +579,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -585,7 +595,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -601,7 +612,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -618,7 +630,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -633,7 +646,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +671,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +693,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +709,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -709,7 +726,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": 
"no-run-id-provided" } }, { @@ -726,7 +744,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -741,7 +760,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +789,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -790,7 +811,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -805,7 +827,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -821,7 +844,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -838,7 +862,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -853,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -885,7 +911,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -900,7 +927,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -936,7 +964,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -958,7 +987,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -974,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1067,7 +1098,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1088,7 +1120,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1103,7 +1136,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1139,7 +1173,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1155,7 +1190,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1177,7 +1213,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1270,7 
+1307,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1291,7 +1329,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1306,7 +1345,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1342,7 +1382,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1364,7 +1405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1380,7 +1422,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1449,7 +1492,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1470,7 +1514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1485,7 +1530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1521,7 +1567,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1543,7 +1590,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1559,7 +1607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1652,7 +1701,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1673,7 +1723,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1688,7 +1739,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1724,7 +1776,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1740,7 +1793,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1762,7 +1816,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1855,7 +1910,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1876,7 +1932,8 @@ }, 
"systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1891,7 +1948,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1927,7 +1985,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1949,7 +2008,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2354,7 +2414,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2375,7 +2436,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2391,7 +2453,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2406,7 +2469,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2442,7 +2506,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2457,7 +2522,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2472,7 +2538,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2487,7 +2554,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2502,7 +2570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2517,7 +2586,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2532,7 +2602,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } }, { @@ -2547,7 +2618,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json" + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index 88354ba74c417..81487d38eda7d 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -4,7 +4,6 @@ from typing import cast from unittest import mock -import pandas as pd import pytest from freezegun import freeze_time @@ -65,7 +64,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): golden_file = 
test_resources_dir / "snowflake_golden.json" with mock.patch("snowflake.connector.connect") as mock_connect, mock.patch( - "datahub.ingestion.source.snowflake.snowflake_v2.SnowflakeV2Source.get_sample_values_for_table" + "datahub.ingestion.source.snowflake.snowflake_data_reader.SnowflakeDataReader.get_sample_data_for_table" ) as mock_sample_values: sf_connection = mock.MagicMock() sf_cursor = mock.MagicMock() @@ -74,13 +73,11 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): sf_cursor.execute.side_effect = default_query_results - mock_sample_values.return_value = pd.DataFrame( - data={ - "col_1": [random.randint(1, 80) for i in range(20)], - "col_2": [random_email() for i in range(20)], - "col_3": [random_cloud_region() for i in range(20)], - } - ) + mock_sample_values.return_value = { + "col_1": [random.randint(1, 80) for i in range(20)], + "col_2": [random_email() for i in range(20)], + "col_3": [random_cloud_region() for i in range(20)], + } datahub_classifier_config = DataHubClassifierConfig( minimum_values_threshold=10, diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_classification.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_classification.py index 427b6e562ebd1..75a9df4f28051 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_classification.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_classification.py @@ -91,7 +91,8 @@ def test_snowflake_classification_perf(num_workers, num_cols_per_table, num_tabl source_report = pipeline.source.get_report() assert isinstance(source_report, SnowflakeV2Report) assert ( - cast(SnowflakeV2Report, source_report).num_tables_classified == num_tables + cast(SnowflakeV2Report, source_report).num_tables_classification_found + == num_tables ) assert ( len( diff --git a/metadata-ingestion/tests/unit/redshift_query_mocker.py b/metadata-ingestion/tests/unit/redshift_query_mocker.py index 631e6e7ceaf1f..ada76e624032b 100644 --- a/metadata-ingestion/tests/unit/redshift_query_mocker.py +++ b/metadata-ingestion/tests/unit/redshift_query_mocker.py @@ -63,7 +63,7 @@ def mock_stl_insert_table_cursor(cursor: MagicMock) -> None: "\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)[" "^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n " " query_text,\n row_number() over (\n partition " - "by TRIM(query_text)\n order by start_time desc\n ) rn\n " + "by session_id, TRIM(query_text)\n order by start_time desc\n ) rn\n " " from\n (\n select\n pid " "as session_id,\n xid as transaction_id,\n starttime " "as start_time,\n type,\n query_text,\n " @@ -94,7 +94,7 @@ def mock_stl_insert_table_cursor(cursor: MagicMock) -> None: "TABLE volt_tt_'\n -- We need to filter out our query and it was not possible " "earlier when we did not have any comment in the query\n and query_text not ilike " "'%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl" - "-statementtext%'\n\n )\n where\n rn = 1;\n " + "-statementtext%'\n\n )\n where\n rn = 1\n " ): mock_temp_table_cursor, "select * from test_collapse_temp_lineage": mock_stl_insert_table_cursor, } diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage_query_log.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage_query_log.json new file mode 100644 index 0000000000000..e8e72bf25d303 --- /dev/null +++ 
b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_basic_lineage_query_log.json @@ -0,0 +1,10 @@ +[ + { + "query": "create table foo as select a, b from bar", + "session_id": null, + "timestamp": null, + "user": null, + "default_db": "dev", + "default_schema": "public" + } +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json new file mode 100644 index 0000000000000..c4d3bee43faa1 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_multistep_temp_table.json @@ -0,0 +1,122 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.prod_foo,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream1,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45" + }, + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream2,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream1,PROD),a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.prod_foo,PROD),a)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream1,PROD),b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.prod_foo,PROD),b)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream2,PROD),c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.prod_foo,PROD),c)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE TABLE #temp2 AS\nSELECT\n b,\n c\nFROM upstream2;\n\nCREATE TABLE #temp1 AS\nSELECT\n a,\n 2 * b AS b\nFROM upstream1;\n\nCREATE TEMPORARY TABLE staging_foo AS\nSELECT\n up1.a,\n up1.b,\n up2.c\nFROM #temp1 AS up1\nLEFT JOIN #temp2 
AS up2\n ON up1.b = up2.b\nWHERE\n up1.b > 0;\n\nINSERT INTO prod_foo\nSELECT\n *\nFROM staging_foo", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.prod_foo,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream1,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.upstream2,PROD)" + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts_from_temp_tables.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts_from_temp_tables.json new file mode 100644 index 0000000000000..d2076aa1529d3 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_overlapping_inserts_from_temp_tables.json @@ -0,0 +1,231 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.customer,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + }, + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_returns,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + }, + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_survey,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + }, + { + "auditStamp": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.in_person_returns,PROD)", + "type": "TRANSFORMED", + "query": "urn:li:query:composite_638945c382e30206a8f8a57894d375e5f6f2a3562fe68480badf37e38e836d75" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.in_person_returns,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD),customer_id)" + ], + "confidenceScore": 0.2, + "query": 
"urn:li:query:composite_638945c382e30206a8f8a57894d375e5f6f2a3562fe68480badf37e38e836d75" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_returns,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD),customer_id)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.customer,PROD),customer_email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD),customer_email)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.in_person_returns,PROD),return_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD),return_date)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_638945c382e30206a8f8a57894d375e5f6f2a3562fe68480badf37e38e836d75" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_returns,PROD),return_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD),return_date)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_survey,PROD),return_reason)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD),return_reason)" + ], + "confidenceScore": 0.2, + "query": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE TABLE #stage_online_returns AS\nSELECT\n online_ret.customer_id,\n customer.customer_email,\n online_ret.return_date,\n online_survey.return_reason\nFROM online_returns AS online_ret\nLEFT JOIN customer\n ON online_ret.customer_id = customer.customer_id\nLEFT JOIN online_survey\n ON online_ret.customer_id = online_survey.customer_id\n AND online_ret.return_id = online_survey.event_id;\n\nINSERT INTO all_returns (\n customer_id,\n customer_email,\n return_date,\n return_reason\n)\nSELECT\n customer_id,\n customer_email,\n return_date,\n return_reason\nFROM #stage_online_returns", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + 
"entityUrn": "urn:li:query:composite_ad747ecae933492280d24dfa7f3a4ae3a3c67457e145803d05f7d8bd7efa7d17", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.customer,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_returns,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.online_survey,PROD)" + } + ] + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_638945c382e30206a8f8a57894d375e5f6f2a3562fe68480badf37e38e836d75", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE TABLE #stage_in_person_returns AS\nSELECT\n ipr.customer_id,\n customer.customer_email,\n ipr.return_date\nFROM in_person_returns AS ipr\nLEFT JOIN customer\n ON in_person_returns.customer_id = customer.customer_id;\n\nINSERT INTO all_returns (\n customer_id,\n customer_email,\n return_date\n)\nSELECT\n customer_id,\n customer_email,\n return_date\nFROM #stage_in_person_returns", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1707182625000, + "actor": "urn:li:corpuser:_ingestion" + } + } + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:composite_638945c382e30206a8f8a57894d375e5f6f2a3562fe68480badf37e38e836d75", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.all_returns,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.customer,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.in_person_returns,PROD)" + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py index ab2fc0f005e76..826016d07317b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py @@ -13,6 +13,7 @@ from datahub.sql_parsing.sql_parsing_common import QueryType from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, ColumnRef from tests.test_helpers import mce_helpers +from tests.test_helpers.click_helpers import run_datahub_cmd RESOURCE_DIR = pathlib.Path(__file__).parent / "aggregator_goldens" FROZEN_TIME = "2024-02-06 01:23:45" @@ -23,12 +24,13 @@ def _ts(ts: int) -> datetime: @freeze_time(FROZEN_TIME) -def test_basic_lineage(pytestconfig: pytest.Config) -> None: +def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> None: aggregator = SqlParsingAggregator( platform="redshift", generate_lineage=True, generate_usage_statistics=False, generate_operations=False, + query_log=QueryLogSetting.STORE_ALL, ) aggregator.add_observed_query( @@ -45,6 +47,23 @@ def test_basic_lineage(pytestconfig: pytest.Config) -> None: golden_path=RESOURCE_DIR / "test_basic_lineage.json", ) + # This test also validates the query log storage functionality. 
+ aggregator.close() + query_log_db = aggregator.report.query_log_path + query_log_json = tmp_path / "query_log.json" + run_datahub_cmd( + [ + "check", + "extract-sql-agg-log", + str(query_log_db), + "--output", + str(query_log_json), + ] + ) + mce_helpers.check_golden_file( + pytestconfig, query_log_json, RESOURCE_DIR / "test_basic_lineage_query_log.json" + ) + @freeze_time(FROZEN_TIME) def test_overlapping_inserts(pytestconfig: pytest.Config) -> None: @@ -128,6 +147,127 @@ def test_temp_table(pytestconfig: pytest.Config) -> None: ) +@freeze_time(FROZEN_TIME) +def test_multistep_temp_table(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + + aggregator.add_observed_query( + query="create table #temp1 as select a, 2*b as b from upstream1", + default_db="dev", + default_schema="public", + session_id="session1", + ) + aggregator.add_observed_query( + query="create table #temp2 as select b, c from upstream2", + default_db="dev", + default_schema="public", + session_id="session1", + ) + aggregator.add_observed_query( + query="create temp table staging_foo as select up1.a, up1.b, up2.c from #temp1 up1 left join #temp2 up2 on up1.b = up2.b where up1.b > 0", + default_db="dev", + default_schema="public", + session_id="session1", + ) + aggregator.add_observed_query( + query="insert into table prod_foo\nselect * from staging_foo", + default_db="dev", + default_schema="public", + session_id="session1", + ) + + mcps = list(aggregator.gen_metadata()) + + # Extra check to make sure that the report is populated correctly. + report = aggregator.report + assert len(report.queries_with_temp_upstreams) == 1 + assert ( + len( + report.queries_with_temp_upstreams[ + "composite_c89ee7c127c64a5d3a42ee875305087991891c80f42a25012910524bd2c77c45" + ] + ) + == 4 + ) + + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_multistep_temp_table.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> None: + aggregator = SqlParsingAggregator( + platform="redshift", + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + report = aggregator.report + + # The "all_returns" table is populated from "#stage_in_person_returns" and "#stage_online_returns". + # #stage_in_person_returns is populated from "in_person_returns" and "customer". + # #stage_online_returns is populated from "online_returns", "customer", and "online_survey". 
+ + aggregator.add_observed_query( + query="create table #stage_in_person_returns as select ipr.customer_id, customer.customer_email, ipr.return_date " + "from in_person_returns ipr " + "left join customer on in_person_returns.customer_id = customer.customer_id", + default_db="dev", + default_schema="public", + session_id="1234", + ) + + aggregator.add_observed_query( + query="create table #stage_online_returns as select online_ret.customer_id, customer.customer_email, online_ret.return_date, online_survey.return_reason " + "from online_returns online_ret " + "left join customer on online_ret.customer_id = customer.customer_id " + "left join online_survey on online_ret.customer_id = online_survey.customer_id and online_ret.return_id = online_survey.event_id", + default_db="dev", + default_schema="public", + session_id="2323", + ) + + aggregator.add_observed_query( + query="insert into all_returns (customer_id, customer_email, return_date) select customer_id, customer_email, return_date from #stage_in_person_returns", + default_db="dev", + default_schema="public", + session_id="1234", + ) + + aggregator.add_observed_query( + query="insert into all_returns (customer_id, customer_email, return_date, return_reason) select customer_id, customer_email, return_date, return_reason from #stage_online_returns", + default_db="dev", + default_schema="public", + session_id="2323", + ) + + # We only have one create temp table, but the same insert command from multiple sessions. + # This should get ignored. + assert len(report.queries_with_non_authoritative_session) == 0 + aggregator.add_observed_query( + query="insert into all_returns (customer_id, customer_email, return_date, return_reason) select customer_id, customer_email, return_date, return_reason from #stage_online_returns", + default_db="dev", + default_schema="public", + session_id="5435", + ) + assert len(report.queries_with_non_authoritative_session) == 1 + + mcps = list(aggregator.gen_metadata()) + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR / "test_overlapping_inserts_from_temp_tables.json", + ) + + @freeze_time(FROZEN_TIME) def test_aggregate_operations(pytestconfig: pytest.Config) -> None: aggregator = SqlParsingAggregator( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 5fc3dfc779fa4..8aa27363e985d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -55,11 +55,13 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.client.RequestOptions; import org.opensearch.client.RestHighLevelClient; +import org.opensearch.common.lucene.search.function.CombineFunction; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.rescore.QueryRescorerBuilder; /** A search DAO for Elasticsearch backend. 
*/ @Slf4j @@ -116,6 +118,9 @@ private SearchResponse executeSearchQuery( searchSourceBuilder.size(count); searchSourceBuilder.query(query); + if (graphQueryConfiguration.isBoostViaNodes()) { + addViaNodeBoostQuery(searchSourceBuilder); + } searchRequest.source(searchSourceBuilder); @@ -457,7 +462,7 @@ private List getLineageRelationships( } @VisibleForTesting - public static QueryBuilder getLineageQuery( + public QueryBuilder getLineageQuery( @Nonnull Map> urnsPerEntityType, @Nonnull Map> edgesPerEntityType, @Nonnull GraphFilters graphFilters, @@ -497,7 +502,7 @@ public static QueryBuilder getLineageQuery( } @VisibleForTesting - public static QueryBuilder getLineageQueryForEntityType( + public QueryBuilder getLineageQueryForEntityType( @Nonnull List urns, @Nonnull List lineageEdges, @Nonnull GraphFilters graphFilters) { @@ -520,6 +525,25 @@ public static QueryBuilder getLineageQueryForEntityType( return query; } + /** + * Replaces score from initial lineage query against the graph index with score from whether a via + * edge exists or not. We don't currently sort the results for the graph query for anything else, + * we just do a straight filter, but this will need to be re-evaluated if we do. + * + * @param sourceBuilder source builder for the lineage query + */ + private void addViaNodeBoostQuery(final SearchSourceBuilder sourceBuilder) { + QueryBuilders.functionScoreQuery(QueryBuilders.existsQuery(EDGE_FIELD_VIA)) + .boostMode(CombineFunction.REPLACE); + QueryRescorerBuilder queryRescorerBuilder = + new QueryRescorerBuilder( + QueryBuilders.functionScoreQuery(QueryBuilders.existsQuery(EDGE_FIELD_VIA)) + .boostMode(CombineFunction.REPLACE)); + queryRescorerBuilder.windowSize( + graphQueryConfiguration.getMaxResult()); // Will rescore all results + sourceBuilder.addRescorer(queryRescorerBuilder); + } + /** * Adds an individual relationship edge to a running set of unique paths to each node in the * graph. 
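For readers unfamiliar with the rescore API used in the ESGraphQueryDAO hunk above: the new addViaNodeBoostQuery wires an OpenSearch query rescorer that swaps the constant, filter-derived score for a function_score over an exists check on the via field, so edges that carry a via node surface first. Below is a minimal, self-contained sketch of that pattern, not the project's own method; it assumes the standard OpenSearch high-level client types already imported in the hunk, and EDGE_FIELD_VIA is stood in by a local constant rather than the real one from the edge document model.

import org.opensearch.common.lucene.search.function.CombineFunction;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.search.builder.SearchSourceBuilder;
import org.opensearch.search.rescore.QueryRescorerBuilder;

public final class ViaBoostSketch {
  // Stand-in for the graph index's "via" edge field; the real constant lives with the Edge document model.
  private static final String EDGE_FIELD_VIA = "via";

  // Rescores the top windowSize hits so that edges with a via node rank ahead of plain edges.
  static SearchSourceBuilder withViaBoost(SearchSourceBuilder source, int windowSize) {
    QueryRescorerBuilder rescorer =
        new QueryRescorerBuilder(
            QueryBuilders.functionScoreQuery(QueryBuilders.existsQuery(EDGE_FIELD_VIA))
                // REPLACE drops the original score and keeps only the exists-based boost.
                .boostMode(CombineFunction.REPLACE));
    // e.g. pass graphQueryConfiguration.getMaxResult() here so every hit is rescored.
    rescorer.windowSize(windowSize);
    return source.addRescorer(rescorer);
  }
}

Because the base lineage query is a pure filter, all hits start with the same score, so the rescorer effectively partitions results into via-backed edges first and plain edges second; as the method's Javadoc notes, the boost mode would need revisiting if real scoring were ever added to the base query.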
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 90f46190ac18e..0235edbcd30cb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -36,12 +36,8 @@ import com.linkedin.structured.StructuredPropertyDefinition; import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -67,8 +63,6 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd private final ESGraphWriteDAO _graphWriteDAO; private final ESGraphQueryDAO _graphReadDAO; private final ESIndexBuilder _indexBuilder; - - private static final String DOC_DELIMETER = "--"; public static final String INDEX_NAME = "graph_service_v1"; private static final Map EMPTY_HASH = new HashMap<>(); @@ -123,25 +117,6 @@ private String toDocument(@Nonnull final Edge edge) { return searchDocument.toString(); } - private String toDocId(@Nonnull final Edge edge) { - String rawDocId = - edge.getSource().toString() - + DOC_DELIMETER - + edge.getRelationshipType() - + DOC_DELIMETER - + edge.getDestination().toString(); - - try { - byte[] bytesOfRawDocID = rawDocId.getBytes(StandardCharsets.UTF_8); - MessageDigest md = MessageDigest.getInstance("MD5"); - byte[] thedigest = md.digest(bytesOfRawDocID); - return Base64.getEncoder().encodeToString(thedigest); - } catch (NoSuchAlgorithmException e) { - e.printStackTrace(); - return rawDocId; - } - } - @Override public LineageRegistry getLineageRegistry() { return _lineageRegistry; @@ -149,7 +124,7 @@ public LineageRegistry getLineageRegistry() { @Override public void addEdge(@Nonnull final Edge edge) { - String docId = toDocId(edge); + String docId = edge.toDocId(); String edgeDocument = toDocument(edge); _graphWriteDAO.upsertDocument(docId, edgeDocument); } @@ -161,7 +136,7 @@ public void upsertEdge(@Nonnull final Edge edge) { @Override public void removeEdge(@Nonnull final Edge edge) { - String docId = toDocId(edge); + String docId = edge.toDocId(); _graphWriteDAO.deleteDocument(docId); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index 2de61c8ed31bb..ca5fbfcd27a28 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ -92,11 +92,25 @@ public int compare(RelatedEntity left, RelatedEntity right) { protected static String datasetFiveUrnString = "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetFive,PROD)"; + protected static final String schemaFieldUrnOneString = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:type,SampleDatasetFive,PROD),fieldOne)"; + protected static final String schemaFieldUrnTwoString = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:type,SampleDatasetFour,PROD),fieldTwo)"; + + protected static final String lifeCycleOwnerOneString = + 
"urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)"; + protected static final String lifeCycleOwnerTwoString = + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"; + protected static Urn datasetOneUrn = createFromString(datasetOneUrnString); protected static Urn datasetTwoUrn = createFromString(datasetTwoUrnString); protected static Urn datasetThreeUrn = createFromString(datasetThreeUrnString); protected static Urn datasetFourUrn = createFromString(datasetFourUrnString); protected static Urn datasetFiveUrn = createFromString(datasetFiveUrnString); + protected static final Urn schemaFieldUrnOne = createFromString(schemaFieldUrnOneString); + protected static final Urn schemaFieldUrnTwo = createFromString(schemaFieldUrnTwoString); + protected static final Urn lifeCycleOwnerOne = createFromString(lifeCycleOwnerOneString); + protected static final Urn lifeCycleOwnerTwo = createFromString(lifeCycleOwnerTwoString); protected static String unknownUrnString = "urn:li:unknown:(urn:li:unknown:Unknown)"; @@ -139,6 +153,14 @@ public int compare(RelatedEntity left, RelatedEntity right) { new RelatedEntity(downstreamOf, datasetThreeUrnString); protected static RelatedEntity downstreamOfDatasetFourRelatedEntity = new RelatedEntity(downstreamOf, datasetFourUrnString); + protected static final RelatedEntity downstreamOfSchemaFieldOneVia = + new RelatedEntity(downstreamOf, schemaFieldUrnOneString, lifeCycleOwnerOneString); + protected static final RelatedEntity downstreamOfSchemaFieldOne = + new RelatedEntity(downstreamOf, schemaFieldUrnOneString); + protected static final RelatedEntity downstreamOfSchemaFieldTwoVia = + new RelatedEntity(downstreamOf, schemaFieldUrnTwoString, lifeCycleOwnerOneString); + protected static final RelatedEntity downstreamOfSchemaFieldTwo = + new RelatedEntity(downstreamOf, schemaFieldUrnTwoString); protected static RelatedEntity hasOwnerDatasetOneRelatedEntity = new RelatedEntity(hasOwner, datasetOneUrnString); @@ -244,7 +266,29 @@ protected GraphService getPopulatedGraphService() throws Exception { new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null), - new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null, null)); + new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null, null), + new Edge( + schemaFieldUrnOne, + schemaFieldUrnTwo, + downstreamOf, + 0L, + null, + 0L, + null, + null, + lifeCycleOwnerOne, + lifeCycleOwnerOne), + new Edge( + schemaFieldUrnOne, + schemaFieldUrnTwo, + downstreamOf, + 0L, + null, + 0L, + null, + null, + lifeCycleOwnerTwo, + null)); edges.forEach(service::addEdge); syncAfterWrite(); @@ -412,12 +456,14 @@ public void testPopulatedGraphService() throws Exception { outgoingRelationships, 0, 100); + // All downstreamOf, hasOwner, or knowsUser relationships, outgoing assertEqualsAnyOrder( relatedOutgoingEntitiesBeforeRemove, Arrays.asList( downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity, hasOwnerUserOneRelatedEntity, hasOwnerUserTwoRelatedEntity, - knowsUserOneRelatedEntity, knowsUserTwoRelatedEntity)); + knowsUserOneRelatedEntity, knowsUserTwoRelatedEntity, + downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( anyType, @@ -428,6 +474,7 @@ public void testPopulatedGraphService() throws 
Exception { incomingRelationships, 0, 100); + // All downstreamOf, hasOwner, or knowsUser relationships, incoming assertEqualsAnyOrder( relatedIncomingEntitiesBeforeRemove, Arrays.asList( @@ -439,7 +486,44 @@ public void testPopulatedGraphService() throws Exception { hasOwnerDatasetThreeRelatedEntity, hasOwnerDatasetFourRelatedEntity, knowsUserOneRelatedEntity, - knowsUserTwoRelatedEntity)); + knowsUserTwoRelatedEntity, + downstreamOfSchemaFieldOneVia, + downstreamOfSchemaFieldOne)); + EntityLineageResult viaNodeResult = + service.getLineage( + schemaFieldUrnOne, + LineageDirection.UPSTREAM, + new GraphFilters(List.of("schemaField")), + 0, + 1000, + 100, + null, + null); + // Multi-path enabled + assertEquals(viaNodeResult.getRelationships().size(), 2); + // First one is via node + assertTrue( + viaNodeResult.getRelationships().get(0).getPaths().get(0).contains(lifeCycleOwnerOne)); + EntityLineageResult viaNodeResultNoMulti = + getGraphService(false) + .getLineage( + schemaFieldUrnOne, + LineageDirection.UPSTREAM, + new GraphFilters(List.of("schemaField")), + 0, + 1000, + 100, + null, + null); + + // Multi-path disabled, still has two because via flow creates both edges in response + assertEquals(viaNodeResultNoMulti.getRelationships().size(), 2); + // First one is via node + assertTrue( + viaNodeResult.getRelationships().get(0).getPaths().get(0).contains(lifeCycleOwnerOne)); + + // reset graph service + getGraphService(); } @Test @@ -685,12 +769,18 @@ private void doTestFindRelatedEntities( @DataProvider(name = "FindRelatedEntitiesSourceTypeTests") public Object[][] getFindRelatedEntitiesSourceTypeTests() { return new Object[][] { + // All DownstreamOf relationships, outgoing new Object[] { null, Arrays.asList(downstreamOf), outgoingRelationships, - Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + downstreamOfSchemaFieldTwoVia, + downstreamOfSchemaFieldTwo) }, + // All DownstreamOf relationships, incoming new Object[] { null, Arrays.asList(downstreamOf), @@ -698,15 +788,20 @@ public Object[][] getFindRelatedEntitiesSourceTypeTests() { Arrays.asList( downstreamOfDatasetTwoRelatedEntity, downstreamOfDatasetThreeRelatedEntity, - downstreamOfDatasetFourRelatedEntity) + downstreamOfDatasetFourRelatedEntity, + downstreamOfSchemaFieldOneVia, + downstreamOfSchemaFieldOne) }, + // All DownstreamOf relationships, both directions new Object[] { null, Arrays.asList(downstreamOf), undirectedRelationships, Arrays.asList( downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity, - downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) + downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity, + downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo, + downstreamOfSchemaFieldOneVia, downstreamOfSchemaFieldOne) }, // "" used to be any type before v0.9.0, which is now encoded by null @@ -789,16 +884,24 @@ public Object[][] getFindRelatedEntitiesDestinationTypeTests() { null, Arrays.asList(downstreamOf), outgoingRelationships, - Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) + // All DownstreamOf relationships, outgoing + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + downstreamOfSchemaFieldTwoVia, + downstreamOfSchemaFieldTwo) }, new Object[] { null, Arrays.asList(downstreamOf), incomingRelationships, + // All DownstreamOf 
relationships, incoming Arrays.asList( downstreamOfDatasetTwoRelatedEntity, downstreamOfDatasetThreeRelatedEntity, - downstreamOfDatasetFourRelatedEntity) + downstreamOfDatasetFourRelatedEntity, + downstreamOfSchemaFieldOneVia, + downstreamOfSchemaFieldOne) }, new Object[] { null, @@ -806,7 +909,9 @@ public Object[][] getFindRelatedEntitiesDestinationTypeTests() { undirectedRelationships, Arrays.asList( downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity, - downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) + downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity, + downstreamOfSchemaFieldOneVia, downstreamOfSchemaFieldOne, + downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo) }, new Object[] { "", Arrays.asList(downstreamOf), outgoingRelationships, Collections.emptyList() @@ -1035,12 +1140,14 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { outgoingRelationships, 0, 100); + // All DownstreamOf relationships, outgoing (destination) assertEqualsAnyOrder( allOutgoingRelatedEntities, Arrays.asList( downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity, hasOwnerUserOneRelatedEntity, hasOwnerUserTwoRelatedEntity, - knowsUserOneRelatedEntity, knowsUserTwoRelatedEntity)); + knowsUserOneRelatedEntity, knowsUserTwoRelatedEntity, + downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo)); RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( @@ -1052,6 +1159,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { incomingRelationships, 0, 100); + // All DownstreamOf relationships, incoming (source) assertEqualsAnyOrder( allIncomingRelatedEntities, Arrays.asList( @@ -1063,7 +1171,9 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { hasOwnerDatasetThreeRelatedEntity, hasOwnerDatasetFourRelatedEntity, knowsUserOneRelatedEntity, - knowsUserTwoRelatedEntity)); + knowsUserTwoRelatedEntity, + downstreamOfSchemaFieldOneVia, + downstreamOfSchemaFieldOne)); RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( @@ -1087,9 +1197,14 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { outgoingRelationships, 0, 100); + // All DownstreamOf relationships, outgoing (destination) assertEqualsAnyOrder( someUnknownRelationshipTypeRelatedEntities, - Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity)); + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + downstreamOfSchemaFieldTwoVia, + downstreamOfSchemaFieldTwo)); } @Test @@ -1517,6 +1632,7 @@ public void testRemoveNode() throws Exception { syncAfterWrite(); // assert the modified graph + // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( anyType, @@ -1529,7 +1645,8 @@ public void testRemoveNode() throws Exception { 100), Arrays.asList( hasOwnerUserOneRelatedEntity, hasOwnerUserTwoRelatedEntity, - knowsUserOneRelatedEntity, knowsUserTwoRelatedEntity)); + knowsUserOneRelatedEntity, knowsUserTwoRelatedEntity, + downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo)); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java new file mode 100644 index 0000000000000..19ca2e85e8c54 --- /dev/null +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java @@ -0,0 +1,386 @@ +package com.linkedin.metadata.graph; + +import static com.linkedin.metadata.search.utils.QueryUtils.*; +import static org.testng.Assert.*; + +import java.util.Arrays; +import java.util.Collections; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public abstract class GraphServiceTestBaseNoVia extends GraphServiceTestBase { + + @DataProvider(name = "NoViaFindRelatedEntitiesDestinationTypeTests") + public Object[][] getNoViaFindRelatedEntitiesDestinationTypeTests() { + return new Object[][] { + new Object[] { + null, + Arrays.asList(downstreamOf), + outgoingRelationships, + // All DownstreamOf relationships, outgoing + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + // TODO: Via not supported in Neo4J and DGraph + downstreamOfSchemaFieldTwo) + }, + new Object[] { + null, + Arrays.asList(downstreamOf), + incomingRelationships, + // All DownstreamOf relationships, incoming + Arrays.asList( + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity, + // TODO: Via not supported in Neo4J and DGraph + downstreamOfSchemaFieldOne) + }, + new Object[] { + null, + Arrays.asList(downstreamOf), + undirectedRelationships, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity, + // TODO: Via not supported in Neo4J and DGraph + downstreamOfSchemaFieldOne, + downstreamOfSchemaFieldTwo) + }, + new Object[] { + "", Arrays.asList(downstreamOf), outgoingRelationships, Collections.emptyList() + }, + new Object[] { + "", Arrays.asList(downstreamOf), incomingRelationships, Collections.emptyList() + }, + new Object[] { + "", Arrays.asList(downstreamOf), undirectedRelationships, Collections.emptyList() + }, + new Object[] { + datasetType, + Arrays.asList(downstreamOf), + outgoingRelationships, + Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) + }, + new Object[] { + datasetType, + Arrays.asList(downstreamOf), + incomingRelationships, + Arrays.asList( + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity) + }, + new Object[] { + datasetType, + Arrays.asList(downstreamOf), + undirectedRelationships, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) + }, + new Object[] {datasetType, Arrays.asList(hasOwner), outgoingRelationships, Arrays.asList()}, + new Object[] { + datasetType, + Arrays.asList(hasOwner), + incomingRelationships, + Arrays.asList( + hasOwnerDatasetOneRelatedEntity, hasOwnerDatasetTwoRelatedEntity, + hasOwnerDatasetThreeRelatedEntity, hasOwnerDatasetFourRelatedEntity) + }, + new Object[] { + datasetType, + Arrays.asList(hasOwner), + undirectedRelationships, + Arrays.asList( + hasOwnerDatasetOneRelatedEntity, hasOwnerDatasetTwoRelatedEntity, + hasOwnerDatasetThreeRelatedEntity, hasOwnerDatasetFourRelatedEntity) + }, + new Object[] { + userType, + Arrays.asList(hasOwner), + outgoingRelationships, + Arrays.asList(hasOwnerUserOneRelatedEntity, hasOwnerUserTwoRelatedEntity) + }, + new Object[] {userType, Arrays.asList(hasOwner), incomingRelationships, Arrays.asList()}, + new Object[] { + userType, + 
Arrays.asList(hasOwner), + undirectedRelationships, + Arrays.asList(hasOwnerUserOneRelatedEntity, hasOwnerUserTwoRelatedEntity) + } + }; + } + + @DataProvider(name = "NoViaFindRelatedEntitiesSourceTypeTests") + public Object[][] getNoViaFindRelatedEntitiesSourceTypeTests() { + return new Object[][] { + // All DownstreamOf relationships, outgoing + new Object[] { + null, + Arrays.asList(downstreamOf), + outgoingRelationships, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + // TODO: DGraph and Neo4J do not support via + downstreamOfSchemaFieldTwo) + }, + // All DownstreamOf relationships, incoming + new Object[] { + null, + Arrays.asList(downstreamOf), + incomingRelationships, + Arrays.asList( + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity, + // TODO: DGraph and Neo4J do not support via + downstreamOfSchemaFieldOne) + }, + // All DownstreamOf relationships, both directions + new Object[] { + null, + Arrays.asList(downstreamOf), + undirectedRelationships, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity, + // TODO: DGraph and Neo4J do not support via + downstreamOfSchemaFieldTwo, + downstreamOfSchemaFieldOne) + }, + + // "" used to be any type before v0.9.0, which is now encoded by null + new Object[] { + "", Arrays.asList(downstreamOf), outgoingRelationships, Collections.emptyList() + }, + new Object[] { + "", Arrays.asList(downstreamOf), incomingRelationships, Collections.emptyList() + }, + new Object[] { + "", Arrays.asList(downstreamOf), undirectedRelationships, Collections.emptyList() + }, + new Object[] { + datasetType, + Arrays.asList(downstreamOf), + outgoingRelationships, + Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) + }, + new Object[] { + datasetType, + Arrays.asList(downstreamOf), + incomingRelationships, + Arrays.asList( + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity) + }, + new Object[] { + datasetType, + Arrays.asList(downstreamOf), + undirectedRelationships, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) + }, + new Object[] {userType, Arrays.asList(downstreamOf), outgoingRelationships, Arrays.asList()}, + new Object[] {userType, Arrays.asList(downstreamOf), incomingRelationships, Arrays.asList()}, + new Object[] { + userType, Arrays.asList(downstreamOf), undirectedRelationships, Arrays.asList() + }, + new Object[] {userType, Arrays.asList(hasOwner), outgoingRelationships, Arrays.asList()}, + new Object[] { + userType, + Arrays.asList(hasOwner), + incomingRelationships, + Arrays.asList( + hasOwnerDatasetOneRelatedEntity, hasOwnerDatasetTwoRelatedEntity, + hasOwnerDatasetThreeRelatedEntity, hasOwnerDatasetFourRelatedEntity) + }, + new Object[] { + userType, + Arrays.asList(hasOwner), + undirectedRelationships, + Arrays.asList( + hasOwnerDatasetOneRelatedEntity, hasOwnerDatasetTwoRelatedEntity, + hasOwnerDatasetThreeRelatedEntity, hasOwnerDatasetFourRelatedEntity) + } + }; + } + + @Test + @Override + public void testFindRelatedEntitiesRelationshipTypes() throws Exception { + GraphService service = getPopulatedGraphService(); + + RelatedEntitiesResult allOutgoingRelatedEntities = + 
service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships, + 0, + 100); + // All DownstreamOf relationships, outgoing (destination) + assertEqualsAnyOrder( + allOutgoingRelatedEntities, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + hasOwnerUserOneRelatedEntity, + hasOwnerUserTwoRelatedEntity, + knowsUserOneRelatedEntity, + knowsUserTwoRelatedEntity, + // TODO: DGraph and Neo4J do not support via + downstreamOfSchemaFieldTwo)); + + RelatedEntitiesResult allIncomingRelatedEntities = + service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships, + 0, + 100); + // All DownstreamOf relationships, incoming (source) + assertEqualsAnyOrder( + allIncomingRelatedEntities, + Arrays.asList( + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + downstreamOfDatasetFourRelatedEntity, + hasOwnerDatasetOneRelatedEntity, + hasOwnerDatasetTwoRelatedEntity, + hasOwnerDatasetThreeRelatedEntity, + hasOwnerDatasetFourRelatedEntity, + knowsUserOneRelatedEntity, + knowsUserTwoRelatedEntity, + // TODO: DGraph and Neo4J do not support via + downstreamOfSchemaFieldOne)); + + RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = + service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList("unknownRelationshipType", "unseenRelationshipType"), + outgoingRelationships, + 0, + 100); + assertEqualsAnyOrder(allUnknownRelationshipTypeRelatedEntities, Collections.emptyList()); + + RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = + service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList("unknownRelationshipType", downstreamOf), + outgoingRelationships, + 0, + 100); + // All DownstreamOf relationships, outgoing (destination) + assertEqualsAnyOrder( + someUnknownRelationshipTypeRelatedEntities, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + // TODO: DGraph and Neo4J do not support via + downstreamOfSchemaFieldTwo)); + } + + @Test + @Override + public void testPopulatedGraphService() throws Exception { + GraphService service = getPopulatedGraphService(); + + RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = + service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships, + 0, + 100); + // All downstreamOf, hasOwner, or knowsUser relationships, outgoing + assertEqualsAnyOrder( + relatedOutgoingEntitiesBeforeRemove, + Arrays.asList( + downstreamOfDatasetOneRelatedEntity, + downstreamOfDatasetTwoRelatedEntity, + hasOwnerUserOneRelatedEntity, + hasOwnerUserTwoRelatedEntity, + knowsUserOneRelatedEntity, + knowsUserTwoRelatedEntity, + // TODO: DGraph and Neo4j do not support via + downstreamOfSchemaFieldTwo)); + RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = + service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships, + 0, + 100); + // All downstreamOf, hasOwner, or knowsUser relationships, incoming + assertEqualsAnyOrder( + relatedIncomingEntitiesBeforeRemove, + Arrays.asList( + downstreamOfDatasetTwoRelatedEntity, + downstreamOfDatasetThreeRelatedEntity, + 
downstreamOfDatasetFourRelatedEntity, + hasOwnerDatasetOneRelatedEntity, + hasOwnerDatasetTwoRelatedEntity, + hasOwnerDatasetThreeRelatedEntity, + hasOwnerDatasetFourRelatedEntity, + knowsUserOneRelatedEntity, + knowsUserTwoRelatedEntity, + // TODO: DGraph and Neo4j do not support via + downstreamOfSchemaFieldOne)); + // TODO: DGraph and Neo4j do not support via + // No checking of split via edge + } + + @Test + @Override + public void testRemoveNode() throws Exception { + GraphService service = getPopulatedGraphService(); + + service.removeNode(datasetTwoUrn); + syncAfterWrite(); + + // assert the modified graph + // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing + assertEqualsAnyOrder( + service.findRelatedEntities( + anyType, + EMPTY_FILTER, + anyType, + EMPTY_FILTER, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships, + 0, + 100), + Arrays.asList( + hasOwnerUserOneRelatedEntity, + hasOwnerUserTwoRelatedEntity, + knowsUserOneRelatedEntity, + knowsUserTwoRelatedEntity, + // TODO: DGraph and Neo4j do not support via + downstreamOfSchemaFieldTwo)); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java index 1ccf018a74c3a..7e683502dd958 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java @@ -9,11 +9,12 @@ import com.google.common.collect.ImmutableList; import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.graph.GraphServiceTestBase; +import com.linkedin.metadata.graph.GraphServiceTestBaseNoVia; import com.linkedin.metadata.graph.RelatedEntity; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.query.filter.RelationshipFilter; import io.dgraph.DgraphClient; import io.dgraph.DgraphGrpc; import io.grpc.CallOptions; @@ -28,6 +29,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; @@ -41,7 +43,7 @@ @SuppressWarnings("ArraysAsListWithZeroOrOneArgument") @Slf4j -public class DgraphGraphServiceTest extends GraphServiceTestBase { +public class DgraphGraphServiceTest extends GraphServiceTestBaseNoVia { private ManagedChannel _channel; private DgraphGraphService _service; @@ -823,4 +825,28 @@ public void testGetDestinationUrnsFromResponseData() { public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiHop) { // TODO: Remove this overridden method once the multihop for dGraph is implemented! 
} + + @Override + @Test(dataProvider = "NoViaFindRelatedEntitiesDestinationTypeTests") + public void testFindRelatedEntitiesDestinationType( + String datasetType, + List relationshipTypes, + RelationshipFilter relationships, + List expectedRelatedEntities) + throws Exception { + super.testFindRelatedEntitiesDestinationType( + datasetType, relationshipTypes, relationships, expectedRelatedEntities); + } + + @Override + @Test(dataProvider = "NoViaFindRelatedEntitiesSourceTypeTests") + public void testFindRelatedEntitiesSourceType( + String datasetType, + List relationshipTypes, + RelationshipFilter relationships, + List expectedRelatedEntities) + throws Exception { + super.testFindRelatedEntitiesSourceType( + datasetType, relationshipTypes, relationships, expectedRelatedEntities); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java index f1113368601c6..cff79618b8e09 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java @@ -12,6 +12,7 @@ import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.GraphServiceTestBase; +import com.linkedin.metadata.graph.GraphServiceTestBaseNoVia; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.graph.RelatedEntity; @@ -35,7 +36,7 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -public class Neo4jGraphServiceTest extends GraphServiceTestBase { +public class Neo4jGraphServiceTest extends GraphServiceTestBaseNoVia { private Neo4jTestServerBuilder _serverBuilder; private Driver _driver; @@ -90,6 +91,7 @@ protected void assertEqualsAnyOrder( } @Override + @Test(dataProvider = "NoViaFindRelatedEntitiesSourceTypeTests") public void testFindRelatedEntitiesSourceType( String datasetType, List relationshipTypes, @@ -110,6 +112,7 @@ public void testFindRelatedEntitiesSourceType( } @Override + @Test(dataProvider = "NoViaFindRelatedEntitiesDestinationTypeTests") public void testFindRelatedEntitiesDestinationType( String datasetType, List relationshipTypes, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java index 5b7f880e6d83a..8ae2725b749d1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.models.registry.LineageRegistry; @@ -99,19 +100,21 @@ private static void testGetQueryForLineageFullArguments() throws Exception { Long startTime = 0L; Long endTime = 1L; + ESGraphQueryDAO graphQueryDAO = + new ESGraphQueryDAO(null, null, null, new GraphQueryConfiguration()); QueryBuilder limitedBuilder = - ESGraphQueryDAO.getLineageQueryForEntityType(urns, edgeInfos, graphFilters); + 
graphQueryDAO.getLineageQueryForEntityType(urns, edgeInfos, graphFilters); QueryBuilder fullBuilder = - ESGraphQueryDAO.getLineageQuery( + graphQueryDAO.getLineageQuery( urnsPerEntityType, edgesPerEntityType, graphFilters, startTime, endTime); QueryBuilder fullBuilderEmptyFilters = - ESGraphQueryDAO.getLineageQuery( + graphQueryDAO.getLineageQuery( urnsPerEntityType, edgesPerEntityType, GraphFilters.emptyGraphFilters, null, null); QueryBuilder fullBuilderMultipleFilters = - ESGraphQueryDAO.getLineageQuery( + graphQueryDAO.getLineageQuery( urnsPerEntityTypeMultiple, edgesPerEntityTypeMultiple, graphFiltersMultiple, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index 71f247ebfc29a..8c184055a6b0d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -20,7 +20,10 @@ import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; import com.linkedin.metadata.models.registry.LineageRegistry; +import com.linkedin.metadata.models.registry.MergedEntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; @@ -30,6 +33,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import io.datahubproject.test.search.SearchTestUtils; +import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -76,7 +80,20 @@ public void wipe() throws Exception { @Nonnull private ElasticSearchGraphService buildService(boolean enableMultiPathSearch) { - LineageRegistry lineageRegistry = new LineageRegistry(SnapshotEntityRegistry.getInstance()); + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + SearchCommonTestConfiguration.class + .getClassLoader() + .getResourceAsStream("entity-registry.yml")); + SnapshotEntityRegistry snapshotEntityRegistry = SnapshotEntityRegistry.getInstance(); + LineageRegistry lineageRegistry; + try { + MergedEntityRegistry mergedEntityRegistry = + new MergedEntityRegistry(snapshotEntityRegistry).apply(configEntityRegistry); + lineageRegistry = new LineageRegistry(mergedEntityRegistry); + } catch (EntityRegistryException e) { + throw new RuntimeException(e); + } GraphQueryConfiguration configuration = GraphQueryConfiguration.testDefaults; configuration.setEnableMultiPathSearch(enableMultiPathSearch); ESGraphQueryDAO readDAO = diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index a26c886c6eaf7..8f752376ef2d9 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ 
b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -108,7 +108,10 @@ public boolean isEnabled() { public void invoke(@Nonnull MetadataChangeLog event) { if (enabled && isEligibleForProcessing(event)) { - log.info("Urn {} received by Sibling Hook.", event.getEntityUrn()); + log.info( + "Urn {} with aspect {} received by Sibling Hook.", + event.getEntityUrn(), + event.getAspectName()); final Urn urn = getUrnFromEvent(event); diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java index cae67108b4ca0..ddfcf4b72776e 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java @@ -140,10 +140,23 @@ public void testFineGrainedLineageEdgesAreAdded() throws Exception { Urn downstreamUrn = UrnUtils.getUrn( "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD),field_foo)"); + Urn lifeCycleOwner = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)"); MetadataChangeLog event = createUpstreamLineageMCL(upstreamUrn, downstreamUrn); updateIndicesHook.invoke(event); - Edge edge = new Edge(downstreamUrn, upstreamUrn, DOWNSTREAM_OF, null, null, null, null, null); + Edge edge = + new Edge( + downstreamUrn, + upstreamUrn, + DOWNSTREAM_OF, + null, + null, + null, + null, + null, + lifeCycleOwner, + null); Mockito.verify(mockGraphService, Mockito.times(1)).addEdge(Mockito.eq(edge)); Mockito.verify(mockGraphService, Mockito.times(1)) .removeEdgesFromNode( @@ -164,11 +177,24 @@ public void testFineGrainedLineageEdgesAreAddedRestate() throws Exception { Urn downstreamUrn = UrnUtils.getUrn( "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD),field_foo)"); + Urn lifeCycleOwner = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)"); MetadataChangeLog event = createUpstreamLineageMCL(upstreamUrn, downstreamUrn, ChangeType.RESTATE); updateIndicesHook.invoke(event); - Edge edge = new Edge(downstreamUrn, upstreamUrn, DOWNSTREAM_OF, null, null, null, null, null); + Edge edge = + new Edge( + downstreamUrn, + upstreamUrn, + DOWNSTREAM_OF, + null, + null, + null, + null, + null, + lifeCycleOwner, + null); Mockito.verify(mockGraphService, Mockito.times(1)).addEdge(Mockito.eq(edge)); Mockito.verify(mockGraphService, Mockito.times(1)) .removeEdgesFromNode( diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java index 4da50f47e2feb..cd869a61bf3ab 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java @@ -12,6 +12,12 @@ public class GraphQueryConfiguration { // will return all paths between the source and destination nodes within the hops limit. 
private boolean enableMultiPathSearch; + /** + * Adds a boosting query when a via node is present on a lineage search hit, allowing such hits + * to be prioritized when multiple paths exist and multi-path search is disabled. + */ + private boolean boostViaNodes; + public static GraphQueryConfiguration testDefaults; static { @@ -20,5 +26,6 @@ public class GraphQueryConfiguration { testDefaults.setTimeoutSeconds(10); testDefaults.setMaxResult(10000); testDefaults.setEnableMultiPathSearch(true); + testDefaults.setBoostViaNodes(true); } } diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 467b1cf109dee..c0f82d8536922 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -236,7 +236,8 @@ elasticsearch: timeoutSeconds: ${ELASTICSEARCH_SEARCH_GRAPH_TIMEOUT_SECONDS:50} # graph dao timeout seconds batchSize: ${ELASTICSEARCH_SEARCH_GRAPH_BATCH_SIZE:1000} # graph dao batch size maxResult: ${ELASTICSEARCH_SEARCH_GRAPH_MAX_RESULT:10000} # graph dao max result size - enableMultiPathSearch: ${ELASTICSEARCH_SEARCH_GRAPH_MULTI_PATH_SEARCH:false} + enableMultiPathSearch: ${ELASTICSEARCH_SEARCH_GRAPH_MULTI_PATH_SEARCH:false} # allows a node to be revisited so that all paths to it are walked, not just the shortest; cycles are avoided by never rewalking an already-visited edge + boostViaNodes: ${ELASTICSEARCH_SEARCH_GRAPH_BOOST_VIA_NODES:true} # adds a boosting query that ranks graph edges with via nodes higher, so via paths are prioritized when multi-path search is disabled # TODO: Kafka topic convention kafka: diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index 145ec7e65188c..b250435b4a642 100644 --- a/metadata-service/factories/build.gradle +++ b/metadata-service/factories/build.gradle @@ -65,5 +65,4 @@ dependencies { configurations.all{ exclude group: "commons-io", module:"commons-io" exclude group: "jline", module:"jline" - exclude group: 'software.amazon.awssdk', module: 'third-party-jackson-core' } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java index 3550a86163f51..09bd9f8bb09e5 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.RelatedEntities; import com.linkedin.metadata.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; @@ -49,16 +50,14 @@ name = "Generic Relationships", description = "APIs for ingesting and accessing entity relationships.") public class RelationshipController { - - private static final String[] SORT_FIELDS = {"source.urn", "destination.urn", "relationshipType"}; - private static final String[] SORT_ORDERS = {"ASCENDING", "ASCENDING", "ASCENDING"}; + private static final String[] SORT_ORDERS = {"ASCENDING", "ASCENDING", "ASCENDING", "ASCENDING"}; private
static final List<SortCriterion> EDGE_SORT_CRITERION; static { EDGE_SORT_CRITERION = - IntStream.range(0, SORT_FIELDS.length) + IntStream.range(0, Edge.KEY_FIELDS.length) .mapToObj( - idx -> SearchUtil.sortBy(SORT_FIELDS[idx], SortOrder.valueOf(SORT_ORDERS[idx]))) + idx -> SearchUtil.sortBy(Edge.KEY_FIELDS[idx], SortOrder.valueOf(SORT_ORDERS[idx]))) .collect(Collectors.toList()); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/graph/Edge.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/Edge.java index 458b23317c6c8..cb74ae5acd6a6 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/graph/Edge.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/Edge.java @@ -1,13 +1,20 @@ package com.linkedin.metadata.graph; import com.linkedin.common.urn.Urn; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Base64; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; @Data @AllArgsConstructor +@Slf4j public class Edge { @EqualsAndHashCode.Include private Urn source; @EqualsAndHashCode.Include private Urn destination; @@ -18,7 +25,7 @@ public class Edge { @EqualsAndHashCode.Exclude private Urn updatedActor; @EqualsAndHashCode.Exclude private Map<String, Object> properties; // The entity who owns the lifecycle of this edge - @EqualsAndHashCode.Exclude private Urn lifecycleOwner; + @EqualsAndHashCode.Include private Urn lifecycleOwner; // An entity through which the edge between source and destination is created @EqualsAndHashCode.Include private Urn via; @@ -44,4 +51,32 @@ public Edge( null, null); } + + public String toDocId() { + StringBuilder rawDocId = new StringBuilder(); + rawDocId + .append(getSource().toString()) + .append(DOC_DELIMETER) + .append(getRelationshipType()) + .append(DOC_DELIMETER) + .append(getDestination().toString()); + if (getLifecycleOwner() != null && StringUtils.isNotBlank(getLifecycleOwner().toString())) { + rawDocId.append(DOC_DELIMETER).append(getLifecycleOwner().toString()); + } + + try { + byte[] bytesOfRawDocID = rawDocId.toString().getBytes(StandardCharsets.UTF_8); + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] thedigest = md.digest(bytesOfRawDocID); + return Base64.getEncoder().encodeToString(thedigest); + } catch (NoSuchAlgorithmException e) { + log.error("Unable to hash document ID, returning unhashed id: " + rawDocId); + return rawDocId.toString(); + } + } + + public static final String[] KEY_FIELDS = { + "source.urn", "destination.urn", "relationshipType", "lifeCycleOwner" + }; + private static final String DOC_DELIMETER = "--"; }
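
Editor's note on Edge.toDocId() (added in the Edge.java hunk above): the graph document id is built from the source URN, relationship type, destination URN and, when set, the lifecycle-owner URN, joined with the "--" delimiter, then MD5-hashed and Base64-encoded. The standalone sketch below is a minimal illustration of that scheme, not code from this patch; the EdgeDocIdDemo class name and the sample URNs are made up for the example.

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;

// Hypothetical demo class; mirrors the doc-id scheme of Edge#toDocId for illustration only.
public class EdgeDocIdDemo {

  private static final String DELIMITER = "--"; // same delimiter the patch uses

  static String toDocId(String source, String relationshipType, String destination, String lifecycleOwner) {
    StringBuilder rawDocId = new StringBuilder();
    rawDocId.append(source).append(DELIMITER).append(relationshipType).append(DELIMITER).append(destination);
    if (lifecycleOwner != null && !lifecycleOwner.isBlank()) {
      rawDocId.append(DELIMITER).append(lifecycleOwner);
    }
    try {
      // MD5 + Base64 yields a short, deterministic id for the graph index
      MessageDigest md = MessageDigest.getInstance("MD5");
      byte[] digest = md.digest(rawDocId.toString().getBytes(StandardCharsets.UTF_8));
      return Base64.getEncoder().encodeToString(digest);
    } catch (NoSuchAlgorithmException e) {
      // fall back to the unhashed id, as the patch does
      return rawDocId.toString();
    }
  }

  public static void main(String[] args) {
    String field = "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD),field_foo)";
    String owner = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)";
    String id1 = toDocId(field, "DownstreamOf", owner, owner);
    String id2 = toDocId(field, "DownstreamOf", owner, owner);
    System.out.println(id1.equals(id2)); // true: re-emitting the same edge produces the same doc id
    System.out.println(toDocId(field, "DownstreamOf", owner, null)); // omitting the lifecycle owner changes the id
  }
}

Because the lifecycle owner now participates in both the document id and equals/hashCode, re-emitting the same edge upserts a single document, while edges that differ only in their owning entity are stored as distinct documents.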
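
Editor's note on the new boostViaNodes setting (GraphQueryConfiguration and application.yml hunks above): conceptually, it adds a non-mandatory "should" clause that scores graph edges higher when their via field is populated, so that with multi-path search disabled the retained path is preferentially one that runs through a via node. The fragment below only sketches what such a clause looks like with the Elasticsearch QueryBuilders API; it is not the actual ESGraphQueryDAO implementation, the "via" field name and boost value are assumptions, and the import package may be org.opensearch.index.query instead, depending on the search client in use.

import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

// Purely illustrative sketch of the "boost via nodes" idea, not code from this patch.
public final class ViaBoostSketch {

  private ViaBoostSketch() {}

  // Wraps an existing edge query so that edges created through a via entity score higher,
  // without excluding edges that have no via node.
  public static BoolQueryBuilder boostViaNodes(BoolQueryBuilder edgeQuery, float viaBoost) {
    return edgeQuery.should(QueryBuilders.existsQuery("via").boost(viaBoost));
  }
}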