diff --git a/README.md b/README.md
index 96b30ea49359c..17aab332a07da 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
 - [Geotab](https://www.geotab.com)
 - [Grofers](https://grofers.com)
 - [hipages](https://hipages.com.au/)
+- [IOMED](https://iomed.health)
 - [Klarna](https://www.klarna.com)
 - [LinkedIn](http://linkedin.com)
 - [Peloton](https://www.onepeloton.com)
diff --git a/build.gradle b/build.gradle
index dc9ca7e85a050..3b6818b4b575a 100644
--- a/build.gradle
+++ b/build.gradle
@@ -88,7 +88,7 @@ project.ext.externalDependency = [
   // avro-serde includes dependencies for `kafka-avro-serializer` `kafka-schema-registry-client` and `avro`
   'kafkaAvroSerde': 'io.confluent:kafka-streams-avro-serde:5.5.1',
   'kafkaClients': 'org.apache.kafka:kafka-clients:2.3.0',
-  'logbackClassic': 'ch.qos.logback:logback-classic:1.2.3',
+  'logbackClassic': 'ch.qos.logback:logback-classic:1.2.9',
   'lombok': 'org.projectlombok:lombok:1.18.12',
   'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0',
   'mavenArtifact': "org.apache.maven:maven-artifact:$mavenVersion",
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java
index 1e4d1007de4de..35c6db088292e 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java
@@ -59,7 +59,7 @@ public CompletableFuture<List<TimeSeriesAspect>> get(DataFetchingEnvironment env
     // Step 1: Get aspects.
     List<EnvelopedAspect> aspects =
         _client.getTimeseriesAspectValues(urn, _entityName, _aspectName, maybeStartTimeMillis, maybeEndTimeMillis,
-            maybeLimit, context.getAuthentication());
+            maybeLimit, null, null, context.getAuthentication());
 
     // Step 2: Bind profiles into GraphQL strong types.
     return aspects.stream().map(_aspectMapper::apply).collect(Collectors.toList());
diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
index cff1e717852b4..e16cce8d45e8b 100644
--- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
@@ -71,7 +71,7 @@ services:
     environment:
     - discovery.type=single-node
    - xpack.security.enabled=false
-    - ES_JAVA_OPTS=-Xms256m -Xmx256m
+    - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
     healthcheck:
       retries: 4
       start_period: 2m
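The `-Dlog4j2.formatMsgNoLookups=true` flag added above is the standard runtime mitigation for Log4Shell (CVE-2021-44228) on Log4j 2.10+: it disables `${...}` message lookups, including the JNDI ones, without requiring a new Elasticsearch image. A minimal sketch of applying the same override in another compose file — the service name and image tag here are illustrative, not taken from this patch:

```yml
# Hypothetical docker-compose override; ES_JAVA_OPTS is the relevant line.
services:
  elasticsearch:
    image: elasticsearch:7.9.3
    environment:
      - discovery.type=single-node
      # Heap limits plus the message-lookup kill switch for CVE-2021-44228.
      - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
```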
diff --git a/metadata-dao-impl/kafka-producer/build.gradle b/metadata-dao-impl/kafka-producer/build.gradle
index 9c29164e6c134..1a73014acdda1 100644
--- a/metadata-dao-impl/kafka-producer/build.gradle
+++ b/metadata-dao-impl/kafka-producer/build.gradle
@@ -17,11 +17,11 @@ dependencies {
   testCompile externalDependency.mockito
 
   constraints {
-    implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
-    implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
   }
 }
\ No newline at end of file
diff --git a/metadata-events/mxe-registration/build.gradle b/metadata-events/mxe-registration/build.gradle
index c6d9bafc2372f..5c3e64b506607 100644
--- a/metadata-events/mxe-registration/build.gradle
+++ b/metadata-events/mxe-registration/build.gradle
@@ -15,11 +15,11 @@ dependencies {
   avroOriginal project(path: ':metadata-models', configuration: 'avroSchema')
 
   constraints {
-    implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
-    implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
   }
 }
diff --git a/metadata-events/mxe-utils-avro-1.7/build.gradle b/metadata-events/mxe-utils-avro-1.7/build.gradle
index 0352700b630f3..71148d7c4ea0d 100644
--- a/metadata-events/mxe-utils-avro-1.7/build.gradle
+++ b/metadata-events/mxe-utils-avro-1.7/build.gradle
@@ -9,11 +9,11 @@ dependencies {
   testCompile project(':metadata-testing:metadata-test-utils')
 
   constraints {
-    implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
-    implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
   }
 }
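These repeated `constraints` blocks force any transitively resolved `log4j-core` and `log4j-api` up to 2.17.0, which covers CVE-2021-45105 (denial of service through uncontrolled recursion in self-referential lookups) in addition to the earlier 2.15.0 bump for CVE-2021-44228. To confirm a constraint actually won dependency resolution, one rough sketch — the `deps.txt` path is an assumption, produced beforehand with something like `./gradlew dependencies > deps.txt`:

```python
# Hypothetical helper: flag log4j-core/api artifacts that still resolve below 2.17.0
# in a Gradle dependency report saved to deps.txt.
import re

MIN_SAFE = (2, 17, 0)

def vulnerable_log4j_lines(report_path):
    """Yield report lines whose log4j-core/api version is below MIN_SAFE."""
    pattern = re.compile(r"org\.apache\.logging\.log4j:log4j-(?:core|api):(\d+)\.(\d+)\.(\d+)")
    with open(report_path) as report:
        for line in report:
            match = pattern.search(line)
            if match and tuple(map(int, match.groups())) < MIN_SAFE:
                yield line.strip()

if __name__ == "__main__":
    for hit in vulnerable_log4j_lines("deps.txt"):
        print("needs upgrade:", hit)
```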
diff --git a/metadata-ingestion-examples/common/build.gradle b/metadata-ingestion-examples/common/build.gradle
index d31f75d607f8e..d2d3637f6892c 100644
--- a/metadata-ingestion-examples/common/build.gradle
+++ b/metadata-ingestion-examples/common/build.gradle
@@ -19,11 +19,11 @@ dependencies {
   runtime externalDependency.logbackClassic
 
   constraints {
-    implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
-    implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
   }
 }
diff --git a/metadata-ingestion-examples/kafka-etl/build.gradle b/metadata-ingestion-examples/kafka-etl/build.gradle
index c00cabb9fbf74..0ad4da77888a1 100644
--- a/metadata-ingestion-examples/kafka-etl/build.gradle
+++ b/metadata-ingestion-examples/kafka-etl/build.gradle
@@ -22,11 +22,11 @@ dependencies {
   runtime externalDependency.logbackClassic
 
   constraints {
-    implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
-    implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
   }
 }
diff --git a/metadata-ingestion-examples/mce-cli/build.gradle b/metadata-ingestion-examples/mce-cli/build.gradle
index f384afe747363..5d887ae799775 100644
--- a/metadata-ingestion-examples/mce-cli/build.gradle
+++ b/metadata-ingestion-examples/mce-cli/build.gradle
@@ -28,11 +28,11 @@ dependencies {
   annotationProcessor externalDependency.picocli
 
   constraints {
-    implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
-    implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
-      because("previous versions are vulnerable to CVE-2021-44228")
+    implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
+      because("previous versions are vulnerable to CVE-2021-45105")
     }
   }
diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md
index d19ccac08abd5..aad490159905a 100644
--- a/metadata-ingestion/README.md
+++ b/metadata-ingestion/README.md
@@ -165,10 +165,6 @@ sink:
   server: "http://localhost:8080"
 ```
 
-We automatically expand environment variables in the config,
-similar to variable substitution in GNU bash or in docker-compose files. For details, see
-https://docs.docker.com/compose/compose-file/compose-file-v2/#variable-substitution.
-
 Running a recipe is quite easy.
 
 ```shell
@@ -177,6 +173,11 @@ datahub ingest -c ./examples/recipes/mssql_to_datahub.yml
 ```
 
 A number of recipes are included in the [examples/recipes](./examples/recipes) directory. For full info and context on each source and sink, see the pages described in the [table of plugins](#installing-plugins).
 
+### Handling sensitive information in recipes
+
+We automatically expand environment variables in the config (e.g. `${MSSQL_PASSWORD}`),
+similar to variable substitution in GNU bash or in docker-compose files. For details, see
+https://docs.docker.com/compose/compose-file/compose-file-v2/#variable-substitution. This environment variable substitution should be used to mask sensitive information in recipe files. As long as you can get env variables securely to the ingestion process there would not be any need to store sensitive information in recipes.
+
 ## Transformations
 
 If you'd like to modify data before it reaches the ingestion sinks – for instance, adding additional owners or tags – you can use a transformer to write your own module and integrate it with DataHub.
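To make the new README section concrete, here is a hypothetical recipe relying on the substitution it describes; all field values are illustrative, and `MSSQL_PASSWORD` is resolved from the environment at ingestion time, so the file itself never contains the secret:

```yml
# Sketch of an mssql_to_datahub.yml-style recipe with the password externalized.
source:
  type: mssql
  config:
    username: datahub_reader
    password: ${MSSQL_PASSWORD}  # expanded from the environment, never stored here
    database: DemoDatabase

sink:
  type: datahub-rest
  config:
    server: "http://localhost:8080"
```

The secret then only exists for the lifetime of the process, e.g. `MSSQL_PASSWORD='...' datahub ingest -c ./recipe.yml`.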
diff --git a/metadata-ingestion/examples/recipes/mode_to_datahub.yml b/metadata-ingestion/examples/recipes/mode_to_datahub.yml
index 82bdb7a1d54b4..f1503d9526d10 100644
--- a/metadata-ingestion/examples/recipes/mode_to_datahub.yml
+++ b/metadata-ingestion/examples/recipes/mode_to_datahub.yml
@@ -2,12 +2,16 @@
 source:
   type: "mode"
   config:
-    token: 9fa6a90fcd33
-    password: a03bcbc011d6f77c585f5682
+    token: token
+    password: password
     connect_uri: https://app.mode.com/
-    workspace: "petabloc"
+    workspace: "workspace"
     default_schema: "public"
     owner_username_instead_of_email: False
+    api_options:
+      retry_backoff_multiplier: 2
+      max_retry_interval: 10
+      max_attempts: 5
 
 # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
 sink:
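Beyond redacting the leaked token, password, and workspace, this recipe gains an `api_options` block that appears to shape retry behavior against Mode's API. Assuming the obvious semantics — the wait grows by `retry_backoff_multiplier` per attempt, capped at `max_retry_interval` seconds, for at most `max_attempts` tries — the schedule would look like this sketch (the parameter meanings and the one-second initial delay are inferred, not documented in this patch):

```python
def retry_schedule(backoff_multiplier=2.0, max_interval=10.0, max_attempts=5):
    """Yield (attempt, delay) pairs under the inferred api_options semantics."""
    delay = 1.0  # assumed initial delay of one second
    for attempt in range(1, max_attempts + 1):
        yield attempt, min(delay, max_interval)
        delay *= backoff_multiplier

# With the recipe's values this prints delays of 1, 2, 4, 8, 10 seconds.
for attempt, delay in retry_schedule():
    print(f"attempt {attempt}: wait {delay:g}s before retrying")
```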
+ "mypy>=0.901,<0.920", "pytest>=6.2.2", "pytest-cov>=2.8.1", "pytest-docker>=0.10.3", @@ -303,7 +304,6 @@ def get_long_description(): "trino = datahub.ingestion.source.sql.trino:TrinoSource", "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource", "nifi = datahub.ingestion.source.nifi:NifiSource", - ], "datahub.ingestion.sink.plugins": [ "file = datahub.ingestion.sink.file:FileSink", @@ -311,6 +311,9 @@ def get_long_description(): "datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink", "datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink", ], + "datahub.ingestion.state_provider.plugins": [ + "datahub = datahub.ingestion.source.state_provider.datahub_ingestion_state_provider:DatahubIngestionStateProvider", + ], "apache_airflow_provider": ["provider_info=datahub_provider:get_provider_info"], } diff --git a/metadata-ingestion/source_docs/bigquery.md b/metadata-ingestion/source_docs/bigquery.md index 1d73a761a6c72..567fecca490c8 100644 --- a/metadata-ingestion/source_docs/bigquery.md +++ b/metadata-ingestion/source_docs/bigquery.md @@ -44,27 +44,32 @@ Note that a `.` is used to denote nested fields in the YAML recipe. As a SQL-based service, the Athena integration is also supported by our SQL profiler. See [here](./sql_profiles.md) for more details on configuration. -| Field | Required | Default | Description | -| --------------------------- | -------- | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `project_id` | | Autodetected | Project ID to ingest from. If not specified, will infer from environment. | -| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | -| `options.