diff --git a/.gitignore b/.gitignore
index 631630d64c7fac..331dd6ee7fc76d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,7 +39,17 @@ MANIFEST
# Mac OS
**/.DS_Store
+# spark-lineage generated files
+**/spark-lineage/metastore_db/
+**/spark-lineage/**/derby.log
+**/spark-lineage/**/hive/
+**/spark-lineage/**/out.csv/
.vscode
+# Cypress integration test generated files
+**/cypress/videos
+**/cypress/screenshots
+**/cypress/node_modules
+
# Metadata Ingestion Generated
metadata-ingestion/generated/**
diff --git a/build.gradle b/build.gradle
index 77c30b3680b427..dc9ca7e85a0509 100644
--- a/build.gradle
+++ b/build.gradle
@@ -3,7 +3,6 @@ buildscript {
ext.gmaVersion = '0.2.81'
ext.pegasusVersion = '28.3.7'
ext.mavenVersion = '3.6.3'
-
apply from: './repositories.gradle'
buildscript.repositories.addAll(project.repositories)
dependencies {
@@ -11,6 +10,7 @@ buildscript {
classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4'
classpath 'com.commercehub.gradle.plugin:gradle-avro-plugin:0.8.1'
classpath 'org.springframework.boot:spring-boot-gradle-plugin:2.1.4.RELEASE'
+ classpath 'com.github.jengelman.gradle.plugins:shadow:5.2.0'
}
}
@@ -68,6 +68,8 @@ project.ext.externalDependency = [
'guava': 'com.google.guava:guava:27.0.1-jre',
'h2': 'com.h2database:h2:1.4.196',
'hadoopClient': 'org.apache.hadoop:hadoop-client:3.1.1',
+ 'hadoopCommon':'org.apache.hadoop:hadoop-common:2.7.2',
+ 'hadoopMapreduceClient':'org.apache.hadoop:hadoop-mapreduce-client-core:2.7.2',
'hibernateCore': 'org.hibernate:hibernate-core:5.2.16.Final',
'httpClient': 'org.apache.httpcomponents:httpclient:4.5.9',
'iStackCommons': 'com.sun.istack:istack-commons-runtime:4.0.1',
@@ -114,6 +116,8 @@ project.ext.externalDependency = [
'rythmEngine': 'org.rythmengine:rythm-engine:1.3.0',
'servletApi': 'javax.servlet:javax.servlet-api:3.1.0',
'shiroCore': 'org.apache.shiro:shiro-core:1.7.1',
+ 'sparkSql' : 'org.apache.spark:spark-sql_2.11:2.4.8',
+ 'sparkHive' : 'org.apache.spark:spark-hive_2.11:2.4.8',
'springBeans': 'org.springframework:spring-beans:5.2.3.RELEASE',
'springContext': 'org.springframework:spring-context:5.2.3.RELEASE',
'springCore': 'org.springframework:spring-core:5.2.3.RELEASE',
@@ -129,7 +133,9 @@ project.ext.externalDependency = [
'testng': 'org.testng:testng:7.3.0',
'testContainers': 'org.testcontainers:testcontainers:1.15.1',
'testContainersJunit': 'org.testcontainers:junit-jupiter:1.15.1',
+ 'testContainersPostgresql':'org.testcontainers:postgresql:1.2.0',
'testContainersElasticsearch': 'org.testcontainers:elasticsearch:1.15.3',
+ 'wiremock':'com.github.tomakehurst:wiremock:2.10.0',
'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14'
]
diff --git a/datahub-web-react/src/images/metabaselogo.png b/datahub-web-react/src/images/metabaselogo.png
new file mode 100644
index 00000000000000..c158f33e020bbc
Binary files /dev/null and b/datahub-web-react/src/images/metabaselogo.png differ
diff --git a/docker/elasticsearch/env/docker.env b/docker/elasticsearch/env/docker.env
index 4239de21e50689..511e627dd152b3 100644
--- a/docker/elasticsearch/env/docker.env
+++ b/docker/elasticsearch/env/docker.env
@@ -1,3 +1,3 @@
discovery.type=single-node
xpack.security.enabled=false
-ES_JAVA_OPTS=-Xms256m -Xmx256m
+ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
diff --git a/docker/monitoring/grafana/dashboards/datahub_dashboard.json b/docker/monitoring/grafana/dashboards/datahub_dashboard.json
index 9d19a1e76f2a9c..b25148bd1fd3d5 100644
--- a/docker/monitoring/grafana/dashboards/datahub_dashboard.json
+++ b/docker/monitoring/grafana/dashboards/datahub_dashboard.json
@@ -1,2080 +1,2161 @@
{
- "__inputs": [],
- "__requires": [
+ "annotations": {
+ "list": [
{
- "type": "grafana",
- "id": "grafana",
- "name": "Grafana",
- "version": "8.1.2"
- },
- {
- "type": "panel",
- "id": "timeseries",
- "name": "Time series",
- "version": ""
- }
- ],
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "target": {
- "limit": 100,
- "matchAny": false,
- "tags": [],
- "type": "dashboard"
- },
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
"type": "dashboard"
- }
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "id": null,
- "links": [],
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 0
- },
- "id": 37,
- "panels": [],
- "title": "Get",
- "type": "row"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": 2,
+ "links": [],
+ "liveNow": false,
+ "panels": [
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 37,
+ "panels": [],
+ "title": "Get",
+ "type": "row"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 0,
- "y": 1
- },
- "id": 40,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Count{}[1m])/60",
- "interval": "",
- "legendFormat": "Get QPS",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_failed_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Get Failure",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "BatchGet QPS",
- "refId": "C"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_failed_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "BatchGet Failure",
- "refId": "D"
- }
- ],
- "title": "Get QPS",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 0,
+ "y": 1
+ },
+ "id": 40,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Count{}[1m])/60",
+ "interval": "",
+ "legendFormat": "Get QPS",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_failed_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Get Failure",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "BatchGet QPS",
+ "refId": "C"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_failed_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "BatchGet Failure",
+ "refId": "D"
+ }
+ ],
+ "title": "Get QPS",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 7,
- "y": 1
- },
- "id": 41,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Mean{}",
- "interval": "",
- "legendFormat": "Get Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Get P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Get P95",
- "refId": "C"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "BatchGet Avg",
- "refId": "D"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "BatchGet P75",
- "refId": "E"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "BatchGet P95",
- "refId": "F"
- }
- ],
- "title": "Get Latency",
- "type": "timeseries"
+ "overrides": []
},
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 9
- },
- "id": 6,
- "panels": [],
- "title": "Ingest",
- "type": "row"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 7,
+ "y": 1
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 41,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Mean{}",
+ "interval": "",
+ "legendFormat": "Get Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Get P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Get P95",
+ "refId": "C"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "BatchGet Avg",
+ "refId": "D"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "BatchGet P75",
+ "refId": "E"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "BatchGet P95",
+ "refId": "F"
+ }
+ ],
+ "title": "Get Latency",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 9
+ },
+ "id": 6,
+ "panels": [],
+ "title": "Ingest",
+ "type": "row"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 0,
- "y": 10
- },
- "id": 8,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Ingest Count",
- "refId": "E"
- },
- {
- "exemplar": false,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_Count{}[1m])/60",
- "hide": false,
- "instant": false,
- "interval": "",
- "legendFormat": "BatchIngest Count",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_failed_Count[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Ingest Failure",
- "refId": "C"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_failed_Count[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "BatchIngest Failure",
- "refId": "D"
- }
- ],
- "title": "Ingest QPS",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 0,
+ "y": 10
+ },
+ "id": 8,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Ingest Count",
+ "refId": "E"
+ },
+ {
+ "exemplar": false,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_Count{}[1m])/60",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "legendFormat": "BatchIngest Count",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_failed_Count[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Ingest Failure",
+ "refId": "C"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_failed_Count[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "BatchIngest Failure",
+ "refId": "D"
+ }
+ ],
+ "title": "Ingest QPS",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 7,
- "y": 10
- },
- "id": 10,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "Ingest Latency",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 7,
+ "y": 10
+ },
+ "id": 10,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "Ingest Latency",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 14,
- "y": 10
- },
- "id": 21,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_ingestAspectToLocalDB_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Ingest To DB",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_produceMAE_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Produce MAE",
- "refId": "C"
- }
- ],
- "title": "Ingest Steps",
- "type": "timeseries"
+ "overrides": []
},
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 18
- },
- "id": 12,
- "panels": [],
- "title": "Search",
- "type": "row"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 14,
+ "y": 10
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 21,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_ingestAspectToLocalDB_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Ingest To DB",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_produceMAE_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Produce MAE",
+ "refId": "C"
+ }
+ ],
+ "title": "Ingest Steps",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 0,
- "y": 19
- },
- "id": 23,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Count{}[1m])/60",
- "interval": "",
- "legendFormat": "QPS",
- "refId": "A"
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 0,
+ "y": 18
+ },
+ "id": 43,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_Mean",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
},
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_failed_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Failure",
- "refId": "B"
- }
- ],
- "title": "Search QPS",
- "type": "timeseries"
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_75thPercentile",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_95thPercentile",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "MAE Process Latency",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 26
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 12,
+ "panels": [],
+ "title": "Search",
+ "type": "row"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 7,
- "y": 19
- },
- "id": 29,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "Search Latency",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 0,
+ "y": 27
+ },
+ "id": 23,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Count{}[1m])/60",
+ "interval": "",
+ "legendFormat": "QPS",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_failed_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Failure",
+ "refId": "B"
+ }
+ ],
+ "title": "Search QPS",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 14,
- "y": 19
- },
- "id": 25,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_esSearch_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "ES Search",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Request Builder",
- "refId": "D"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Total Search",
- "refId": "B"
- }
- ],
- "title": "Search Steps",
- "type": "timeseries"
+ "overrides": []
},
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 27
- },
- "id": 27,
- "panels": [],
- "title": "Browse",
- "type": "row"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 7,
+ "y": 27
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 29,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "Search Latency",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 0,
- "y": 28
- },
- "id": 28,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Count{}[1m])/60",
- "interval": "",
- "legendFormat": "QPS",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_failed_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Failure",
- "refId": "B"
- }
- ],
- "title": "Browse QPS",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 14,
+ "y": 27
+ },
+ "id": 25,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_esSearch_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "ES Search",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Request Builder",
+ "refId": "D"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Total Search",
+ "refId": "B"
+ }
+ ],
+ "title": "Search Steps",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 35
+ },
+ "id": 27,
+ "panels": [],
+ "title": "Browse",
+ "type": "row"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 7,
- "y": 28
- },
- "id": 24,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "Browse Latency",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 0,
+ "y": 36
+ },
+ "id": 28,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Count{}[1m])/60",
+ "interval": "",
+ "legendFormat": "QPS",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_failed_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Failure",
+ "refId": "B"
+ }
+ ],
+ "title": "Browse QPS",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 14,
- "y": 28
- },
- "id": 35,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esGroupSearch_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "ES Groups Query",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esEntitiesSearch_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "ES Entities Query",
- "refId": "D"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}",
- "hide": false,
- "interval": "",
- "legendFormat": "Total Browse",
- "refId": "B"
- }
- ],
- "title": "Browse Steps",
- "type": "timeseries"
+ "overrides": []
},
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 36
- },
- "id": 32,
- "panels": [],
- "title": "Graph",
- "type": "row"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 7,
+ "y": 36
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 24,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "Browse Latency",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 0,
- "y": 37
- },
- "id": 33,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Count{}[1m])/60",
- "interval": "",
- "legendFormat": "Relationships QPS",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Lineage QPS",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_failed_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Relationships Failure",
- "refId": "C"
- },
- {
- "exemplar": true,
- "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_failed_Count{}[1m])/60",
- "hide": false,
- "interval": "",
- "legendFormat": "Lineage Failure",
- "refId": "D"
- }
- ],
- "title": "Graph QPS",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 14,
+ "y": 36
+ },
+ "id": 35,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esGroupSearch_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "ES Groups Query",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esEntitiesSearch_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "ES Entities Query",
+ "refId": "D"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Total Browse",
+ "refId": "B"
+ }
+ ],
+ "title": "Browse Steps",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 44
+ },
+ "id": 32,
+ "panels": [],
+ "title": "Graph",
+ "type": "row"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 7,
- "x": 7,
- "y": 37
- },
- "id": 34,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "Graph Latency",
- "type": "timeseries"
+ "overrides": []
},
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 45
- },
- "id": 4,
- "panels": [],
- "title": "Kafka",
- "type": "row"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 0,
+ "y": 45
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "id": 33,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Count{}[1m])/60",
+ "interval": "",
+ "legendFormat": "Relationships QPS",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Lineage QPS",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_failed_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Relationships Failure",
+ "refId": "C"
+ },
+ {
+ "exemplar": true,
+ "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_failed_Count{}[1m])/60",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "Lineage Failure",
+ "refId": "D"
+ }
+ ],
+ "title": "Graph QPS",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 46
- },
- "id": 18,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "sum by (topic) (kafka_producer_producer_topic_metrics_record_send_rate{})",
- "interval": "",
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "title": "Producer Metrics",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 7,
+ "y": 45
+ },
+ "id": 34,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "Graph Latency",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 53
+ },
+ "id": 4,
+ "panels": [],
+ "title": "Kafka",
+ "type": "row"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 46
- },
- "id": 19,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "sum by (client_id) (kafka_consumer_consumer_metrics_request_rate{})",
- "interval": "",
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "title": "Consumer Metrics",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 54
+ },
+ "id": 18,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum by (topic) (kafka_producer_producer_topic_metrics_record_send_rate{})",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Producer Metrics",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 54
- },
- "id": 15,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "MetadataChangeEvent Topic Lag",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 54
+ },
+ "id": 19,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum by (client_id) (kafka_consumer_consumer_metrics_request_rate{})",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Consumer Metrics",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 54
- },
- "id": 13,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "MetadataChangeProposal Topic Lag",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 62
+ },
+ "id": 15,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "MetadataChangeEvent Topic Lag",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 63
- },
- "id": 2,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "MetadataAuditEvent Topic Lag",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 62
+ },
+ "id": 13,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "MetadataChangeProposal Topic Lag",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 63
- },
- "id": 14,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "MetadataChangeLog Topic Lag",
- "type": "timeseries"
+ "overrides": []
},
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 71
+ },
+ "id": 2,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "MetadataAuditEvent Topic Lag",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
},
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
- "thresholdsStyle": {
- "mode": "off"
+ {
+ "color": "red",
+ "value": 80
}
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 71
+ },
+ "id": 14,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "MetadataChangeLog Topic Lag",
+ "type": "timeseries"
+ },
+ {
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "thresholdsStyle": {
+ "mode": "off"
}
},
- "overrides": []
- },
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 72
- },
- "id": 16,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single"
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
}
},
- "targets": [
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_Mean{}",
- "interval": "",
- "legendFormat": "Avg",
- "refId": "A"
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 80
+ },
+ "id": 16,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
},
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_75thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P75",
- "refId": "B"
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_Mean{}",
+ "interval": "",
+ "legendFormat": "Avg",
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
},
- {
- "exemplar": true,
- "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_95thPercentile{}",
- "hide": false,
- "interval": "",
- "legendFormat": "P95",
- "refId": "C"
- }
- ],
- "title": "DataHubUsageEvent Topic Lag",
- "type": "timeseries"
- }
- ],
- "refresh": "5s",
- "schemaVersion": 30,
- "style": "dark",
- "tags": [],
- "templating": {
- "list": []
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {},
- "timezone": "",
- "title": "DataHub Dashboard",
- "uid": "x4fS54Vnk",
- "version": 3
- }
\ No newline at end of file
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_75thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P75",
+ "refId": "B"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "exemplar": true,
+ "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_95thPercentile{}",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "P95",
+ "refId": "C"
+ }
+ ],
+ "title": "DataHubUsageEvent Topic Lag",
+ "type": "timeseries"
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 33,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "DataHub Dashboard",
+ "uid": "x4fS54Vnk",
+ "version": 1,
+ "weekStart": ""
+}
\ No newline at end of file
diff --git a/docker/monitoring/prometheus.yaml b/docker/monitoring/prometheus.yaml
index 6916f60a8af915..c28a38fef1e7fe 100644
--- a/docker/monitoring/prometheus.yaml
+++ b/docker/monitoring/prometheus.yaml
@@ -3,6 +3,7 @@ scrape_configs:
scrape_interval: 10s
static_configs:
- targets:
+ - 'host.docker.internal:4318'
- 'datahub-gms:4318'
- 'datahub-mae-consumer:4318'
- 'datahub-mce-consumer:4318'
\ No newline at end of file
diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
index 7a70b77bc6f6e6..88ca38c63a6df3 100644
--- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
@@ -71,7 +71,7 @@ services:
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- - ES_JAVA_OPTS=-Xms256m -Xmx256m
+ - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
retries: 4
start_period: 2m
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index 1232e63a69add0..918fa336768f1f 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -75,7 +75,7 @@ services:
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- - ES_JAVA_OPTS=-Xms256m -Xmx256m
+ - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
retries: 4
start_period: 2m
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index e20d407991eaba..2f59da6a10f8ab 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -98,6 +98,7 @@ module.exports = {
"docs/lineage/airflow",
"docker/airflow/local_airflow",
"docs/lineage/sample_code",
+ "spark-lineage/README",
],
},
{
@@ -227,6 +228,7 @@ module.exports = {
{
Advanced: [
"docs/advanced/no-code-modeling",
+ "docs/advanced/db-retention",
"docs/advanced/aspect-versioning",
"docs/advanced/es-7-upgrade",
"docs/advanced/high-cardinality",
diff --git a/docs/advanced/db-retention.md b/docs/advanced/db-retention.md
new file mode 100644
index 00000000000000..41f59bfcf93eed
--- /dev/null
+++ b/docs/advanced/db-retention.md
@@ -0,0 +1,79 @@
+# Configuring Database Retention
+
+## Goal
+
+DataHub uses a database (or key-value store) to store different versions of the aspects as they get ingested. Storing
+multiple versions of the aspects allows us to look at the history of how the aspect changed and to roll back to a previous
+version when incorrect metadata gets ingested. However, each version takes up space in the database, while bringing less
+value to the system. We need to be able to impose **retention** on these records to keep the size of the DB in check.
+
+The goal of the retention system is to be able to **configure and enforce retention policies** on documents at
+various levels
+(global, entity-level, aspect-level).
+
+## What type of retention policies are supported?
+
+We support 3 types of retention policies.
+
+1. Indefinite retention: Keep all versions of aspects
+2. Version-based retention: Keep the latest N versions
+3. Time-based retention: Keep versions that have been ingested in the last N seconds
+
+Note, the latest version of each aspect (version 0) is never deleted. This is to ensure that we do not impact the core
+functionality of DataHub while applying retention.
+
+## When is the retention policy applied?
+
+As of now, retention policies are applied in two places
+
+1. **GMS boot-up**: On boot, it runs a bootstrap step to ingest the predefined set of retention policies. If there were
+ no existing policies or the existing policies got updated, it will trigger an asynchronous call to apply retention
+ to **all**
+ records in the database.
+2. **Ingest**: On every ingest, if an existing aspect got updated, it applies retention to the urn, aspect pair being
+ ingested.
+
+We are planning to support a cron-based application of retention in the near future to ensure that the time-based
+retention is applied correctly.
+
+## How to configure?
+
+For the initial iteration, we have made this feature opt-in. Please set **ENTITY_SERVICE_ENABLE_RETENTION=true** when
+creating the datahub-gms container/k8s pod.
+
+On GMS start up, it fetches the list of retention policies to ingest from two sources. First is the default we provide,
+which adds a version-based retention to keep 20 latest aspects for all entity-aspect pairs. Second, we read YAML files
+from the `/etc/datahub/plugins/retention` directory and overlay them on the default set of policies we provide.
+
+For docker, we set docker-compose to mount `${HOME}/.datahub/plugins` directory to `/etc/datahub/plugins` directory
+within the containers, so you can customize the initial set of retention policies by creating
+a `${HOME}/.datahub/plugins/retention/retention.yaml` file.
+
+We will support a standardized way to do this in kubernetes setup in the near future.
+
+The format for the YAML file is as follows.
+
+```yaml
+- entity: "*" # denotes that policy will be applied to all entities
+ aspect: "*" # denotes that policy will be applied to all aspects
+ config:
+ retention:
+ version:
+ maxVersions: 20
+- entity: "dataset"
+ aspect: "datasetProperties"
+ config:
+ retention:
+ version:
+ maxVersions: 20
+ time:
+ maxAgeInSeconds: 2592000 # 30 days
+```
+
+Note, it searches for the policies corresponding to the entity, aspect pair in the following order:
+1. `entity`, `aspect`
+2. `*`, `aspect`
+3. `entity`, `*`
+4. `*`, `*`
+
+By restarting datahub-gms after creating the plugin yaml file, the new set of retention policies will be applied.
\ No newline at end of file
diff --git a/docs/quickstart.md b/docs/quickstart.md
index b4256dfbbb48ca..05b083ce47b45b 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -52,6 +52,8 @@ To cleanse DataHub of all of it's state (e.g. before ingesting your own), you ca
datahub docker nuke
```
+If you want to delete the containers but keep the data, you can add the `--keep-data` flag to the command. This allows you to run the `quickstart` command to get DataHub running with your data that was ingested earlier.
+
## Troubleshooting
### Command not found: datahub
diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md
index 6ce0128179fad4..d19ccac08abd5d 100644
--- a/metadata-ingestion/README.md
+++ b/metadata-ingestion/README.md
@@ -49,6 +49,7 @@ Sources:
| [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
| [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source |
| [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ |
+| [metabase](./source_docs/metabase.md) | `pip install 'acryl-datahub[metabase]'` | Metabase source |
| [mode](./source_docs/mode.md) | `pip install 'acryl-datahub[mode]'` | Mode Analytics source |
| [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
| [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
diff --git a/metadata-ingestion/examples/recipes/metabase_to_rest.yml b/metadata-ingestion/examples/recipes/metabase_to_rest.yml
new file mode 100644
index 00000000000000..6d4a7e841b3803
--- /dev/null
+++ b/metadata-ingestion/examples/recipes/metabase_to_rest.yml
@@ -0,0 +1,16 @@
+# see https://datahubproject.io/docs/metadata-ingestion/source_docs/metabase for complete documentation
+source:
+ type: "metabase"
+ config:
+ username: admin@metabase.com
+ password: admin12345
+ connect_uri: http://localhost:3000/
+ default_schema: public
+ database_alias_map:
+ h2: sample-dataset.db
+
+# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
+sink:
+ type: "datahub-rest"
+ config:
+ server: "http://localhost:8080"
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index b3a776f511f1f5..c0afd76928d814 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -112,6 +112,7 @@ def get_long_description():
"ldap": {"python-ldap>=2.4"},
"looker": looker_common,
"lookml": looker_common | {"lkml>=1.1.0", "sql-metadata==2.2.2"},
+ "metabase": {"requests"},
"mode": {"requests", "sqllineage"},
"mongodb": {"pymongo>=3.11"},
"mssql": sql_common | {"sqlalchemy-pytds>=0.3"},
@@ -298,6 +299,7 @@ def get_long_description():
"snowflake-usage = datahub.ingestion.source.usage.snowflake_usage:SnowflakeUsageSource",
"superset = datahub.ingestion.source.superset:SupersetSource",
"openapi = datahub.ingestion.source.openapi:OpenApiSource",
+ "metabase = datahub.ingestion.source.metabase:MetabaseSource",
"trino = datahub.ingestion.source.sql.trino:TrinoSource",
"starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource",
"nifi = datahub.ingestion.source.nifi:NifiSource",
diff --git a/metadata-ingestion/source_docs/metabase.md b/metadata-ingestion/source_docs/metabase.md
new file mode 100644
index 00000000000000..b62765618733d9
--- /dev/null
+++ b/metadata-ingestion/source_docs/metabase.md
@@ -0,0 +1,99 @@
+# Metabase
+
+For context on getting started with ingestion, check out our [metadata ingestion guide](../README.md).
+
+## Setup
+
+To install this plugin, run `pip install 'acryl-datahub[metabase]'`.
+
+See documentation for Metabase's API at https://www.metabase.com/learn/administration/metabase-api.html
+for more details on Metabase's login api.
+
+
+## Capabilities
+
+This plugin extracts charts, dashboards, and associated metadata. This plugin is in beta and has only been tested
+on PostgreSQL and H2 databases.
+
+### Dashboard
+
+[/api/dashboard](https://www.metabase.com/docs/latest/api-documentation.html#dashboard) endpoint is used to
+retrieve the following dashboard information.
+
+- Title and description
+- Last edited by
+- Owner
+- Link to the dashboard in Metabase
+- Associated charts
+
+### Chart
+
+[/api/card](https://www.metabase.com/docs/latest/api-documentation.html#card) endpoint is used to
+retrieve the following information.
+
+- Title and description
+- Last edited by
+- Owner
+- Link to the chart in Metabase
+- Datasource and lineage
+
+The following properties for a chart are ingested in DataHub.
+
+| Name | Description |
+| ------------- | ----------------------------------------------- |
+| `Dimensions` | Column names |
+| `Filters` | Any filters applied to the chart |
+| `Metrics` | All columns that are being used for aggregation |
+
+
+## Quickstart recipe
+
+Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options.
+
+For general pointers on writing and running a recipe, see our [main recipe guide](../README.md#recipes).
+
+```yml
+source:
+ type: metabase
+ config:
+ # Coordinates
+ connect_uri: http://localhost:3000
+
+ # Credentials
+ username: user
+ password: pass
+
+ # Options
+ default_schema: public
+ database_alias_map:
+ h2: sample-dataset.db
+
+sink:
+ # sink configs
+```
+
+## Config details
+
+
+| Field | Required | Default | Description |
+| -------------------- | -------- | ------------------ |------------------------------------------------------------------------|
+| `connect_uri` | ✅ | `"localhost:3000"` | Metabase host URL. |
+| `username` | ✅ | | Metabase username. |
+| `password` | ✅ | | Metabase password. |
+| `database_alias_map` | | | Database name map to use when constructing dataset URN. |
+| `default_schema` | | `public` | Default schema name to use when schema is not provided in an SQL query |
+| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
+
+
+DataHub will try to determine database name from Metabase [api/database](https://www.metabase.com/docs/latest/api-documentation.html#database)
+payload. However, the name can be overridden from `database_alias_map` for a given database connected to Metabase.
+
+## Compatibility
+
+Metabase version [v0.41.2](https://www.metabase.com/start/oss/)
+
+
+## Questions
+
+If you've got any questions on configuring this source, feel free to ping us on
+[our Slack](https://slack.datahubproject.io/)!
diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md
index 1947998fc1bbd1..8c6989bbd548b2 100644
--- a/metadata-ingestion/source_docs/snowflake.md
+++ b/metadata-ingestion/source_docs/snowflake.md
@@ -139,19 +139,23 @@ sink:
Note that a `.` is used to denote nested fields in the YAML recipe.
-| Field | Required | Default | Description |
-| ----------------- | -------- | -------------------------------------------------------------- | --------------------------------------------------------------- |
-| `username` | | | Snowflake username. |
-| `password` | | | Snowflake password. |
-| `host_port` | ✅ | | Snowflake host URL. |
-| `warehouse` | | | Snowflake warehouse. |
-| `role` | | | Snowflake role. |
-| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
-| `bucket_duration` | | `"DAY"` | Duration to bucket usage events by. Can be `"DAY"` or `"HOUR"`. |
-| `start_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Earliest date of usage logs to consider. |
-| `end_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Latest date of usage logs to consider. |
-| `top_n_queries` | | `10` | Number of top queries to save to each table. |
+| Field | Required | Default | Description |
+| ----------------- | -------- | --------------------------------------------------------------------| --------------------------------------------------------------- |
+| `username` | | | Snowflake username. |
+| `password` | | | Snowflake password. |
+| `host_port` | ✅ | | Snowflake host URL. |
+| `warehouse` | | | Snowflake warehouse. |
+| `role` | | | Snowflake role. |
+| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
+| `bucket_duration` | | `"DAY"` | Duration to bucket usage events by. Can be `"DAY"` or `"HOUR"`. |
+| `start_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Earliest date of usage logs to consider. |
+| `end_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Latest date of usage logs to consider. |
+| `top_n_queries` | | `10` | Number of top queries to save to each table. |
+| `database_pattern`| | `"^UTIL_DB$"`<br />`"^SNOWFLAKE$"`<br />`"^SNOWFLAKE_SAMPLE_DATA$"` | Allow/deny patterns for db in snowflake dataset names. |
+| `schema_pattern` | | | Allow/deny patterns for schema in snowflake dataset names. |
+| `view_pattern` | | | Allow/deny patterns for views in snowflake dataset names. |
+| `table_pattern` | | | Allow/deny patterns for tables in snowflake dataset names. |
### Compatibility
Coming soon!
diff --git a/metadata-ingestion/src/datahub/cli/docker.py b/metadata-ingestion/src/datahub/cli/docker.py
index 1c44c477481704..c94f50dd2e5fe8 100644
--- a/metadata-ingestion/src/datahub/cli/docker.py
+++ b/metadata-ingestion/src/datahub/cli/docker.py
@@ -365,7 +365,14 @@ def ingest_sample_data(path: Optional[str]) -> None:
@docker.command()
@telemetry.with_telemetry
-def nuke() -> None:
+@click.option(
+ "--keep-data",
+ type=bool,
+ is_flag=True,
+ default=False,
+ help="Keep data volumes instead of deleting them",
+)
+def nuke(keep_data: bool) -> None:
"""Remove all Docker containers, networks, and volumes associated with DataHub."""
with get_client_with_error() as (client, error):
@@ -381,11 +388,14 @@ def nuke() -> None:
):
container.remove(v=True, force=True)
- click.echo("Removing volumes in the datahub project")
- for volume in client.volumes.list(
- filters={"label": "com.docker.compose.project=datahub"}
- ):
- volume.remove(force=True)
+ if keep_data:
+ click.echo("Skipping deleting data volumes in the datahub project")
+ else:
+ click.echo("Removing volumes in the datahub project")
+ for volume in client.volumes.list(
+ filters={"label": "com.docker.compose.project=datahub"}
+ ):
+ volume.remove(force=True)
click.echo("Removing networks in the datahub project")
for network in client.networks.list(
diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py
index f33546358e2201..b3d974f8704ddd 100644
--- a/metadata-ingestion/src/datahub/emitter/mce_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py
@@ -32,15 +32,9 @@ def get_sys_time() -> int:
return int(time.time() * 1000)
-def _check_data_platform_name(platform_name: str) -> None:
- if not platform_name.isalpha():
- logger.warning(f"improperly formatted data platform: {platform_name}")
-
-
def make_data_platform_urn(platform: str) -> str:
if platform.startswith("urn:li:dataPlatform:"):
return platform
- _check_data_platform_name(platform)
return f"urn:li:dataPlatform:{platform}"
@@ -94,13 +88,11 @@ def make_data_job_urn(
def make_dashboard_urn(platform: str, name: str) -> str:
# FIXME: dashboards don't currently include data platform urn prefixes.
- _check_data_platform_name(platform)
return f"urn:li:dashboard:({platform},{name})"
def make_chart_urn(platform: str, name: str) -> str:
# FIXME: charts don't currently include data platform urn prefixes.
- _check_data_platform_name(platform)
return f"urn:li:chart:({platform},{name})"
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index cc6fadeb982adc..ec03b0f0779f3e 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -6,7 +6,6 @@
from avrogen.dict_wrapper import DictWrapper
from requests.models import HTTPError
-from requests.sessions import Session
from datahub.configuration.common import ConfigModel, OperationalError
from datahub.emitter.rest_emitter import DatahubRestEmitter
@@ -40,11 +39,10 @@ def __init__(self, config: DatahubClientConfig) -> None:
ca_certificate_path=self.config.ca_certificate_path,
)
self.test_connection()
- self.g_session = Session()
def _get_generic(self, url: str) -> Dict:
try:
- response = self.g_session.get(url)
+ response = self._session.get(url)
response.raise_for_status()
return response.json()
except HTTPError as e:
@@ -67,7 +65,7 @@ def get_aspect(
aspect_type: Type[Aspect],
) -> Optional[Aspect]:
url = f"{self._gms_server}/aspects/{urllib.parse.quote(entity_urn)}?aspect={aspect}&version=0"
- response = self.g_session.get(url)
+ response = self._session.get(url)
if response.status_code == 404:
# not found
return None
@@ -103,7 +101,7 @@ def get_usage_aspects_from_urn(
url = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
try:
usage_aspects: List[DatasetUsageStatisticsClass] = []
- response = self.g_session.post(
+ response = self._session.post(
url, data=json.dumps(payload), headers=headers
)
if response.status_code != 200:
@@ -135,7 +133,7 @@ def list_all_entity_urns(
"Content-Type": "application/json",
}
try:
- response = self.g_session.post(
+ response = self._session.post(
url, data=json.dumps(payload), headers=headers
)
if response.status_code != 200:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py
new file mode 100644
index 00000000000000..87331addf1ba70
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py
@@ -0,0 +1,516 @@
+from functools import lru_cache
+from typing import Dict, Iterable, Optional
+
+import dateutil.parser as dp
+import requests
+from pydantic import validator
+from requests.models import HTTPError
+from sqllineage.runner import LineageRunner
+
+import datahub.emitter.mce_builder as builder
+from datahub.configuration.common import ConfigModel
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.metadata.com.linkedin.pegasus2avro.common import (
+ AuditStamp,
+ ChangeAuditStamps,
+)
+from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
+ ChartSnapshot,
+ DashboardSnapshot,
+)
+from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
+from datahub.metadata.schema_classes import (
+ ChartInfoClass,
+ ChartQueryClass,
+ ChartQueryTypeClass,
+ ChartTypeClass,
+ DashboardInfoClass,
+ OwnerClass,
+ OwnershipClass,
+ OwnershipTypeClass,
+)
+from datahub.utilities import config_clean
+
+
+class MetabaseConfig(ConfigModel):
+    # Settings for the Metabase source. Credentials are sent to the /api/session endpoint:
+    # https://www.metabase.com/docs/latest/api-documentation.html#post-apisession
+    connect_uri: str = "localhost:3000"  # base URL; normalized by the validator below
+    username: Optional[str] = None
+    password: Optional[str] = None
+    database_alias_map: Optional[dict] = None  # platform name -> database name override
+    default_schema: str = "public"  # used when a native-query table has an empty schema
+    env: str = builder.DEFAULT_ENV
+
+    @validator("connect_uri")
+    def remove_trailing_slash(cls, v):
+        return config_clean.remove_trailing_slashes(v)
+
+
+class MetabaseSource(Source):
+    config: MetabaseConfig
+    report: SourceReport
+    platform = "metabase"  # platform name used when building dashboard and chart URNs
+
+    def __hash__(self):  # identity hash; the lru_cache-decorated methods hash ``self``
+        return id(self)
+
+    def __init__(self, ctx: PipelineContext, config: MetabaseConfig):
+        """Log in to Metabase and prepare a session that carries the access token."""
+        super().__init__(ctx)
+        self.config = config
+        self.report = SourceReport()
+
+        login_response = requests.post(
+            f"{self.config.connect_uri}/api/session",
+            json={  # explicit keyword; the third positional argument of post() is json
+                "username": self.config.username,
+                "password": self.config.password,
+            },
+        )
+
+        login_response.raise_for_status()
+        self.access_token = login_response.json().get("id", "")
+
+        self.session = requests.session()
+        self.session.headers.update(
+            {
+                "X-Metabase-Session": f"{self.access_token}",
+                "Content-Type": "application/json",
+                "Accept": "*/*",
+            }
+        )
+
+        # Test the connection
+        try:
+            test_response = self.session.get(
+                f"{self.config.connect_uri}/api/user/current"
+            )
+            test_response.raise_for_status()
+        except HTTPError as e:
+            self.report.report_failure(
+                key="metabase-session",
+                reason=f"Unable to retrieve user {self.config.username} information. "
+                f"{str(e)}",  # no %-formatting: a "%" in the username must not raise
+            )
+
+    def close(self) -> None:
+        """Delete the Metabase session (logout); report a failure if it is refused."""
+        logout_url = f"{self.config.connect_uri}/api/session"
+        auth_header = {"X-Metabase-Session": self.access_token}
+        response = requests.delete(logout_url, headers=auth_header)
+        if response.status_code not in (200, 204):
+            self.report.report_failure(
+                key="metabase-session",
+                reason=f"Unable to logout for user {self.config.username}",
+            )
+
+    def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
+        """Yield a work unit for every dashboard returned by ``/api/dashboard``."""
+        try:
+            listing = self.session.get(f"{self.config.connect_uri}/api/dashboard")
+            listing.raise_for_status()
+
+            for summary in listing.json():
+                snapshot = self.construct_dashboard_from_api_data(summary)
+                if snapshot is None:
+                    continue
+
+                work_unit = MetadataWorkUnit(
+                    id=snapshot.urn,
+                    mce=MetadataChangeEvent(proposedSnapshot=snapshot),
+                )
+                self.report.report_workunit(work_unit)
+                yield work_unit
+
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key="metabase-dashboard",
+                reason=f"Unable to retrieve dashboards. " f"Reason: {str(http_error)}",
+            )
+
+    def construct_dashboard_from_api_data(
+        self, dashboard_info: dict
+    ) -> Optional[DashboardSnapshot]:
+        """Fetch one dashboard's details and build its snapshot (None if the fetch fails)."""
+        import time  # function-scope import; only used for the timestamp fallback
+        dashboard_id = dashboard_info.get("id", "")
+        dashboard_url = f"{self.config.connect_uri}/api/dashboard/{dashboard_id}"
+        try:
+            dashboard_response = self.session.get(dashboard_url)
+            dashboard_response.raise_for_status()
+            dashboard_details = dashboard_response.json()
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key=f"metabase-dashboard-{dashboard_id}",
+                reason=f"Unable to retrieve dashboard. " f"Reason: {str(http_error)}",
+            )
+            return None
+
+        dashboard_urn = builder.make_dashboard_urn(
+            self.platform, dashboard_details.get("id", "")
+        )
+        dashboard_snapshot = DashboardSnapshot(urn=dashboard_urn, aspects=[])
+        last_edit_by = dashboard_details.get("last-edit-info") or {}
+        modified_actor = builder.make_user_urn(last_edit_by.get("email", "unknown"))
+        # The timestamp can be missing, and dateutil cannot parse a "now" placeholder.
+        try:
+            modified_ts = int(dp.parse(last_edit_by["timestamp"]).timestamp() * 1000)
+        except (KeyError, TypeError, ValueError, OverflowError):
+            modified_ts = int(time.time() * 1000)
+        title = dashboard_details.get("name", "") or ""
+        description = dashboard_details.get("description", "") or ""
+        last_modified = ChangeAuditStamps(
+            created=AuditStamp(time=modified_ts, actor=modified_actor),
+            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
+        )
+
+        # NOTE(review): "id" may be the dashboard-card id, not the card's "card_id" - confirm.
+        cards_data = dashboard_details.get("ordered_cards") or []
+        chart_urns = [
+            builder.make_chart_urn(self.platform, card_info.get("id", ""))
+            for card_info in cards_data
+        ]
+
+        dashboard_info_class = DashboardInfoClass(
+            description=description,
+            title=title,
+            charts=chart_urns,
+            lastModified=last_modified,
+            dashboardUrl=f"{self.config.connect_uri}/dashboard/{dashboard_id}",
+            customProperties={},
+        )
+        dashboard_snapshot.aspects.append(dashboard_info_class)
+
+        # Ownership
+        ownership = self._get_ownership(dashboard_details.get("creator_id", ""))
+        if ownership is not None:
+            dashboard_snapshot.aspects.append(ownership)
+        return dashboard_snapshot
+
+    @lru_cache(maxsize=None)
+    def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]:
+        """Look up a Metabase user and return an Ownership aspect naming them as owner."""
+        user_info_url = f"{self.config.connect_uri}/api/user/{creator_id}"
+        try:
+            user_info_response = self.session.get(user_info_url)
+            user_info_response.raise_for_status()
+            user_details = user_info_response.json()
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key=f"metabase-user-{creator_id}",
+                reason=f"Unable to retrieve User info. " f"Reason: {str(http_error)}",
+            )
+            return None
+
+        owner_email = user_details.get("email")
+        if not owner_email:
+            # Without an email there is no usable user URN, so emit no ownership.
+            return None
+        return OwnershipClass(
+            owners=[
+                OwnerClass(
+                    owner=builder.make_user_urn(owner_email),
+                    type=OwnershipTypeClass.DATAOWNER,
+                )
+            ]
+        )
+
+    def emit_card_mces(self) -> Iterable[MetadataWorkUnit]:
+        """Yield a work unit for every card (chart) returned by ``/api/card``."""
+        try:
+            listing = self.session.get(f"{self.config.connect_uri}/api/card")
+            listing.raise_for_status()
+            for summary in listing.json():
+                snapshot = self.construct_card_from_api_data(summary)
+                if snapshot is None:
+                    continue
+                change_event = MetadataChangeEvent(proposedSnapshot=snapshot)
+                work_unit = MetadataWorkUnit(id=snapshot.urn, mce=change_event)
+                self.report.report_workunit(work_unit)
+                yield work_unit
+
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key="metabase-cards",
+                reason=f"Unable to retrieve cards. " f"Reason: {str(http_error)}",
+            )
+            return None
+
+    def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapshot]:
+        """Fetch one card's details and build its chart snapshot (None if the fetch fails)."""
+        import time  # function-scope import; only used for the timestamp fallback
+        card_id = card_data.get("id", "")
+        card_url = f"{self.config.connect_uri}/api/card/{card_id}"
+        try:
+            card_response = self.session.get(card_url)
+            card_response.raise_for_status()
+            card_details = card_response.json()
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key=f"metabase-card-{card_id}",
+                reason=f"Unable to retrieve Card info. " f"Reason: {str(http_error)}",
+            )
+            return None
+
+        chart_urn = builder.make_chart_urn(self.platform, card_id)
+        chart_snapshot = ChartSnapshot(urn=chart_urn, aspects=[])
+
+        last_edit_by = card_details.get("last-edit-info") or {}
+        modified_actor = builder.make_user_urn(last_edit_by.get("email", "unknown"))
+        # The timestamp can be missing, and dateutil cannot parse a "now" placeholder.
+        try:
+            modified_ts = int(dp.parse(last_edit_by["timestamp"]).timestamp() * 1000)
+        except (KeyError, TypeError, ValueError, OverflowError):
+            modified_ts = int(time.time() * 1000)
+        last_modified = ChangeAuditStamps(
+            created=AuditStamp(time=modified_ts, actor=modified_actor),
+            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
+        )
+
+        chart_type = self._get_chart_type(
+            card_details.get("id", ""), card_details.get("display")
+        )
+        description = card_details.get("description") or ""
+        title = card_details.get("name") or ""
+        datasource_urn = self.get_datasource_urn(card_details)
+        custom_properties = self.construct_card_custom_properties(card_details)
+
+        chart_info = ChartInfoClass(
+            type=chart_type,
+            description=description,
+            title=title,
+            lastModified=last_modified,
+            chartUrl=f"{self.config.connect_uri}/card/{card_id}",
+            inputs=datasource_urn,
+            customProperties=custom_properties,
+        )
+        chart_snapshot.aspects.append(chart_info)
+
+        if card_details.get("query_type", "") == "native":
+            raw_query = (
+                card_details.get("dataset_query", {}).get("native", {}).get("query", "")
+            )
+            chart_query_native = ChartQueryClass(
+                rawQuery=raw_query,
+                type=ChartQueryTypeClass.SQL,
+            )
+            chart_snapshot.aspects.append(chart_query_native)
+
+        # Ownership
+        ownership = self._get_ownership(card_details.get("creator_id", ""))
+        if ownership is not None:
+            chart_snapshot.aspects.append(ownership)
+        return chart_snapshot
+
+    def _get_chart_type(self, card_id: int, display_type: str) -> Optional[str]:
+        """Translate a Metabase display type into a DataHub chart type, if one exists."""
+        type_mapping = {
+            "table": ChartTypeClass.TABLE,
+            "bar": ChartTypeClass.BAR,
+            "line": ChartTypeClass.LINE,
+            "row": ChartTypeClass.BAR,
+            "area": ChartTypeClass.AREA,
+            "pie": ChartTypeClass.PIE,
+            "funnel": ChartTypeClass.BAR,
+            "scatter": ChartTypeClass.SCATTER,
+            "scalar": ChartTypeClass.TEXT,
+            "smartscalar": ChartTypeClass.TEXT,
+            "pivot": ChartTypeClass.TABLE,
+            "waterfall": ChartTypeClass.BAR,
+            "progress": None,
+            "combo": None,
+            "gauge": None,
+            "map": None,
+        }
+        if not display_type:
+            self.report.report_warning(
+                key=f"metabase-card-{card_id}",
+                reason=f"Card type {display_type} is missing. Setting to None",
+            )
+            return None
+
+        if display_type in type_mapping:
+            # Covers the display types explicitly mapped to None above (no warning).
+            return type_mapping[display_type]
+        self.report.report_warning(
+            key=f"metabase-card-{card_id}",
+            reason=f"Chart type {display_type} not supported. Setting to None",
+        )
+        return None
+
+    def construct_card_custom_properties(self, card_details: dict) -> Dict:
+        """Summarise a card's metrics, dimensions and filters as string properties."""
+        metrics, dimensions = [], []
+        for meta_data in card_details.get("result_metadata") or []:
+            display_name = meta_data.get("display_name", "") or ""
+            # Columns whose field reference mentions "aggregation" count as metrics.
+            if "aggregation" in (meta_data.get("field_ref") or ""):
+                metrics.append(display_name)
+            else:
+                dimensions.append(display_name)
+
+        # Keys that are absent or null are treated the same as "no filters".
+        query = (card_details.get("dataset_query") or {}).get("query") or {}
+        filters = query.get("filter") or []
+
+        return {
+            "Metrics": ", ".join(metrics),
+            "Filters": f"{filters}" if len(filters) else "",
+            "Dimensions": ", ".join(dimensions),
+        }
+
+    def get_datasource_urn(self, card_details):
+        """Return the dataset URNs a card reads from.
+
+        Structured queries use the card's source table; anything else is treated
+        as native SQL and parsed for lineage. Returns None if that parsing fails.
+        """
+        platform, database_name = self.get_datasource_from_id(
+            card_details.get("database_id", "")
+        )
+        dataset_query = card_details.get("dataset_query") or {}
+        source_paths = set()
+
+        if dataset_query.get("type") == "query":
+            source_table_id = (dataset_query.get("query") or {}).get("source-table")
+            # A dict default here would be unhashable and break the lru_cache'd lookup.
+            schema_name, table_name = (
+                self.get_source_table_from_id(source_table_id)
+                if source_table_id is not None
+                else (None, None)
+            )
+            if table_name:
+                source_paths.add(
+                    f"{schema_name + '.' if schema_name else ''}{table_name}"
+                )
+        else:
+            # Bound before the ``try`` so the failure report can always quote it.
+            raw_query = (dataset_query.get("native") or {}).get("query", "")
+            try:
+                parser = LineageRunner(raw_query)
+                for table in parser.source_tables:
+                    sources = str(table).split(".")
+                    source_schema, source_table = sources[-2], sources[-1]
+                    if source_schema == "":
+                        source_schema = str(self.config.default_schema)
+
+                    source_paths.add(f"{source_schema}.{source_table}")
+            except Exception as e:
+                self.report.report_failure(
+                    key="metabase-query",
+                    reason=f"Unable to retrieve lineage from query. "
+                    f"Query: {raw_query} "
+                    f"Reason: {str(e)} ",
+                )
+                return None
+
+        # Create dataset URNs
+        dbname = f"{database_name + '.' if database_name else ''}"
+        dataset_urn = [
+            builder.make_dataset_urn(platform, f"{dbname}{tbl}", self.config.env)
+            for tbl in source_paths
+        ]
+
+        return dataset_urn
+
+    @lru_cache(maxsize=None)
+    def get_source_table_from_id(self, table_id):
+        """Return ``(schema, name)`` for a Metabase table id, or ``(None, None)``."""
+        table_url = f"{self.config.connect_uri}/api/table/{table_id}"
+        try:
+            table_response = self.session.get(table_url)
+            table_response.raise_for_status()
+            table_json = table_response.json()
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key=f"metabase-table-{table_id}",
+                reason=f"Unable to retrieve source table. "
+                f"Reason: {str(http_error)}",
+            )
+            return None, None
+
+        # Only HTTP errors are handled above; a missing key yields "".
+        schema = table_json.get("schema", "")
+        name = table_json.get("name", "")
+        return schema, name
+
+    @lru_cache(maxsize=None)
+    def get_datasource_from_id(self, datasource_id):
+        """Return ``(platform, database name)`` for a Metabase database id."""
+        try:
+            dataset_response = self.session.get(
+                f"{self.config.connect_uri}/api/database/{datasource_id}"
+            )
+            dataset_response.raise_for_status()
+            dataset_json = dataset_response.json()
+        except HTTPError as http_error:
+            self.report.report_failure(
+                key=f"metabase-datasource-{datasource_id}",
+                reason=f"Unable to retrieve Datasource. " f"Reason: {str(http_error)}",
+            )
+            return None, None
+
+        # Map engine names to what datahub expects in
+        # https://github.com/linkedin/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json
+        engine = dataset_json.get("engine", "")
+        platform = engine
+        engine_mapping = {
+            "sparksql": "spark",
+            "mongo": "mongodb",
+            "presto-jdbc": "presto",
+            "sqlserver": "mssql",
+            "bigquery-cloud-sdk": "bigquery",
+        }
+        if engine in engine_mapping:
+            platform = engine_mapping[engine]
+        else:
+            self.report.report_warning(
+                key=f"metabase-platform-{datasource_id}",
+                reason=f"Platform was not found in DataHub. Using {platform} name as is",
+            )
+
+        field_for_dbname_mapping = {
+            "postgres": "dbname",
+            "sparksql": "dbname",
+            "mongo": "dbname",
+            "redshift": "db",
+            "snowflake": "db",
+            "presto-jdbc": "catalog",
+            "presto": "catalog",
+            "mysql": "dbname",
+            "sqlserver": "db",
+        }
+
+        dbname = (
+            dataset_json.get("details", {}).get(field_for_dbname_mapping[engine])
+            if engine in field_for_dbname_mapping
+            else None
+        )
+
+        if (
+            self.config.database_alias_map is not None
+            and platform in self.config.database_alias_map
+        ):
+            dbname = self.config.database_alias_map[platform]
+        elif dbname is None:  # warn only when neither the alias map nor details gave a name
+            self.report.report_warning(
+                key=f"metabase-dbname-{datasource_id}",
+                reason=f"Cannot determine database name for platform: {platform}",
+            )
+
+        return platform, dbname
+
+    @classmethod
+    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
+        """Build the source from a raw config dict parsed into a MetabaseConfig."""
+        return cls(ctx, MetabaseConfig.parse_obj(config_dict))
+
+    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
+        yield from self.emit_dashboard_mces()  # dashboards are emitted first
+        yield from self.emit_card_mces()  # followed by cards (charts)
+
+    def get_report(self) -> SourceReport:  # the SourceReport created in __init__
+        return self.report
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py
index 692eaba595c9a8..93cc1ce281ae2c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py
@@ -203,6 +203,8 @@ def _get_upstream_lineage_info(
for lineage_entry in lineage:
# Update the table-lineage
upstream_table_name = lineage_entry[0]
+ if not self._is_dataset_allowed(upstream_table_name):
+ continue
upstream_table = UpstreamClass(
dataset=builder.make_dataset_urn(
self.platform, upstream_table_name, self.config.env
@@ -229,8 +231,9 @@ def _get_upstream_lineage_info(
)
column_lineage[column_lineage_key] = column_lineage_value
logger.debug(f"{column_lineage_key}:{column_lineage_value}")
-
- return UpstreamLineage(upstreams=upstream_tables), column_lineage
+ if upstream_tables:
+ return UpstreamLineage(upstreams=upstream_tables), column_lineage
+ return None
# Override the base class method.
def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
@@ -288,3 +291,18 @@ def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
# Emit the work unit from super.
yield wu
+
+    def _is_dataset_allowed(self, dataset_name: Optional[str]) -> bool:
+        """Check a ``db.schema.table`` name against the configured allow/deny patterns."""
+        # View lineages is not supported. Add the allow/deny pattern for that when it is supported.
+        if dataset_name is None:
+            return True
+        name_parts = dataset_name.split(".")
+        if len(name_parts) != 3:
+            return True
+        database, schema, table = name_parts
+        return (
+            self.config.database_pattern.allowed(database)
+            and self.config.schema_pattern.allowed(schema)
+            and self.config.table_pattern.allowed(table)
+        )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index 28af83929cd7e1..470e28a8a163f3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -123,6 +123,7 @@ def make_sqlalchemy_uri(
class SQLSourceReport(SourceReport):
tables_scanned: int = 0
views_scanned: int = 0
+ entities_profiled: int = 0
filtered: List[str] = field(default_factory=list)
query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
@@ -138,6 +139,9 @@ def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
else:
raise KeyError(f"Unknown entity {ent_type}.")
+    def report_entity_profiled(self, name: str) -> None:  # ``name`` is not used yet
+        self.entities_profiled += 1
+
def report_dropped(self, ent_name: str) -> None:
self.filtered.append(ent_name)
@@ -642,12 +646,12 @@ def loop_profiler_requests(
dataset_name = self.get_identifier(
schema=schema, entity=table, inspector=inspector
)
- self.report.report_entity_scanned(f"profile of {dataset_name}")
if not sql_config.profile_pattern.allowed(dataset_name):
self.report.report_dropped(f"profile of {dataset_name}")
continue
+ self.report.report_entity_profiled(dataset_name)
yield GEProfilerRequest(
pretty_name=dataset_name,
batch_kwargs=self.prepare_profiler_args(schema=schema, table=table),
diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py
index 6c8fd051c2c352..2c616b879802c3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py
@@ -12,6 +12,7 @@
from sqlalchemy.engine import Engine
import datahub.emitter.mce_builder as builder
+from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.time_window_config import get_time_bucket
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
@@ -33,7 +34,7 @@
query_history.query_text,
query_history.query_type,
access_history.base_objects_accessed,
- -- access_history.direct_objects_accessed, -- might be useful in the future
+ access_history.direct_objects_accessed, -- when dealing with views, direct objects will show the view while base will show the underlying table
-- query_history.execution_status, -- not really necessary, but should equal "SUCCESS"
-- query_history.warehouse_name,
access_history.user_name,
@@ -82,6 +83,7 @@ class SnowflakeJoinedAccessEvent(PermissiveModel):
query_text: str
query_type: str
base_objects_accessed: List[SnowflakeObjectAccessEntry]
+ direct_objects_accessed: List[SnowflakeObjectAccessEntry]
user_name: str
first_name: Optional[str]
@@ -94,6 +96,13 @@ class SnowflakeJoinedAccessEvent(PermissiveModel):
class SnowflakeUsageConfig(BaseSnowflakeConfig, BaseUsageConfig):
env: str = builder.DEFAULT_ENV
options: dict = {}
+ database_pattern: AllowDenyPattern = AllowDenyPattern(
+ deny=[r"^UTIL_DB$", r"^SNOWFLAKE$", r"^SNOWFLAKE_SAMPLE_DATA$"]
+ )
+ schema_pattern: AllowDenyPattern = AllowDenyPattern.allow_all()
+ table_pattern: AllowDenyPattern = AllowDenyPattern.allow_all()
+ view_pattern: AllowDenyPattern = AllowDenyPattern.allow_all()
+ apply_view_usage_to_tables: bool = False
@pydantic.validator("role", always=True)
def role_accountadmin(cls, v):
@@ -161,14 +170,61 @@ def _get_snowflake_history(self) -> Iterable[SnowflakeJoinedAccessEvent]:
if event_dict["query_text"] is None:
continue
- def is_unsupported_base_object_accessed(obj: Dict[str, Any]) -> bool:
+ def is_unsupported_object_accessed(obj: Dict[str, Any]) -> bool:
unsupported_keys = ["locations"]
return any([obj.get(key) is not None for key in unsupported_keys])
+            def is_dataset_pattern_allowed(
+                dataset_name: Optional[Any], dataset_type: Optional[Any]
+            ) -> bool:
+                """Apply the database, schema and table/view allow-deny patterns.
+
+                Objects that cannot be classified or split into three parts pass.
+                """
+                # TODO: support table/view patterns for usage logs by pulling that information as well from the usage query
+                if not dataset_type or not dataset_name:
+                    return True
+
+                # Test domain type = external_table and then add it
+                domain = dataset_type.lower()
+                table_or_view_pattern: Optional[AllowDenyPattern] = None
+                if domain == "table":
+                    table_or_view_pattern = self.config.table_pattern
+                elif domain in {"view", "materialized_view"}:
+                    table_or_view_pattern = self.config.view_pattern
+                if table_or_view_pattern is None:
+                    return True
+
+                dataset_params = dataset_name.split(".")
+                if len(dataset_params) != 3:
+                    # Not db.schema.name (e.g. extra dots): allow rather than abort the run.
+                    return True
+                if (
+                    not self.config.database_pattern.allowed(dataset_params[0])
+                    or not self.config.schema_pattern.allowed(dataset_params[1])
+                    or not table_or_view_pattern.allowed(dataset_params[2])
+                ):
+                    return False
+                return True
+
+            def is_object_valid(obj: Dict[str, Any]) -> bool:
+                """Keep an accessed object only if it is supported and pattern-allowed."""
+                if is_unsupported_object_accessed(obj):
+                    return False
+                # The pattern check runs only for supported objects.
+                return is_dataset_pattern_allowed(
+                    obj.get("objectName"), obj.get("objectDomain")
+                )
+
event_dict["base_objects_accessed"] = [
obj
for obj in json.loads(event_dict["base_objects_accessed"])
- if not is_unsupported_base_object_accessed(obj)
+ if is_object_valid(obj)
+ ]
+ event_dict["direct_objects_accessed"] = [
+ obj
+ for obj in json.loads(event_dict["direct_objects_accessed"])
+ if is_object_valid(obj)
]
event_dict["query_start_time"] = (
event_dict["query_start_time"]
@@ -195,9 +251,13 @@ def _aggregate_access_events(
event.query_start_time, self.config.bucket_duration
)
- for object in event.base_objects_accessed:
+ accessed_data = (
+ event.base_objects_accessed
+ if self.config.apply_view_usage_to_tables
+ else event.direct_objects_accessed
+ )
+ for object in accessed_data:
resource = object.objectName
-
agg_bucket = datasets[floored_ts].setdefault(
resource,
AggregatedDataset(bucket_start_time=floored_ts, resource=resource),
diff --git a/metadata-ingestion/src/datahub/integrations/airflow/__init__.py b/metadata-ingestion/src/datahub/integrations/airflow/__init__.py
deleted file mode 100644
index a6cb3c58d24ee5..00000000000000
--- a/metadata-ingestion/src/datahub/integrations/airflow/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import warnings
-
-try:
- from datahub_provider.lineage.datahub import (
- DatahubLineageBackend as DatahubAirflowLineageBackend,
- )
-except ModuleNotFoundError:
- # Compat for older versions of Airflow.
- pass
-
-warnings.warn(
- "importing from datahub.integrations.airflow.* is deprecated; "
- "use datahub_provider.{hooks,operators,lineage}.datahub instead"
-)
diff --git a/metadata-ingestion/src/datahub/integrations/airflow/entities.py b/metadata-ingestion/src/datahub/integrations/airflow/entities.py
deleted file mode 100644
index b9471c24f02275..00000000000000
--- a/metadata-ingestion/src/datahub/integrations/airflow/entities.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import warnings
-
-from datahub_provider.entities import Dataset, _Entity # noqa: F401
-
-warnings.warn(
- "importing from datahub.integrations.airflow.* is deprecated; "
- "use datahub_provider.{hooks,operators,lineage}.datahub instead"
-)
diff --git a/metadata-ingestion/src/datahub/integrations/airflow/hooks.py b/metadata-ingestion/src/datahub/integrations/airflow/hooks.py
deleted file mode 100644
index 31008b2d6bc07c..00000000000000
--- a/metadata-ingestion/src/datahub/integrations/airflow/hooks.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import warnings
-
-from datahub_provider.hooks.datahub import ( # noqa: F401
- DatahubGenericHook,
- DatahubKafkaHook,
- DatahubRestHook,
-)
-
-warnings.warn(
- "importing from datahub.integrations.airflow.* is deprecated; "
- "use datahub_provider.{hooks,operators,lineage}.datahub instead"
-)
diff --git a/metadata-ingestion/src/datahub/integrations/airflow/operators.py b/metadata-ingestion/src/datahub/integrations/airflow/operators.py
deleted file mode 100644
index f889dca581d34a..00000000000000
--- a/metadata-ingestion/src/datahub/integrations/airflow/operators.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import warnings
-
-from datahub_provider.operators.datahub import ( # noqa: F401
- DatahubBaseOperator,
- DatahubEmitterOperator,
-)
-
-warnings.warn(
- "importing from datahub.integrations.airflow.* is deprecated; "
- "use datahub_provider.{hooks,operators,lineage}.datahub instead"
-)
diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json
new file mode 100644
index 00000000000000..127988ba381d7b
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json
@@ -0,0 +1,206 @@
+[
+{
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
+ "urn": "urn:li:dashboard:(metabase,1)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
+ "customProperties": {},
+ "externalUrl": null,
+ "title": "Dashboard 1",
+ "description": "",
+ "charts": [
+ "urn:li:chart:(metabase,1)",
+ "urn:li:chart:(metabase,2)"
+ ],
+ "lastModified": {
+ "created": {
+ "time": 1639417721742,
+ "actor": "urn:li:corpuser:admin@metabase.com",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1639417721742,
+ "actor": "urn:li:corpuser:admin@metabase.com",
+ "impersonator": null
+ },
+ "deleted": null
+ },
+ "dashboardUrl": "http://localhost:3000/dashboard/1",
+ "access": null,
+ "lastRefreshed": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:admin@metabase.com",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown",
+ "impersonator": null
+ }
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null,
+ "systemMetadata": {
+ "lastObserved": 1636614000000,
+ "runId": "metabase-test",
+ "registryName": null,
+ "registryVersion": null,
+ "properties": null
+ }
+},
+{
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
+ "urn": "urn:li:chart:(metabase,1)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.chart.ChartInfo": {
+ "customProperties": {
+ "Metrics": "",
+ "Filters": "",
+ "Dimensions": "customer_id, first_name, last_name, amount, payment_date, rental_id"
+ },
+ "externalUrl": null,
+ "title": "Customer Payment",
+ "description": "",
+ "lastModified": {
+ "created": {
+ "time": 1639417592792,
+ "actor": "urn:li:corpuser:admin@metabase.com",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1639417592792,
+ "actor": "urn:li:corpuser:admin@metabase.com",
+ "impersonator": null
+ },
+ "deleted": null
+ },
+ "chartUrl": "http://localhost:3000/card/1",
+ "inputs": [
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.payment,PROD)"
+ }
+ ],
+ "type": "TABLE",
+ "access": null,
+ "lastRefreshed": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.chart.ChartQuery": {
+ "rawQuery": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date",
+ "type": "SQL"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:admin@metabase.com",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown",
+ "impersonator": null
+ }
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null,
+ "systemMetadata": {
+ "lastObserved": 1636614000000,
+ "runId": "metabase-test",
+ "registryName": null,
+ "registryVersion": null,
+ "properties": null
+ }
+},
+{
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
+ "urn": "urn:li:chart:(metabase,2)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.chart.ChartInfo": {
+ "customProperties": {
+ "Metrics": "Count",
+ "Filters": "['=', ['field', 136, None], 2006]",
+ "Dimensions": "Rating"
+ },
+ "externalUrl": null,
+ "title": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending",
+ "description": "",
+ "lastModified": {
+ "created": {
+ "time": 1639417717110,
+ "actor": "urn:li:corpuser:admin@metabase.com",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1639417717110,
+ "actor": "urn:li:corpuser:admin@metabase.com",
+ "impersonator": null
+ },
+ "deleted": null
+ },
+ "chartUrl": "http://localhost:3000/card/2",
+ "inputs": [
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.film,PROD)"
+ }
+ ],
+ "type": "BAR",
+ "access": null,
+ "lastRefreshed": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:admin@metabase.com",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown",
+ "impersonator": null
+ }
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null,
+ "systemMetadata": {
+ "lastObserved": 1636614000000,
+ "runId": "metabase-test",
+ "registryName": null,
+ "registryVersion": null,
+ "properties": null
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/metabase_to_file.yml b/metadata-ingestion/tests/integration/metabase/metabase_to_file.yml
new file mode 100644
index 00000000000000..3a5984d7ad36b5
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/metabase_to_file.yml
@@ -0,0 +1,15 @@
+run_id: metabase-test
+
+source:
+ type: "metabase"
+ config:
+ username: admin@metabase.com
+ password: admin12345
+ connect_uri: http://localhost:3000/
+ database_alias_map:
+ h2: sample-dataset.db
+
+sink:
+ type: file
+ config:
+ filename: "./metabase_mces.json"
diff --git a/metadata-ingestion/tests/integration/metabase/setup/card.json b/metadata-ingestion/tests/integration/metabase/setup/card.json
new file mode 100644
index 00000000000000..439edbf60014fa
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/card.json
@@ -0,0 +1,307 @@
+[{
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "table_id": null,
+ "result_metadata": [{
+ "name": "customer_id",
+ "display_name": "customer_id",
+ "base_type": "type/Integer",
+ "effective_type": "type/Integer",
+ "field_ref": ["field", "customer_id", {
+ "base-type": "type/Integer"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 517,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 1.0,
+ "q1": 127.95550051624855,
+ "q3": 457.48181481488376,
+ "max": 599.0,
+ "sd": 183.35453319901166,
+ "avg": 293.316
+ }
+ }
+ }
+ }, {
+ "name": "first_name",
+ "display_name": "first_name",
+ "base_type": "type/Text",
+ "effective_type": "type/Text",
+ "field_ref": ["field", "first_name", {
+ "base-type": "type/Text"
+ }],
+ "semantic_type": "type/Name",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 509,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0035,
+ "average-length": 5.629
+ }
+ }
+ }
+ }, {
+ "name": "last_name",
+ "display_name": "last_name",
+ "base_type": "type/Text",
+ "effective_type": "type/Text",
+ "field_ref": ["field", "last_name", {
+ "base-type": "type/Text"
+ }],
+ "semantic_type": "type/Name",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 517,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0015,
+ "average-length": 6.126
+ }
+ }
+ }
+ }, {
+ "name": "amount",
+ "display_name": "amount",
+ "base_type": "type/Decimal",
+ "effective_type": "type/Decimal",
+ "field_ref": ["field", "amount", {
+ "base-type": "type/Decimal"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 11,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 0.99,
+ "q1": 2.399411317392306,
+ "q3": 5.52734176879965,
+ "max": 10.99,
+ "sd": 2.352151368009511,
+ "avg": 4.1405
+ }
+ }
+ }
+ }, {
+ "name": "payment_date",
+ "display_name": "payment_date",
+ "base_type": "type/DateTime",
+ "effective_type": "type/DateTime",
+ "field_ref": ["field", "payment_date", {
+ "base-type": "type/DateTime"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 1998,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/DateTime": {
+ "earliest": "2007-02-14T21:21:59.996577Z",
+ "latest": "2007-02-21T19:27:46.996577Z"
+ }
+ }
+ }
+ }, {
+ "name": "rental_id",
+ "display_name": "rental_id",
+ "base_type": "type/Integer",
+ "effective_type": "type/Integer",
+ "field_ref": ["field", "rental_id", {
+ "base-type": "type/Integer"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 2000,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 1158.0,
+ "q1": 1731.7967120913397,
+ "q3": 2871.359273326854,
+ "max": 4591.0,
+ "sd": 660.7468728104022,
+ "avg": 2303.4565
+ }
+ }
+ }
+ }],
+ "creator": {
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_login": "2021-12-13T18:51:32.999",
+ "is_qbnewb": true,
+ "is_superuser": true,
+ "id": 1,
+ "last_name": "LastName",
+ "date_joined": "2021-12-13T07:34:21.806",
+ "common_name": "FirstName LastName"
+ },
+ "database_id": 2,
+ "enable_embedding": false,
+ "collection_id": null,
+ "query_type": "native",
+ "name": "Customer Payment",
+ "creator_id": 1,
+ "updated_at": "2021-12-13T17:48:40.478",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "dataset_query": {
+ "type": "native",
+ "native": {
+ "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date",
+ "template-tags": {}
+ },
+ "database": 2
+ },
+ "id": 1,
+ "display": "table",
+ "last-edit-info": {
+ "id": 1,
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_name": "LastName",
+ "timestamp": "2021-12-13T17:46:32.792"
+ },
+ "visualization_settings": {
+ "table.pivot_column": "amount",
+ "table.cell_column": "customer_id"
+ },
+ "collection": null,
+ "favorite": false,
+ "created_at": "2021-12-13T17:46:32.77",
+ "public_uuid": null
+}, {
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "table_id": 21,
+ "result_metadata": [{
+ "semantic_type": "type/Category",
+ "coercion_strategy": null,
+ "name": "rating",
+ "field_ref": ["field", 131, null],
+ "effective_type": "type/*",
+ "id": 131,
+ "display_name": "Rating",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 5,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0,
+ "average-length": 2.926
+ }
+ }
+ },
+ "base_type": "type/PostgresEnum"
+ }, {
+ "name": "count",
+ "display_name": "Count",
+ "base_type": "type/BigInteger",
+ "effective_type": "type/BigInteger",
+ "semantic_type": "type/Quantity",
+ "field_ref": ["aggregation", 0],
+ "fingerprint": {
+ "global": {
+ "distinct-count": 5,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 178.0,
+ "q1": 190.0,
+ "q3": 213.25,
+ "max": 223.0,
+ "sd": 17.131841699011815,
+ "avg": 200.0
+ }
+ }
+ }
+ }],
+ "creator": {
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_login": "2021-12-13T18:51:32.999",
+ "is_qbnewb": true,
+ "is_superuser": true,
+ "id": 1,
+ "last_name": "LastName",
+ "date_joined": "2021-12-13T07:34:21.806",
+ "common_name": "FirstName LastName"
+ },
+ "database_id": 2,
+ "enable_embedding": false,
+ "collection_id": null,
+ "query_type": "query",
+ "name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending",
+ "creator_id": 1,
+ "updated_at": "2021-12-13T17:48:39.999",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "dataset_query": {
+ "query": {
+ "source-table": 21,
+ "breakout": [
+ ["field", 131, null]
+ ],
+ "aggregation": [
+ ["count"]
+ ],
+ "order-by": [
+ ["desc", ["aggregation", 0]]
+ ],
+ "filter": ["=", ["field", 136, null], 2006]
+ },
+ "database": 2,
+ "type": "query"
+ },
+ "id": 2,
+ "display": "row",
+ "last-edit-info": {
+ "id": 1,
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_name": "LastName",
+ "timestamp": "2021-12-13T17:48:37.11"
+ },
+ "visualization_settings": {
+ "graph.series_labels": ["number"],
+ "graph.dimensions": ["rating"],
+ "graph.colors": ["#509EE3"],
+ "graph.metrics": ["count"]
+ },
+ "collection": null,
+ "favorite": false,
+ "created_at": "2021-12-13T17:48:37.102",
+ "public_uuid": null
+}]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/card_1.json b/metadata-ingestion/tests/integration/metabase/setup/card_1.json
new file mode 100644
index 00000000000000..11f1adee39c660
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/card_1.json
@@ -0,0 +1,201 @@
+{
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "table_id": null,
+ "result_metadata": [{
+ "name": "customer_id",
+ "display_name": "customer_id",
+ "base_type": "type/Integer",
+ "effective_type": "type/Integer",
+ "field_ref": ["field", "customer_id", {
+ "base-type": "type/Integer"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 517,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 1.0,
+ "q1": 127.95550051624855,
+ "q3": 457.48181481488376,
+ "max": 599.0,
+ "sd": 183.35453319901166,
+ "avg": 293.316
+ }
+ }
+ }
+ }, {
+ "name": "first_name",
+ "display_name": "first_name",
+ "base_type": "type/Text",
+ "effective_type": "type/Text",
+ "field_ref": ["field", "first_name", {
+ "base-type": "type/Text"
+ }],
+ "semantic_type": "type/Name",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 509,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0035,
+ "average-length": 5.629
+ }
+ }
+ }
+ }, {
+ "name": "last_name",
+ "display_name": "last_name",
+ "base_type": "type/Text",
+ "effective_type": "type/Text",
+ "field_ref": ["field", "last_name", {
+ "base-type": "type/Text"
+ }],
+ "semantic_type": "type/Name",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 517,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0015,
+ "average-length": 6.126
+ }
+ }
+ }
+ }, {
+ "name": "amount",
+ "display_name": "amount",
+ "base_type": "type/Decimal",
+ "effective_type": "type/Decimal",
+ "field_ref": ["field", "amount", {
+ "base-type": "type/Decimal"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 11,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 0.99,
+ "q1": 2.399411317392306,
+ "q3": 5.52734176879965,
+ "max": 10.99,
+ "sd": 2.352151368009511,
+ "avg": 4.1405
+ }
+ }
+ }
+ }, {
+ "name": "payment_date",
+ "display_name": "payment_date",
+ "base_type": "type/DateTime",
+ "effective_type": "type/DateTime",
+ "field_ref": ["field", "payment_date", {
+ "base-type": "type/DateTime"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 1998,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/DateTime": {
+ "earliest": "2007-02-14T21:21:59.996577Z",
+ "latest": "2007-02-21T19:27:46.996577Z"
+ }
+ }
+ }
+ }, {
+ "name": "rental_id",
+ "display_name": "rental_id",
+ "base_type": "type/Integer",
+ "effective_type": "type/Integer",
+ "field_ref": ["field", "rental_id", {
+ "base-type": "type/Integer"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 2000,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 1158.0,
+ "q1": 1731.7967120913397,
+ "q3": 2871.359273326854,
+ "max": 4591.0,
+ "sd": 660.7468728104022,
+ "avg": 2303.4565
+ }
+ }
+ }
+ }],
+ "creator": {
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_login": "2021-12-13T22:25:45.761",
+ "is_qbnewb": true,
+ "is_superuser": true,
+ "id": 1,
+ "last_name": "LastName",
+ "date_joined": "2021-12-13T07:34:21.806",
+ "common_name": "FirstName LastName"
+ },
+ "can_write": true,
+ "database_id": 2,
+ "enable_embedding": false,
+ "collection_id": null,
+ "query_type": "native",
+ "name": "Customer Payment",
+ "last_query_start": "2021-12-13T17:48:39.802905Z",
+ "dashboard_count": 1,
+ "average_query_time": 662,
+ "creator_id": 1,
+ "moderation_reviews": [],
+ "updated_at": "2021-12-13T17:48:40.478",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "dataset_query": {
+ "type": "native",
+ "native": {
+ "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date",
+ "template-tags": {}
+ },
+ "database": 2
+ },
+ "id": 1,
+ "display": "table",
+ "last-edit-info": {
+ "id": 1,
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_name": "LastName",
+ "timestamp": "2021-12-13T17:46:32.792"
+ },
+ "visualization_settings": {
+ "table.pivot_column": "amount",
+ "table.cell_column": "customer_id"
+ },
+ "collection": null,
+ "created_at": "2021-12-13T17:46:32.77",
+ "public_uuid": null
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/card_2.json b/metadata-ingestion/tests/integration/metabase/setup/card_2.json
new file mode 100644
index 00000000000000..979178cd24fb0d
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/card_2.json
@@ -0,0 +1,115 @@
+{
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "table_id": 21,
+ "result_metadata": [{
+ "semantic_type": "type/Category",
+ "coercion_strategy": null,
+ "name": "rating",
+ "field_ref": ["field", 131, null],
+ "effective_type": "type/*",
+ "id": 131,
+ "display_name": "Rating",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 5,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0,
+ "average-length": 2.926
+ }
+ }
+ },
+ "base_type": "type/PostgresEnum"
+ }, {
+ "name": "count",
+ "display_name": "Count",
+ "base_type": "type/BigInteger",
+ "effective_type": "type/BigInteger",
+ "semantic_type": "type/Quantity",
+ "field_ref": ["aggregation", 0],
+ "fingerprint": {
+ "global": {
+ "distinct-count": 5,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 178.0,
+ "q1": 190.0,
+ "q3": 213.25,
+ "max": 223.0,
+ "sd": 17.131841699011815,
+ "avg": 200.0
+ }
+ }
+ }
+ }],
+ "creator": {
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_login": "2021-12-13T22:24:03.913",
+ "is_qbnewb": true,
+ "is_superuser": true,
+ "id": 1,
+ "last_name": "LastName",
+ "date_joined": "2021-12-13T07:34:21.806",
+ "common_name": "FirstName LastName"
+ },
+ "can_write": true,
+ "database_id": 2,
+ "enable_embedding": false,
+ "collection_id": null,
+ "query_type": "query",
+ "name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending",
+ "last_query_start": "2021-12-13T17:48:39.93677Z",
+ "dashboard_count": 1,
+ "average_query_time": 64,
+ "creator_id": 1,
+ "moderation_reviews": [],
+ "updated_at": "2021-12-13T17:48:39.999",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "dataset_query": {
+ "query": {
+ "source-table": 21,
+ "breakout": [
+ ["field", 131, null]
+ ],
+ "aggregation": [
+ ["count"]
+ ],
+ "order-by": [
+ ["desc", ["aggregation", 0]]
+ ],
+ "filter": ["=", ["field", 136, null], 2006]
+ },
+ "database": 2,
+ "type": "query"
+ },
+ "id": 2,
+ "display": "row",
+ "last-edit-info": {
+ "id": 1,
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_name": "LastName",
+ "timestamp": "2021-12-13T17:48:37.11"
+ },
+ "visualization_settings": {
+ "graph.series_labels": ["number"],
+ "graph.dimensions": ["rating"],
+ "graph.colors": ["#509EE3"],
+ "graph.metrics": ["count"]
+ },
+ "collection": null,
+ "created_at": "2021-12-13T17:48:37.102",
+ "public_uuid": null
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard.json
new file mode 100644
index 00000000000000..095abf1bbdc6d5
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard.json
@@ -0,0 +1,40 @@
+[{
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "creator": {
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_login": "2021-12-13T18:51:32.999",
+ "is_qbnewb": true,
+ "is_superuser": true,
+ "id": 1,
+ "last_name": "LastName",
+ "date_joined": "2021-12-13T07:34:21.806",
+ "common_name": "FirstName LastName"
+ },
+ "enable_embedding": false,
+ "collection_id": null,
+ "show_in_getting_started": false,
+ "name": "Dashboard 1",
+ "caveats": null,
+ "creator_id": 1,
+ "updated_at": "2021-12-13T17:48:41.735",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "id": 1,
+ "position": null,
+ "last-edit-info": {
+ "id": 1,
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_name": "LastName",
+ "timestamp": "2021-12-13T17:48:41.742"
+ },
+ "parameters": [],
+ "favorite": false,
+ "created_at": "2021-12-13T17:46:48.185",
+ "public_uuid": null,
+ "points_of_interest": null
+}]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json
new file mode 100644
index 00000000000000..0b232cd220045c
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json
@@ -0,0 +1,333 @@
+{
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "ordered_cards": [{
+ "sizeX": 4,
+ "series": [],
+ "collection_authority_level": null,
+ "card": {
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "table_id": null,
+ "result_metadata": [{
+ "name": "customer_id",
+ "display_name": "customer_id",
+ "base_type": "type/Integer",
+ "effective_type": "type/Integer",
+ "field_ref": ["field", "customer_id", {
+ "base-type": "type/Integer"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 517,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 1.0,
+ "q1": 127.95550051624855,
+ "q3": 457.48181481488376,
+ "max": 599.0,
+ "sd": 183.35453319901166,
+ "avg": 293.316
+ }
+ }
+ }
+ }, {
+ "name": "first_name",
+ "display_name": "first_name",
+ "base_type": "type/Text",
+ "effective_type": "type/Text",
+ "field_ref": ["field", "first_name", {
+ "base-type": "type/Text"
+ }],
+ "semantic_type": "type/Name",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 509,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0035,
+ "average-length": 5.629
+ }
+ }
+ }
+ }, {
+ "name": "last_name",
+ "display_name": "last_name",
+ "base_type": "type/Text",
+ "effective_type": "type/Text",
+ "field_ref": ["field", "last_name", {
+ "base-type": "type/Text"
+ }],
+ "semantic_type": "type/Name",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 517,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0015,
+ "average-length": 6.126
+ }
+ }
+ }
+ }, {
+ "name": "amount",
+ "display_name": "amount",
+ "base_type": "type/Decimal",
+ "effective_type": "type/Decimal",
+ "field_ref": ["field", "amount", {
+ "base-type": "type/Decimal"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 11,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 0.99,
+ "q1": 2.399411317392306,
+ "q3": 5.52734176879965,
+ "max": 10.99,
+ "sd": 2.352151368009511,
+ "avg": 4.1405
+ }
+ }
+ }
+ }, {
+ "name": "payment_date",
+ "display_name": "payment_date",
+ "base_type": "type/DateTime",
+ "effective_type": "type/DateTime",
+ "field_ref": ["field", "payment_date", {
+ "base-type": "type/DateTime"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 1998,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/DateTime": {
+ "earliest": "2007-02-14T21:21:59.996577Z",
+ "latest": "2007-02-21T19:27:46.996577Z"
+ }
+ }
+ }
+ }, {
+ "name": "rental_id",
+ "display_name": "rental_id",
+ "base_type": "type/Integer",
+ "effective_type": "type/Integer",
+ "field_ref": ["field", "rental_id", {
+ "base-type": "type/Integer"
+ }],
+ "semantic_type": null,
+ "fingerprint": {
+ "global": {
+ "distinct-count": 2000,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 1158.0,
+ "q1": 1731.7967120913397,
+ "q3": 2871.359273326854,
+ "max": 4591.0,
+ "sd": 660.7468728104022,
+ "avg": 2303.4565
+ }
+ }
+ }
+ }],
+ "database_id": 2,
+ "enable_embedding": false,
+ "collection_id": null,
+ "query_type": "native",
+ "name": "Customer Payment",
+ "query_average_duration": 820,
+ "creator_id": 1,
+ "moderation_reviews": [],
+ "updated_at": "2021-12-13T17:48:40.478",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "dataset_query": {
+ "type": "native",
+ "native": {
+ "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date",
+ "template-tags": {}
+ },
+ "database": 2
+ },
+ "id": 1,
+ "display": "table",
+ "visualization_settings": {
+ "table.pivot_column": "amount",
+ "table.cell_column": "customer_id"
+ },
+ "created_at": "2021-12-13T17:46:32.77",
+ "public_uuid": null
+ },
+ "updated_at": "2021-12-13T17:48:41.68",
+ "col": 0,
+ "id": 1,
+ "parameter_mappings": [],
+ "card_id": 1,
+ "visualization_settings": {},
+ "dashboard_id": 1,
+ "created_at": "2021-12-13T17:46:52.278",
+ "sizeY": 4,
+ "row": 0
+ }, {
+ "sizeX": 4,
+ "series": [],
+ "collection_authority_level": null,
+ "card": {
+ "description": null,
+ "archived": false,
+ "collection_position": null,
+ "table_id": 21,
+ "result_metadata": [{
+ "semantic_type": "type/Category",
+ "coercion_strategy": null,
+ "name": "rating",
+ "field_ref": ["field", 131, null],
+ "effective_type": "type/*",
+ "id": 131,
+ "display_name": "Rating",
+ "fingerprint": {
+ "global": {
+ "distinct-count": 5,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Text": {
+ "percent-json": 0.0,
+ "percent-url": 0.0,
+ "percent-email": 0.0,
+ "percent-state": 0.0,
+ "average-length": 2.926
+ }
+ }
+ },
+ "base_type": "type/PostgresEnum"
+ }, {
+ "name": "count",
+ "display_name": "Count",
+ "base_type": "type/BigInteger",
+ "effective_type": "type/BigInteger",
+ "semantic_type": "type/Quantity",
+ "field_ref": ["aggregation", 0],
+ "fingerprint": {
+ "global": {
+ "distinct-count": 5,
+ "nil%": 0.0
+ },
+ "type": {
+ "type/Number": {
+ "min": 178.0,
+ "q1": 190.0,
+ "q3": 213.25,
+ "max": 223.0,
+ "sd": 17.131841699011815,
+ "avg": 200.0
+ }
+ }
+ }
+ }],
+ "database_id": 2,
+ "enable_embedding": false,
+ "collection_id": null,
+ "query_type": "query",
+ "name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending",
+ "query_average_duration": 25,
+ "creator_id": 1,
+ "moderation_reviews": [],
+ "updated_at": "2021-12-13T17:48:39.999",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "dataset_query": {
+ "query": {
+ "source-table": 21,
+ "breakout": [
+ ["field", 131, null]
+ ],
+ "aggregation": [
+ ["count"]
+ ],
+ "order-by": [
+ ["desc", ["aggregation", 0]]
+ ],
+ "filter": ["=", ["field", 136, null], 2006]
+ },
+ "database": 2,
+ "type": "query"
+ },
+ "id": 2,
+ "display": "row",
+ "visualization_settings": {
+ "graph.series_labels": ["number"],
+ "graph.dimensions": ["rating"],
+ "graph.colors": ["#509EE3"],
+ "graph.metrics": ["count"]
+ },
+ "created_at": "2021-12-13T17:48:37.102",
+ "public_uuid": null
+ },
+ "updated_at": "2021-12-13T17:48:41.682",
+ "col": 4,
+ "id": 2,
+ "parameter_mappings": [],
+ "card_id": 2,
+ "visualization_settings": {},
+ "dashboard_id": 1,
+ "created_at": "2021-12-13T17:48:41.62",
+ "sizeY": 4,
+ "row": 0
+ }],
+ "param_values": null,
+ "can_write": true,
+ "enable_embedding": false,
+ "collection_id": null,
+ "show_in_getting_started": false,
+ "name": "Dashboard 1",
+ "caveats": null,
+ "collection_authority_level": null,
+ "creator_id": 1,
+ "updated_at": "2021-12-13T17:48:41.735",
+ "made_public_by_id": null,
+ "embedding_params": null,
+ "cache_ttl": null,
+ "id": 1,
+ "position": null,
+ "param_fields": null,
+ "last-edit-info": {
+ "id": 1,
+ "email": "admin@metabase.com",
+ "first_name": "FirstName",
+ "last_name": "LastName",
+ "timestamp": "2021-12-13T17:48:41.742"
+ },
+ "parameters": [],
+ "created_at": "2021-12-13T17:46:48.185",
+ "public_uuid": null,
+ "points_of_interest": null
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/database.json b/metadata-ingestion/tests/integration/metabase/setup/database.json
new file mode 100644
index 00000000000000..cd1cbdaf6624f1
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/database.json
@@ -0,0 +1,47 @@
+{
+ "description": null,
+ "features": ["full-join", "basic-aggregations", "standard-deviation-aggregations", "expression-aggregations", "percentile-aggregations", "foreign-keys", "right-join", "left-join", "native-parameters", "nested-queries", "expressions", "set-timezone", "regex", "case-sensitivity-string-filter-options", "binning", "inner-join", "advanced-math-expressions"],
+ "cache_field_values_schedule": "0 0 5 * * ? *",
+ "timezone": "UTC",
+ "auto_run_queries": true,
+ "metadata_sync_schedule": "0 15 * * * ? *",
+ "name": "DVD Rental",
+ "caveats": null,
+ "is_full_sync": true,
+ "updated_at": "2021-12-13T17:45:04.871",
+ "cache_ttl": null,
+ "details": {
+ "host": "172.17.0.1",
+ "port": 5432,
+ "dbname": "dvdrental",
+ "user": "postgres",
+ "password": "**MetabasePass**",
+ "ssl": true,
+ "additional-options": null,
+ "tunnel-enabled": false
+ },
+ "is_sample": false,
+ "id": 2,
+ "is_on_demand": false,
+ "options": null,
+ "schedules": {
+ "cache_field_values": {
+ "schedule_minute": 0,
+ "schedule_day": null,
+ "schedule_frame": null,
+ "schedule_hour": 5,
+ "schedule_type": "daily"
+ },
+ "metadata_sync": {
+ "schedule_minute": 15,
+ "schedule_day": null,
+ "schedule_frame": null,
+ "schedule_hour": null,
+ "schedule_type": "hourly"
+ }
+ },
+ "engine": "postgres",
+ "refingerprint": null,
+ "created_at": "2021-12-13T17:45:04.72",
+ "points_of_interest": null
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/session.json b/metadata-ingestion/tests/integration/metabase/setup/session.json
new file mode 100644
index 00000000000000..576a52874d21c5
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/session.json
@@ -0,0 +1 @@
+{"id": "12345abv6789"}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/table_21.json b/metadata-ingestion/tests/integration/metabase/setup/table_21.json
new file mode 100644
index 00000000000000..db748d0102fc16
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/table_21.json
@@ -0,0 +1,50 @@
+{
+ "description": null,
+ "entity_type": "entity/GenericTable",
+ "schema": "public",
+ "db": {
+ "description": null,
+ "features": ["full-join", "basic-aggregations", "standard-deviation-aggregations", "expression-aggregations", "percentile-aggregations", "foreign-keys", "right-join", "left-join", "native-parameters", "nested-queries", "expressions", "set-timezone", "regex", "case-sensitivity-string-filter-options", "binning", "inner-join", "advanced-math-expressions"],
+ "cache_field_values_schedule": "0 0 5 * * ? *",
+ "timezone": "UTC",
+ "auto_run_queries": true,
+ "metadata_sync_schedule": "0 15 * * * ? *",
+ "name": "DVD Rental",
+ "caveats": null,
+ "is_full_sync": true,
+ "updated_at": "2021-12-13T17:45:04.871",
+ "cache_ttl": null,
+ "details": {
+ "host": "172.17.0.1",
+ "port": 5432,
+ "dbname": "dvdrental",
+ "user": "postgres",
+ "password": "**MetabasePass**",
+ "ssl": true,
+ "additional-options": null,
+ "tunnel-enabled": false
+ },
+ "is_sample": false,
+ "id": 2,
+ "is_on_demand": false,
+ "options": null,
+ "engine": "postgres",
+ "refingerprint": null,
+ "created_at": "2021-12-13T17:45:04.72",
+ "points_of_interest": null
+ },
+ "show_in_getting_started": false,
+ "name": "film",
+ "caveats": null,
+ "updated_at": "2021-12-13T17:45:10.52",
+ "pk_field": 128,
+ "entity_name": null,
+ "active": true,
+ "id": 21,
+ "db_id": 2,
+ "visibility_type": null,
+ "field_order": "database",
+ "display_name": "Film",
+ "created_at": "2021-12-13T17:45:04.991",
+ "points_of_interest": null
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/setup/user.json b/metadata-ingestion/tests/integration/metabase/setup/user.json
new file mode 100644
index 00000000000000..5cb63eabb8de8f
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/setup/user.json
@@ -0,0 +1,18 @@
+{
+ "email": "admin@metabase.com",
+ "ldap_auth": false,
+ "first_name": "FirstName",
+ "locale": null,
+ "last_login": "2021-12-13T18:51:32.999",
+ "is_active": true,
+ "is_qbnewb": true,
+ "updated_at": "2021-12-13T18:51:32.999",
+ "group_ids": [1, 2],
+ "is_superuser": true,
+ "login_attributes": null,
+ "id": 1,
+ "last_name": "LastName",
+ "date_joined": "2021-12-13T07:34:21.806",
+ "common_name": "FirstName LastName",
+ "google_auth": false
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/metabase/test_metabase.py b/metadata-ingestion/tests/integration/metabase/test_metabase.py
new file mode 100644
index 00000000000000..8ee2f67bc532f0
--- /dev/null
+++ b/metadata-ingestion/tests/integration/metabase/test_metabase.py
@@ -0,0 +1,165 @@
+import json
+from unittest.mock import patch
+
+from freezegun import freeze_time
+from requests.models import HTTPError
+
+from datahub.configuration.common import PipelineExecutionError
+from datahub.ingestion.run.pipeline import Pipeline
+from tests.test_helpers import mce_helpers
+
+FROZEN_TIME = "2021-11-11 07:00:00"  # passed to @freeze_time on both tests
+
+JSON_RESPONSE_MAP = {  # mocked Metabase API URL -> fixture file name under setup/
+ "http://localhost:3000/api/session": "session.json",
+ "http://localhost:3000/api/user/current": "user.json",
+ "http://localhost:3000/api/dashboard": "dashboard.json",
+ "http://localhost:3000/api/dashboard/1": "dashboard_1.json",
+ "http://localhost:3000/api/user/1": "user.json",
+ "http://localhost:3000/api/card": "card.json",
+ "http://localhost:3000/api/database/2": "database.json",
+ "http://localhost:3000/api/card/1": "card_1.json",
+ "http://localhost:3000/api/card/2": "card_2.json",
+ "http://localhost:3000/api/table/21": "table_21.json",
+}
+
+RESPONSE_ERROR_LIST = ["http://localhost:3000/api/dashboard"]  # simulated failures
+
+test_resources_dir = None  # set by each test, read by MockResponse.json()
+
+
+class MockResponse:
+    """Canned stand-in for the ``requests`` session and response objects."""
+
+    def __init__(self, url, data=None, jsond=None, error_list=None):
+        self.url = url
+        self.json_data = data
+        self.jsond = jsond
+        # URLs for which raise_for_status() simulates an HTTP failure.
+        self.error_list = error_list
+        self.status_code = 200
+        self.headers = {}
+        self.auth = None
+
+    def json(self):
+        """Read the fixture mapped to the current URL; keep it in ``json_data``."""
+        fixture_name = JSON_RESPONSE_MAP.get(self.url)
+        with open(f"{test_resources_dir}/setup/{fixture_name}") as fixture:
+            self.json_data = json.loads(fixture.read())
+        return self.json_data
+
+    def get(self, url):
+        """Record the requested URL and return this object as the response."""
+        self.url = url
+        return self
+
+    def raise_for_status(self):
+        """Raise ``HTTPError`` when the current URL is in ``error_list``."""
+        if self.error_list is None or self.url not in self.error_list:
+            return
+        message = "%s Client Error: %s for url: %s" % (400, "Simulate error", self.url)
+        raise HTTPError(message, response=self)
+
+
+def mocked_requests_sucess(*args, **kwargs):  # NOTE(review): "sucess" typo, kept for its caller
+ return MockResponse(None)  # requests.session stand-in; no error_list, so it never raises
+
+
+def mocked_requests_failure(*args, **kwargs):  # side_effect for requests.session
+ return MockResponse(None, error_list=RESPONSE_ERROR_LIST)  # listed URLs raise HTTPError
+
+
+def mocked_requests_session_post(url, data, json):  # side_effect for requests.post
+ return MockResponse(url, data, json)  # the json argument (shadows the module) lands in jsond
+
+
+def mocked_requests_session_delete(url, headers):  # side_effect for requests.delete
+ return MockResponse(url, data=None, jsond=headers)  # headers are stored in jsond
+
+
+@freeze_time(FROZEN_TIME)
+def test_mode_ingest_success(pytestconfig, tmp_path):
+    """Run the metabase source against mocked HTTP and compare with the golden file."""
+    global test_resources_dir
+    test_resources_dir = pytestconfig.rootpath / "tests/integration/metabase"
+    output_file = f"{tmp_path}/metabase_mces.json"
+    recipe = {
+        "run_id": "metabase-test",
+        "source": {
+            "type": "metabase",
+            "config": {
+                "username": "xxxx",
+                "password": "xxxx",
+                "connect_uri": "http://localhost:3000/",
+            },
+        },
+        "sink": {
+            "type": "file",
+            "config": {"filename": output_file},
+        },
+    }
+
+    # Every requests entry point patched below hands back a MockResponse.
+    with patch(
+        "datahub.ingestion.source.metabase.requests.session",
+        side_effect=mocked_requests_sucess,
+    ), patch(
+        "datahub.ingestion.source.metabase.requests.post",
+        side_effect=mocked_requests_session_post,
+    ), patch(
+        "datahub.ingestion.source.metabase.requests.delete",
+        side_effect=mocked_requests_session_delete,
+    ):
+        pipeline = Pipeline.create(recipe)
+        pipeline.run()
+        pipeline.raise_from_status()
+
+        mce_helpers.check_golden_file(
+            pytestconfig,
+            output_path=output_file,
+            golden_path=test_resources_dir / "metabase_mces_golden.json",
+            ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS,
+        )
+
+
+@freeze_time(FROZEN_TIME)
+def test_mode_ingest_failure(pytestconfig, tmp_path):
+    """A failing dashboard request must be reported as exactly one source failure."""
+    global test_resources_dir
+    test_resources_dir = pytestconfig.rootpath / "tests/integration/metabase"
+    recipe = {
+        "run_id": "metabase-test",
+        "source": {
+            "type": "metabase",
+            "config": {
+                "username": "xxxx",
+                "password": "xxxx",
+                "connect_uri": "http://localhost:3000/",
+            },
+        },
+        "sink": {
+            "type": "file",
+            "config": {"filename": f"{tmp_path}/metabase_mces.json"},
+        },
+    }
+
+    with patch(
+        "datahub.ingestion.source.metabase.requests.session",
+        side_effect=mocked_requests_failure,
+    ), patch(
+        "datahub.ingestion.source.metabase.requests.post",
+        side_effect=mocked_requests_session_post,
+    ), patch(
+        "datahub.ingestion.source.metabase.requests.delete",
+        side_effect=mocked_requests_session_delete,
+    ):
+        pipeline = Pipeline.create(recipe)
+        pipeline.run()
+        try:
+            pipeline.raise_from_status()
+        except PipelineExecutionError as exec_error:
+            assert exec_error.args[0] == "Source reported errors"
+            assert len(exec_error.args[1].failures) == 1
+            assert list(exec_error.args[1].failures.keys())[0] == "metabase-dashboard"
+        else:  # no exception means the simulated dashboard error was lost
+            raise AssertionError("raise_from_status() did not raise")
diff --git a/metadata-ingestion/tests/unit/test_usage_common.py b/metadata-ingestion/tests/unit/test_usage_common.py
index 3c21bd98d6dcc8..021a988422688a 100644
--- a/metadata-ingestion/tests/unit/test_usage_common.py
+++ b/metadata-ingestion/tests/unit/test_usage_common.py
@@ -12,9 +12,9 @@
)
from datahub.metadata.schema_classes import DatasetUsageStatisticsClass
-TestTableRef = str
+_TestTableRef = str
-TestAggregatedDataset = GenericAggregatedDataset[TestTableRef]
+_TestAggregatedDataset = GenericAggregatedDataset[_TestTableRef]
def test_add_one_query_without_columns():
@@ -27,7 +27,7 @@ def test_add_one_query_without_columns():
resource = "test_db.test_schema.test_table"
- ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
+ ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
ta.add_read_entry(
test_email,
test_query,
@@ -52,7 +52,7 @@ def test_multiple_query_without_columns():
resource = "test_db.test_schema.test_table"
- ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
+ ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
ta.add_read_entry(
test_email,
test_query,
@@ -88,7 +88,7 @@ def test_make_usage_workunit():
resource = "test_db.test_schema.test_table"
- ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
+ ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
ta.add_read_entry(
test_email,
test_query,
@@ -117,7 +117,7 @@ def test_query_trimming():
resource = "test_db.test_schema.test_table"
- ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
+ ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource)
ta.total_budget_for_query_list = total_budget_for_query_list
ta.add_read_entry(
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java
index b074d89050e961..30eb17f6911e60 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java
@@ -1,5 +1,6 @@
package com.linkedin.metadata.entity;
+import com.codahale.metrics.Timer;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Streams;
import com.linkedin.common.AuditStamp;
@@ -26,6 +27,7 @@
import com.linkedin.metadata.utils.EntityKeyUtils;
import com.linkedin.metadata.utils.GenericAspectUtils;
import com.linkedin.metadata.utils.PegasusUtils;
+import com.linkedin.metadata.utils.metrics.MetricUtils;
import com.linkedin.mxe.MetadataAuditOperation;
import com.linkedin.mxe.MetadataChangeLog;
import com.linkedin.mxe.MetadataChangeProposal;
@@ -38,9 +40,13 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION;
@@ -88,7 +94,10 @@ public abstract class EntityService {
private final EntityEventProducer _producer;
private final EntityRegistry _entityRegistry;
private final Map> _entityToValidAspects;
- private Boolean _emitAspectSpecificAuditEvent = false;
+ @Getter
+ @Setter
+ private RetentionService retentionService;
+ private Boolean _alwaysEmitAuditEvent = false;
public static final String DEFAULT_RUN_ID = "no-run-id-provided";
public static final String BROWSE_PATHS = "browsePaths";
public static final String DATA_PLATFORM_INSTANCE = "dataPlatformInstance";
@@ -107,7 +116,7 @@ protected EntityService(@Nonnull final EntityEventProducer producer, @Nonnull fi
* @param aspectNames aspects to fetch for each urn in urns set
* @return a map of provided {@link Urn} to a List containing the requested aspects.
*/
-  protected abstract Map<Urn, List<RecordTemplate>> getLatestAspects(@Nonnull final Set<Urn> urns,
+  public abstract Map<Urn, List<RecordTemplate>> getLatestAspects(@Nonnull final Set<Urn> urns,
       @Nonnull final Set<String> aspectNames);
/**
@@ -146,6 +155,22 @@ public abstract VersionedAspect getVersionedAspect(@Nonnull final Urn urn, @Nonn
public abstract ListResult listLatestAspects(@Nonnull final String entityName,
@Nonnull final String aspectName, final int start, int count);
+  /**
+   * Checks whether there is an actual update to the aspect by applying the updateLambda.
+   * If there is an update, push the new version into the local DB.
+   * Otherwise, do not push the new version, but just update the system metadata.
+   *
+   * @param urn an urn associated with the new aspect
+   * @param aspectName name of the aspect being inserted
+   * @param updateLambda Function to apply to the latest version of the aspect to get the updated version
+   * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time
+   * @param providedSystemMetadata system metadata supplied by the caller for the new version
+   * @return Details about the new and old version of the aspect
+   */
+  @Nonnull
+  protected abstract UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnull final String aspectName,
+      @Nonnull final Function<Optional<RecordTemplate>, RecordTemplate> updateLambda,
+      @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata);
+
/**
* Ingests (inserts) a new version of an entity aspect & emits a {@link com.linkedin.mxe.MetadataAuditEvent}.
*
@@ -159,8 +184,47 @@ public abstract ListResult listLatestAspects(@Nonnull final Stri
* @param systemMetadata
* @return the {@link RecordTemplate} representation of the written aspect object
*/
- public abstract RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName,
- @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, SystemMetadata systemMetadata);
+  public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName,
+      @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, SystemMetadata systemMetadata) {
+
+    log.debug("Invoked ingestAspect with urn: {}, aspectName: {}, newValue: {}", urn, aspectName, newValue);
+
+    // Reject URNs whose string form carries leading or trailing whitespace.
+    if (!urn.toString().trim().equals(urn.toString())) {
+      throw new IllegalArgumentException("Error: cannot provide an URN with leading or trailing whitespace");
+    }
+
+    Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectToLocalDB").time();
+    UpdateAspectResult result = ingestAspectToLocalDB(urn, aspectName, ignored -> newValue, auditStamp, systemMetadata);
+    ingestToLocalDBTimer.stop();
+
+    final RecordTemplate oldValue = result.getOldValue();
+    final RecordTemplate updatedValue = result.getNewValue();
+
+    // Apply retention policies if there was an update to an existing aspect value.
+    // NOTE: applyRetention is the synchronous variant, so this runs on the calling thread.
+    if (oldValue != updatedValue && oldValue != null && retentionService != null) {
+      retentionService.applyRetention(urn, aspectName,
+          Optional.of(new RetentionService.RetentionContext(Optional.of(result.getMaxVersion()))));
+    }
+
+    // Produce MAE after a successful update (old and new values are compared by reference)
+    if (oldValue != updatedValue || _alwaysEmitAuditEvent) {
+      log.debug("Producing MetadataAuditEvent for ingested aspect {}, urn {}", aspectName, urn);
+      Timer.Context produceMAETimer = MetricUtils.timer(this.getClass(), "produceMAE").time();
+      if (aspectName.equals(getKeyAspectName(urn))) {
+        produceMetadataAuditEventForKey(urn, result.getNewSystemMetadata());
+      } else {
+        produceMetadataAuditEvent(urn, oldValue, updatedValue, result.getOldSystemMetadata(),
+            result.getNewSystemMetadata(), MetadataAuditOperation.UPDATE);
+      }
+      produceMAETimer.stop();
+    } else {
+      log.debug("Skipped producing MetadataAuditEvent for ingested aspect {}, urn {}. Aspect has not changed.",
+          aspectName, urn);
+    }
+
+    return updatedValue;
+  }
public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName,
@Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp) {
@@ -171,6 +235,105 @@ public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String
return ingestAspect(urn, aspectName, newValue, auditStamp, generatedSystemMetadata);
}
+  /**
+   * Ingests the aspect carried by a {@link MetadataChangeProposal} and emits a {@link MetadataChangeLog}.
+   *
+   * Non-timeseries aspects are written to the local DB, and retention is applied when an existing value was
+   * replaced; timeseries aspects skip the local DB write. A change log is produced when the old and new aspect
+   * differ (compared by reference) or when always-emit is enabled.
+   *
+   * @param metadataChangeProposal the proposal to ingest; must be an UPSERT with both aspect name and aspect set
+   * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time
+   * @return the entity urn together with a flag telling whether the aspect value was updated
+   * @throws UnsupportedOperationException if the change type is not UPSERT, or the aspect or its name is missing
+   * @throws RuntimeException if the aspect is unknown for the entity type or cannot be deserialized
+   */
+  public IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal metadataChangeProposal, AuditStamp auditStamp) {
+
+    log.debug("entity type = {}", metadataChangeProposal.getEntityType());
+    EntitySpec entitySpec = getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType());
+    log.debug("entity spec = {}", entitySpec);
+
+    Urn entityUrn = EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec());
+
+    if (metadataChangeProposal.getChangeType() != ChangeType.UPSERT) {
+      throw new UnsupportedOperationException("Only upsert operation is supported");
+    }
+
+    if (!metadataChangeProposal.hasAspectName() || !metadataChangeProposal.hasAspect()) {
+      throw new UnsupportedOperationException("Aspect and aspect name is required for create and update operations");
+    }
+
+    AspectSpec aspectSpec = entitySpec.getAspectSpec(metadataChangeProposal.getAspectName());
+
+    if (aspectSpec == null) {
+      throw new RuntimeException(
+          String.format("Unknown aspect %s for entity %s", metadataChangeProposal.getAspectName(),
+              metadataChangeProposal.getEntityType()));
+    }
+
+    log.debug("aspect spec = {}", aspectSpec);
+
+    RecordTemplate aspect;
+    try {
+      aspect = GenericAspectUtils.deserializeAspect(metadataChangeProposal.getAspect().getValue(),
+          metadataChangeProposal.getAspect().getContentType(), aspectSpec);
+      ValidationUtils.validateOrThrow(aspect);
+    } catch (ModelConversionException e) {
+      // String.format needs %s placeholders ("{}" is SLF4J syntax and would be printed literally).
+      // The original exception is kept as the cause so the underlying conversion error is not lost.
+      throw new RuntimeException(
+          String.format("Could not deserialize %s for aspect %s", metadataChangeProposal.getAspect().getValue(),
+              metadataChangeProposal.getAspectName()), e);
+    }
+    log.debug("aspect = {}", aspect);
+
+    // Fall back to generated system metadata when the proposal carries none.
+    SystemMetadata systemMetadata = metadataChangeProposal.getSystemMetadata();
+    if (systemMetadata == null) {
+      systemMetadata = new SystemMetadata();
+      systemMetadata.setRunId(DEFAULT_RUN_ID);
+      systemMetadata.setLastObserved(System.currentTimeMillis());
+    }
+    systemMetadata.setRegistryName(aspectSpec.getRegistryName());
+    systemMetadata.setRegistryVersion(aspectSpec.getRegistryVersion().toString());
+
+    RecordTemplate oldAspect = null;
+    SystemMetadata oldSystemMetadata = null;
+    RecordTemplate newAspect = aspect;
+    SystemMetadata newSystemMetadata = systemMetadata;
+
+    if (!aspectSpec.isTimeseries()) {
+      Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestProposalToLocalDB").time();
+      UpdateAspectResult result =
+          ingestAspectToLocalDB(entityUrn, metadataChangeProposal.getAspectName(), ignored -> aspect, auditStamp,
+              systemMetadata);
+      ingestToLocalDBTimer.stop();
+      oldAspect = result.getOldValue();
+      oldSystemMetadata = result.getOldSystemMetadata();
+      newAspect = result.getNewValue();
+      newSystemMetadata = result.getNewSystemMetadata();
+      // Apply retention policies if there was an update to an existing aspect value.
+      // NOTE: applyRetention is the synchronous variant, so this runs on the calling thread.
+      if (oldAspect != newAspect && oldAspect != null && retentionService != null) {
+        retentionService.applyRetention(entityUrn, aspectSpec.getName(),
+            Optional.of(new RetentionService.RetentionContext(Optional.of(result.getMaxVersion()))));
+      }
+    }
+
+    if (oldAspect != newAspect || getAlwaysEmitAuditEvent()) {
+      log.debug("Producing MetadataChangeLog for ingested aspect {}, urn {}",
+          metadataChangeProposal.getAspectName(), entityUrn);
+
+      final MetadataChangeLog metadataChangeLog = new MetadataChangeLog(metadataChangeProposal.data());
+      if (oldAspect != null) {
+        metadataChangeLog.setPreviousAspectValue(GenericAspectUtils.serializeAspect(oldAspect));
+      }
+      if (oldSystemMetadata != null) {
+        metadataChangeLog.setPreviousSystemMetadata(oldSystemMetadata);
+      }
+      if (newAspect != null) {
+        metadataChangeLog.setAspect(GenericAspectUtils.serializeAspect(newAspect));
+      }
+      if (newSystemMetadata != null) {
+        metadataChangeLog.setSystemMetadata(newSystemMetadata);
+      }
+
+      log.debug("Serialized MCL event: {}", metadataChangeLog);
+      // Emit the change log for the ingested aspect (timeseries and non-timeseries alike)
+      produceMetadataChangeLog(entityUrn, aspectSpec, metadataChangeLog);
+    } else {
+      log.debug("Skipped producing MetadataChangeLog for ingested aspect {}, urn {}. Aspect has not changed.",
+          metadataChangeProposal.getAspectName(), entityUrn);
+    }
+
+    return new IngestProposalResult(entityUrn, oldAspect != newAspect);
+  }
+
/**
* Updates a particular version of an aspect & optionally emits a {@link com.linkedin.mxe.MetadataAuditEvent}.
*
@@ -483,12 +646,12 @@ private Map> buildEntityToValidAspects(final EntityRegistry
entry -> entry.getAspectSpecs().stream().map(AspectSpec::getName).collect(Collectors.toSet())));
}
- public Boolean getEmitAspectSpecificAuditEvent() {
- return _emitAspectSpecificAuditEvent;
+ public Boolean getAlwaysEmitAuditEvent() {
+ return _alwaysEmitAuditEvent;
}
- public void setEmitAspectSpecificAuditEvent(Boolean emitAspectSpecificAuditEvent) {
- _emitAspectSpecificAuditEvent = emitAspectSpecificAuditEvent;
+ public void setAlwaysEmitAuditEvent(Boolean alwaysEmitAuditEvent) {
+ _alwaysEmitAuditEvent = alwaysEmitAuditEvent;
}
public EntityRegistry getEntityRegistry() {
@@ -505,8 +668,6 @@ protected Set getEntityAspectNames(final String entityName) {
public abstract void setWritable(boolean canWrite);
- public abstract Urn ingestProposal(MetadataChangeProposal metadataChangeProposal, AuditStamp auditStamp);
-
public RollbackRunResult rollbackRun(List aspectRows, String runId) {
return rollbackWithConditions(aspectRows, Collections.singletonMap("runId", runId));
}
@@ -517,4 +678,21 @@ public abstract RollbackRunResult rollbackWithConditions(List
public abstract RollbackRunResult deleteUrn(Urn urn);
public abstract Boolean exists(Urn urn);
+
+ /**
+ * Immutable result of writing an aspect to the local DB: the previous and new aspect values with their
+ * system metadata, the audit operation, and the version number that callers pass on to the retention
+ * service as context.
+ */
+ @Value
+ public static class UpdateAspectResult {
+ Urn urn;
+ // May be null when no previous value existed (callers null-check it before applying retention).
+ RecordTemplate oldValue;
+ RecordTemplate newValue;
+ SystemMetadata oldSystemMetadata;
+ SystemMetadata newSystemMetadata;
+ MetadataAuditOperation operation;
+ long maxVersion;
+ }
+
+ /**
+ * Immutable result of ingestProposal: the urn of the affected entity and whether the aspect value changed.
+ */
+ @Value
+ public static class IngestProposalResult {
+ Urn urn;
+ // True when the old and new aspect differ by reference after ingestion.
+ boolean didUpdate;
+ }
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java
new file mode 100644
index 00000000000000..8c57c9de7e54f4
--- /dev/null
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java
@@ -0,0 +1,186 @@
+package com.linkedin.metadata.entity;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.linkedin.common.AuditStamp;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.Constants;
+import com.linkedin.metadata.key.DataHubRetentionKey;
+import com.linkedin.metadata.utils.EntityKeyUtils;
+import com.linkedin.metadata.utils.GenericAspectUtils;
+import com.linkedin.mxe.GenericAspect;
+import com.linkedin.mxe.MetadataChangeProposal;
+import com.linkedin.retention.DataHubRetentionConfig;
+import com.linkedin.retention.Retention;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import lombok.SneakyThrows;
+import lombok.Value;
+
+
+/**
+ * Service coupled with an entity service to handle retention
+ */
+public abstract class RetentionService {
+  protected static final String ALL = "*";
+  protected static final String DATAHUB_RETENTION_ENTITY = "dataHubRetention";
+  protected static final String DATAHUB_RETENTION_ASPECT = "dataHubRetentionConfig";
+  protected static final String DATAHUB_RETENTION_KEY_ASPECT = "dataHubRetentionKey";
+
+  protected abstract EntityService getEntityService();
+
+  /**
+   * Fetch retention policies given the entityName and aspectName
+   * Uses the entity service to fetch the latest retention policies set for the input entity and aspect
+   *
+   * @param entityName Name of the entity
+   * @param aspectName Name of the aspect
+   * @return retention policies to apply to the input entity and aspect; an empty {@link Retention} if none is set
+   */
+  public Retention getRetention(@Nonnull String entityName, @Nonnull String aspectName) {
+    // Prioritized list of retention keys to fetch
+    List<Urn> retentionUrns = getRetentionKeys(entityName, aspectName);
+    Map<Urn, List<RecordTemplate>> fetchedAspects =
+        getEntityService().getLatestAspects(new HashSet<>(retentionUrns), ImmutableSet.of(DATAHUB_RETENTION_ASPECT));
+    // Find the first retention info that is set among the prioritized list of retention keys above
+    Optional<DataHubRetentionConfig> retentionInfo = retentionUrns.stream()
+        .flatMap(urn -> fetchedAspects.getOrDefault(urn, Collections.emptyList())
+            .stream()
+            .filter(aspect -> aspect instanceof DataHubRetentionConfig))
+        .map(retention -> (DataHubRetentionConfig) retention)
+        .findFirst();
+    return retentionInfo.map(DataHubRetentionConfig::getRetention).orElse(new Retention());
+  }
+
+  // Get list of datahub retention keys that match the input entity name and aspect name.
+  // Ordered from most to least specific: (entity, aspect), (entity, *), (*, aspect), (*, *).
+  protected List<Urn> getRetentionKeys(@Nonnull String entityName, @Nonnull String aspectName) {
+    return ImmutableList.of(new DataHubRetentionKey().setEntityName(entityName).setAspectName(aspectName),
+        new DataHubRetentionKey().setEntityName(entityName).setAspectName(ALL),
+        new DataHubRetentionKey().setEntityName(ALL).setAspectName(aspectName),
+        new DataHubRetentionKey().setEntityName(ALL).setAspectName(ALL))
+        .stream()
+        .map(key -> EntityKeyUtils.convertEntityKeyToUrn(key, DATAHUB_RETENTION_ENTITY))
+        .collect(Collectors.toList());
+  }
+
+  /**
+   * Set retention policy for given entity and aspect. If entity or aspect names are null, the policy is set as default
+   *
+   * @param entityName Entity name to apply policy to. If null, set as "*",
+   *                   meaning it will be the default for any entities without specified policy
+   * @param aspectName Aspect name to apply policy to. If null, set as "*",
+   *                   meaning it will be the default for any aspects without specified policy
+   * @param retentionConfig Retention policy
+   * @return whether ingesting the retention config aspect resulted in an update
+   * @throws IllegalArgumentException if the policy sets a non-positive maxVersions or maxAgeInSeconds
+   */
+  @SneakyThrows
+  public boolean setRetention(@Nullable String entityName, @Nullable String aspectName,
+      @Nonnull DataHubRetentionConfig retentionConfig) {
+    validateRetention(retentionConfig.getRetention());
+    DataHubRetentionKey retentionKey = new DataHubRetentionKey();
+    retentionKey.setEntityName(entityName != null ? entityName : ALL);
+    retentionKey.setAspectName(aspectName != null ? aspectName : ALL);
+    Urn retentionUrn = EntityKeyUtils.convertEntityKeyToUrn(retentionKey, DATAHUB_RETENTION_ENTITY);
+    // First ingest the key aspect of the retention entity ...
+    MetadataChangeProposal keyProposal = new MetadataChangeProposal();
+    GenericAspect keyAspect = GenericAspectUtils.serializeAspect(retentionKey);
+    keyProposal.setAspect(keyAspect);
+    keyProposal.setAspectName(DATAHUB_RETENTION_KEY_ASPECT);
+    keyProposal.setEntityType(DATAHUB_RETENTION_ENTITY);
+    keyProposal.setChangeType(ChangeType.UPSERT);
+    keyProposal.setEntityUrn(retentionUrn);
+    AuditStamp auditStamp =
+        new AuditStamp().setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis());
+    getEntityService().ingestProposal(keyProposal, auditStamp);
+    // ... then reuse the same proposal envelope for the retention config aspect itself.
+    MetadataChangeProposal aspectProposal = keyProposal.clone();
+    GenericAspect retentionAspect = GenericAspectUtils.serializeAspect(retentionConfig);
+    aspectProposal.setAspect(retentionAspect);
+    aspectProposal.setAspectName(DATAHUB_RETENTION_ASPECT);
+    return getEntityService().ingestProposal(aspectProposal, auditStamp).isDidUpdate();
+  }
+
+  /**
+   * Delete the retention policy set for given entity and aspect.
+   *
+   * @param entityName Entity name whose policy is deleted. If null, set as "*",
+   *                   meaning it will delete the default policy for any entities without specified policy
+   * @param aspectName Aspect name whose policy is deleted. If null, set as "*",
+   *                   meaning it will delete the default policy for any aspects without specified policy
+   */
+  public void deleteRetention(@Nullable String entityName, @Nullable String aspectName) {
+    DataHubRetentionKey retentionKey = new DataHubRetentionKey();
+    retentionKey.setEntityName(entityName != null ? entityName : ALL);
+    retentionKey.setAspectName(aspectName != null ? aspectName : ALL);
+    Urn retentionUrn = EntityKeyUtils.convertEntityKeyToUrn(retentionKey, DATAHUB_RETENTION_ENTITY);
+    getEntityService().deleteUrn(retentionUrn);
+  }
+
+  // Rejects policies whose version or time limits are zero or negative.
+  private void validateRetention(Retention retention) {
+    if (retention.hasVersion()) {
+      if (retention.getVersion().getMaxVersions() <= 0) {
+        throw new IllegalArgumentException("Invalid maxVersions: " + retention.getVersion().getMaxVersions());
+      }
+    }
+    if (retention.hasTime()) {
+      if (retention.getTime().getMaxAgeInSeconds() <= 0) {
+        throw new IllegalArgumentException("Invalid maxAgeInSeconds: " + retention.getTime().getMaxAgeInSeconds());
+      }
+    }
+  }
+
+  /**
+   * Apply retention policies given the urn and aspect name asynchronously.
+   * The returned future is discarded, so a failure inside the task is not reported to the caller.
+   *
+   * @param urn Urn of the entity
+   * @param aspectName Name of the aspect
+   * @param context Additional context that could be used to apply retention
+   */
+  public void applyRetentionAsync(@Nonnull Urn urn, @Nonnull String aspectName, Optional<RetentionContext> context) {
+    CompletableFuture.runAsync(() -> applyRetention(urn, aspectName, context));
+  }
+
+  /**
+   * Apply retention policies given the urn and aspect name
+   *
+   * @param urn Urn of the entity
+   * @param aspectName Name of the aspect
+   * @param context Additional context that could be used to apply retention
+   */
+  public void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName, Optional<RetentionContext> context) {
+    Retention retentionPolicy = getRetention(urn.getEntityType(), aspectName);
+    // An empty policy record means no retention is configured; nothing to apply.
+    if (retentionPolicy.data().isEmpty()) {
+      return;
+    }
+    applyRetention(urn, aspectName, retentionPolicy, context);
+  }
+
+  /**
+   * Apply retention policies given the urn and aspect name and policies
+   * @param urn Urn of the entity
+   * @param aspectName Name of the aspect
+   * @param retentionPolicy Retention policies to apply
+   * @param context Additional context that could be used to apply retention
+   */
+  public abstract void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName, Retention retentionPolicy,
+      Optional<RetentionContext> context);
+
+  /**
+   * Batch apply retention to all records that match the input entityName and aspectName
+   *
+   * @param entityName Name of the entity to apply retention to. If null, applies to all entities
+   * @param aspectName Name of the aspect to apply retention to. If null, applies to all aspects
+   */
+  public abstract void batchApplyRetention(@Nullable String entityName, @Nullable String aspectName);
+
+  /**
+   * Extra information handed to retention; callers in EntityService fill maxVersion from the ingest result.
+   */
+  @Value
+  public static class RetentionContext {
+    Optional<Long> maxVersion;
+  }
+}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
index 71d5fe82bc8e28..ab3323f5ac920f 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
@@ -4,10 +4,6 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.metadata.dao.exception.ModelConversionException;
import com.linkedin.metadata.dao.exception.RetryLimitReached;
-import com.linkedin.metadata.dao.retention.IndefiniteRetention;
-import com.linkedin.metadata.dao.retention.Retention;
-import com.linkedin.metadata.dao.retention.TimeBasedRetention;
-import com.linkedin.metadata.dao.retention.VersionBasedRetention;
import com.linkedin.metadata.dao.utils.QueryUtils;
import com.linkedin.metadata.entity.AspectStorageValidationUtil;
import com.linkedin.metadata.entity.ListResult;
@@ -45,11 +41,8 @@
@Slf4j
public class EbeanAspectDao {
- private static final IndefiniteRetention INDEFINITE_RETENTION = new IndefiniteRetention();
-
private final EbeanServer _server;
private boolean _connectionValidated = false;
- private final Map _aspectRetentionMap = new HashMap<>();
private final Clock _clock = Clock.systemUTC();
// Why 375? From tuning, this seems to be about the largest size we can get without having ebean batch issues.
@@ -124,9 +117,6 @@ public long saveLatestAspect(
// Save newValue as the latest version (v0)
saveAspect(urn, aspectName, newAspectMetadata, newActor, newImpersonator, newTime, newSystemMetadata, ASPECT_LATEST_VERSION, oldAspectMetadata == null);
- // Apply retention policy
- applyRetention(urn, aspectName, getRetention(aspectName), largestVersion);
-
return largestVersion;
}
@@ -171,12 +161,11 @@ protected EbeanAspectV2 getLatestAspect(@Nonnull final String urn, @Nonnull fina
return _server.find(EbeanAspectV2.class, key);
}
- @Nullable
public long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName) {
- validateConnection();
List result = _server.find(EbeanAspectV2.class)
.where()
- .eq("urn", urn).eq("aspect", aspectName)
+ .eq("urn", urn)
+ .eq("aspect", aspectName)
.orderBy()
.desc("version")
.findList();
@@ -443,15 +432,6 @@ public ListResult listAspectMetadata(
return toListResult(aspects, listResultMetadata, pagedList, start);
}
- @Nonnull
- public Retention getRetention(@Nonnull final String aspectName) {
- return _aspectRetentionMap.getOrDefault(aspectName, INDEFINITE_RETENTION);
- }
-
- public void setRetention(@Nonnull final String aspectName, @Nonnull final Retention retention) {
- _aspectRetentionMap.put(aspectName, retention);
- }
-
@Nonnull
public T runInTransactionWithRetry(@Nonnull final Supplier block, final int maxTransactionRetry) {
validateConnection();
@@ -477,58 +457,6 @@ public T runInTransactionWithRetry(@Nonnull final Supplier block, final i
return result;
}
-
- private void applyRetention(
- @Nonnull final String urn,
- @Nonnull final String aspectName,
- @Nonnull final Retention retention,
- long largestVersion) {
- if (retention instanceof IndefiniteRetention) {
- return;
- }
-
- if (retention instanceof VersionBasedRetention) {
- applyVersionBasedRetention(urn, aspectName, (VersionBasedRetention) retention, largestVersion);
- return;
- }
-
- if (retention instanceof TimeBasedRetention) {
- applyTimeBasedRetention(urn, aspectName, (TimeBasedRetention) retention, _clock.millis());
- return;
- }
- }
-
- protected void applyVersionBasedRetention(
- @Nonnull final String urn,
- @Nonnull final String aspectName,
- @Nonnull final VersionBasedRetention retention,
- long largestVersion) {
- validateConnection();
-
- _server.find(EbeanAspectV2.class)
- .where()
- .eq(EbeanAspectV2.URN_COLUMN, urn)
- .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName)
- .ne(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION)
- .le(EbeanAspectV2.VERSION_COLUMN, largestVersion - retention.getMaxVersionsToRetain() + 1)
- .delete();
- }
-
- protected void applyTimeBasedRetention(
- @Nonnull final String urn,
- @Nonnull final String aspectName,
- @Nonnull final TimeBasedRetention retention,
- long currentTime) {
- validateConnection();
-
- _server.find(EbeanAspectV2.class)
- .where()
- .eq(EbeanAspectV2.URN_COLUMN, urn.toString())
- .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName)
- .lt(EbeanAspectV2.CREATED_ON_COLUMN, new Timestamp(currentTime - retention.getMaxAgeToRetain()))
- .delete();
- }
-
private long getNextVersion(@Nonnull final String urn, @Nonnull final String aspectName) {
validateConnection();
final List result = _server.find(EbeanAspectV2.class)
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java
index 912fa05a12f62f..16c9072f3b4d47 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java
@@ -1,6 +1,5 @@
package com.linkedin.metadata.entity.ebean;
-import com.codahale.metrics.Timer;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
@@ -13,28 +12,20 @@
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.aspect.Aspect;
import com.linkedin.metadata.aspect.VersionedAspect;
-import com.linkedin.metadata.dao.exception.ModelConversionException;
import com.linkedin.metadata.dao.utils.RecordUtils;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.ListResult;
import com.linkedin.metadata.entity.RollbackResult;
import com.linkedin.metadata.entity.RollbackRunResult;
-import com.linkedin.metadata.entity.ValidationUtils;
import com.linkedin.metadata.event.EntityEventProducer;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.ListUrnsResult;
import com.linkedin.metadata.run.AspectRowSummary;
-import com.linkedin.metadata.utils.EntityKeyUtils;
-import com.linkedin.metadata.utils.GenericAspectUtils;
import com.linkedin.metadata.utils.PegasusUtils;
-import com.linkedin.metadata.utils.metrics.MetricUtils;
import com.linkedin.mxe.MetadataAuditOperation;
-import com.linkedin.mxe.MetadataChangeLog;
-import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.mxe.SystemMetadata;
-import io.opentelemetry.extension.annotations.WithSpan;
import java.net.URISyntaxException;
import java.sql.Timestamp;
import java.util.ArrayList;
@@ -50,11 +41,12 @@
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
-import lombok.Value;
import lombok.extern.slf4j.Slf4j;
-import static com.linkedin.metadata.Constants.*;
-import static com.linkedin.metadata.entity.ebean.EbeanUtils.*;
+import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION;
+import static com.linkedin.metadata.entity.ebean.EbeanUtils.parseSystemMetadata;
+import static com.linkedin.metadata.entity.ebean.EbeanUtils.toAspectRecord;
+import static com.linkedin.metadata.entity.ebean.EbeanUtils.toJsonAspect;
/**
@@ -68,8 +60,6 @@ public class EbeanEntityService extends EntityService {
private final EbeanAspectDao _entityDao;
private final JacksonDataTemplateCodec _dataTemplateCodec = new JacksonDataTemplateCodec();
- private Boolean _alwaysEmitAuditEvent = false;
-
public EbeanEntityService(@Nonnull final EbeanAspectDao entityDao, @Nonnull final EntityEventProducer eventProducer,
@Nonnull final EntityRegistry entityRegistry) {
@@ -206,50 +196,9 @@ public ListResult listLatestAspects(@Nonnull final String entity
@Override
@Nonnull
- @WithSpan
- public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName,
- @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp,
- @Nonnull final SystemMetadata systemMetadata) {
-
- log.debug("Invoked ingestAspect with urn: {}, aspectName: {}, newValue: {}", urn, aspectName, newValue);
-
- if (!urn.toString().trim().equals(urn.toString())) {
- throw new IllegalArgumentException("Error: cannot provide an URN with leading or trailing whitespace");
- }
-
- Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectToLocalDB").time();
- UpdateAspectResult result = ingestAspectToLocalDB(urn, aspectName, ignored -> newValue, auditStamp, systemMetadata,
- DEFAULT_MAX_TRANSACTION_RETRY);
- ingestToLocalDBTimer.stop();
-
- final RecordTemplate oldValue = result.getOldValue();
- final RecordTemplate updatedValue = result.getNewValue();
-
- // 5. Produce MAE after a successful update
- if (oldValue != updatedValue || _alwaysEmitAuditEvent) {
- log.debug(String.format("Producing MetadataAuditEvent for ingested aspect %s, urn %s", aspectName, urn));
- Timer.Context produceMAETimer = MetricUtils.timer(this.getClass(), "produceMAE").time();
- if (aspectName.equals(getKeyAspectName(urn))) {
- produceMetadataAuditEventForKey(urn, result.getNewSystemMetadata());
- } else {
- produceMetadataAuditEvent(urn, oldValue, updatedValue, result.getOldSystemMetadata(),
- result.getNewSystemMetadata(), MetadataAuditOperation.UPDATE);
- }
- produceMAETimer.stop();
- } else {
- log.debug(
- String.format("Skipped producing MetadataAuditEvent for ingested aspect %s, urn %s. Aspect has not changed.",
- aspectName, urn));
- }
-
- return updatedValue;
- }
-
- @Nonnull
- private UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnull final String aspectName,
+ protected UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnull final String aspectName,
@Nonnull final Function, RecordTemplate> updateLambda,
- @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata,
- final int maxTransactionRetry) {
+ @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata) {
return _entityDao.runInTransactionWithRetry(() -> {
@@ -274,12 +223,12 @@ private UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnul
return new UpdateAspectResult(urn, oldValue, oldValue,
EbeanUtils.parseSystemMetadata(latest.getSystemMetadata()), latestSystemMetadata,
- MetadataAuditOperation.UPDATE);
+ MetadataAuditOperation.UPDATE, 0);
}
// 4. Save the newValue as the latest version
log.debug(String.format("Ingesting aspect with name %s, urn %s", aspectName, urn));
- _entityDao.saveLatestAspect(urn.toString(), aspectName, latest == null ? null : toJsonAspect(oldValue),
+ long versionOfOld = _entityDao.saveLatestAspect(urn.toString(), aspectName, latest == null ? null : toJsonAspect(oldValue),
latest == null ? null : latest.getCreatedBy(), latest == null ? null : latest.getCreatedFor(),
latest == null ? null : latest.getCreatedOn(), latest == null ? null : latest.getSystemMetadata(),
toJsonAspect(newValue), auditStamp.getActor().toString(),
@@ -288,8 +237,8 @@ private UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnul
return new UpdateAspectResult(urn, oldValue, newValue,
latest == null ? null : EbeanUtils.parseSystemMetadata(latest.getSystemMetadata()), providedSystemMetadata,
- MetadataAuditOperation.UPDATE);
- }, maxTransactionRetry);
+ MetadataAuditOperation.UPDATE, versionOfOld);
+ }, DEFAULT_MAX_TRANSACTION_RETRY);
}
@Override
@@ -329,7 +278,7 @@ private RecordTemplate updateAspect(@Nonnull final Urn urn, @Nonnull final Strin
new Timestamp(auditStamp.getTime()), toJsonAspect(newSystemMetadata), version, oldAspect == null);
return new UpdateAspectResult(urn, oldValue, value, oldSystemMetadata, newSystemMetadata,
- MetadataAuditOperation.UPDATE);
+ MetadataAuditOperation.UPDATE, version);
}, maxTransactionRetry);
final RecordTemplate oldValue = result.getOldValue();
@@ -337,8 +286,8 @@ private RecordTemplate updateAspect(@Nonnull final Urn urn, @Nonnull final Strin
if (emitMae) {
log.debug(String.format("Producing MetadataAuditEvent for updated aspect %s, urn %s", aspectName, urn));
- produceMetadataChangeLog(urn, entityName, aspectName, aspectSpec, oldValue, newValue, result.oldSystemMetadata,
- result.newSystemMetadata, ChangeType.UPSERT);
+ produceMetadataChangeLog(urn, entityName, aspectName, aspectSpec, oldValue, newValue,
+ result.getOldSystemMetadata(), result.getNewSystemMetadata(), ChangeType.UPSERT);
} else {
log.debug(String.format("Skipped producing MetadataAuditEvent for updated aspect %s, urn %s. emitMAE is false.",
aspectName, urn));
@@ -347,114 +296,11 @@ private RecordTemplate updateAspect(@Nonnull final Urn urn, @Nonnull final Strin
return newValue;
}
- public Boolean getAlwaysEmitAuditEvent() {
- return _alwaysEmitAuditEvent;
- }
-
- public void setAlwaysEmitAuditEvent(Boolean alwaysEmitAuditEvent) {
- _alwaysEmitAuditEvent = alwaysEmitAuditEvent;
- }
-
public void setWritable(boolean canWrite) {
log.debug("Enabling writes");
_entityDao.setWritable(canWrite);
}
- @Override
- public Urn ingestProposal(@Nonnull MetadataChangeProposal metadataChangeProposal, AuditStamp auditStamp) {
-
- log.debug("entity type = {}", metadataChangeProposal.getEntityType());
- EntitySpec entitySpec = getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType());
- log.debug("entity spec = {}", entitySpec);
-
- Urn entityUrn = EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec());
-
- if (metadataChangeProposal.getChangeType() != ChangeType.UPSERT) {
- throw new UnsupportedOperationException("Only upsert operation is supported");
- }
-
- if (!metadataChangeProposal.hasAspectName() || !metadataChangeProposal.hasAspect()) {
- throw new UnsupportedOperationException("Aspect and aspect name is required for create and update operations");
- }
-
- AspectSpec aspectSpec = entitySpec.getAspectSpec(metadataChangeProposal.getAspectName());
-
- if (aspectSpec == null) {
- throw new RuntimeException(
- String.format("Unknown aspect %s for entity %s", metadataChangeProposal.getAspectName(),
- metadataChangeProposal.getEntityType()));
- }
-
- log.debug("aspect spec = {}", aspectSpec);
-
- RecordTemplate aspect;
- try {
- aspect = GenericAspectUtils.deserializeAspect(metadataChangeProposal.getAspect().getValue(),
- metadataChangeProposal.getAspect().getContentType(), aspectSpec);
- ValidationUtils.validateOrThrow(aspect);
- } catch (ModelConversionException e) {
- throw new RuntimeException(
- String.format("Could not deserialize {} for aspect {}", metadataChangeProposal.getAspect().getValue(),
- metadataChangeProposal.getAspectName()));
- }
- log.debug("aspect = {}", aspect);
-
- SystemMetadata systemMetadata = metadataChangeProposal.getSystemMetadata();
- if (systemMetadata == null) {
- systemMetadata = new SystemMetadata();
- systemMetadata.setRunId(DEFAULT_RUN_ID);
- systemMetadata.setLastObserved(System.currentTimeMillis());
- }
- systemMetadata.setRegistryName(aspectSpec.getRegistryName());
- systemMetadata.setRegistryVersion(aspectSpec.getRegistryVersion().toString());
-
- RecordTemplate oldAspect = null;
- SystemMetadata oldSystemMetadata = null;
- RecordTemplate newAspect = aspect;
- SystemMetadata newSystemMetadata = systemMetadata;
-
- if (!aspectSpec.isTimeseries()) {
- Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestProposalToLocalDB").time();
- UpdateAspectResult result =
- ingestAspectToLocalDB(entityUrn, metadataChangeProposal.getAspectName(), ignored -> aspect, auditStamp,
- systemMetadata, DEFAULT_MAX_TRANSACTION_RETRY);
- ingestToLocalDBTimer.stop();
- oldAspect = result.oldValue;
- oldSystemMetadata = result.oldSystemMetadata;
- newAspect = result.newValue;
- newSystemMetadata = result.newSystemMetadata;
- }
-
- if (oldAspect != newAspect || _alwaysEmitAuditEvent) {
- log.debug(String.format("Producing MetadataChangeLog for ingested aspect %s, urn %s",
- metadataChangeProposal.getAspectName(), entityUrn));
-
- final MetadataChangeLog metadataChangeLog = new MetadataChangeLog(metadataChangeProposal.data());
- if (oldAspect != null) {
- metadataChangeLog.setPreviousAspectValue(GenericAspectUtils.serializeAspect(oldAspect));
- }
- if (oldSystemMetadata != null) {
- metadataChangeLog.setPreviousSystemMetadata(oldSystemMetadata);
- }
- if (newAspect != null) {
- metadataChangeLog.setAspect(GenericAspectUtils.serializeAspect(newAspect));
- }
- if (newSystemMetadata != null) {
- metadataChangeLog.setSystemMetadata(newSystemMetadata);
- }
-
- log.debug(String.format("Serialized MCL event: %s", metadataChangeLog));
- // Since only timeseries aspects are ingested as of now, simply produce mae event for it
- produceMetadataChangeLog(entityUrn, aspectSpec, metadataChangeLog);
- } else {
- log.debug(
- String.format("Skipped producing MetadataAuditEvent for ingested aspect %s, urn %s. Aspect has not changed.",
- metadataChangeProposal.getAspectName(), entityUrn));
- }
-
- return entityUrn;
- }
-
private boolean filterMatch(SystemMetadata systemMetadata, Map conditions) {
String runIdCondition = conditions.getOrDefault("runId", null);
if (runIdCondition != null) {
@@ -585,7 +431,7 @@ public RollbackResult deleteAspect(String urn, String aspectName, Map aspectRows, String runId) {
@@ -694,14 +540,4 @@ public ListUrnsResult listUrns(@Nonnull final String entityName, final int start
result.setEntities(entityUrns);
return result;
}
-
- @Value
- private static class UpdateAspectResult {
- Urn urn;
- RecordTemplate oldValue;
- RecordTemplate newValue;
- SystemMetadata oldSystemMetadata;
- SystemMetadata newSystemMetadata;
- MetadataAuditOperation operation;
- }
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java
new file mode 100644
index 00000000000000..f119787b570061
--- /dev/null
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java
@@ -0,0 +1,194 @@
+package com.linkedin.metadata.entity.ebean;
+
+import com.linkedin.common.urn.Urn;
+import com.linkedin.metadata.dao.utils.RecordUtils;
+import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.entity.RetentionService;
+import com.linkedin.retention.DataHubRetentionConfig;
+import com.linkedin.retention.Retention;
+import com.linkedin.retention.TimeBasedRetention;
+import com.linkedin.retention.VersionBasedRetention;
+import io.ebean.EbeanServer;
+import io.ebean.Expression;
+import io.ebean.ExpressionList;
+import io.ebean.PagedList;
+import io.ebean.Transaction;
+import io.ebeaninternal.server.expression.Op;
+import io.ebeaninternal.server.expression.SimpleExpression;
+import io.opentelemetry.extension.annotations.WithSpan;
+import java.sql.Timestamp;
+import java.time.Clock;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION;
+
+
+@Slf4j
+@RequiredArgsConstructor
+public class EbeanRetentionService extends RetentionService {
+ private final EntityService _entityService;
+ private final EbeanServer _server;
+ private final int _batchSize;
+
+ private final Clock _clock = Clock.systemUTC();
+
+ public EntityService getEntityService() { // accessor for the EntityService supplied to the constructor
+ return _entityService;
+ }
+
+ @Override
+ @WithSpan
+ public void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName, Retention retentionPolicy,
+ Optional retentionContext) {
+ log.debug("Applying retention to urn {}, aspectName {}", urn, aspectName);
+ // A policy record with no fields set (neither version- nor time-based) means nothing is deleted
+ if (retentionPolicy.data().isEmpty()) {
+ return;
+ }
+ ExpressionList deleteQuery = _server.find(EbeanAspectV2.class)
+ .where()
+ .eq(EbeanAspectV2.URN_COLUMN, urn.toString())
+ .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName)
+ .ne(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION) // the latest-version row is never a delete candidate
+ .or(); // filters added below are OR-ed together: a row matching any one of them is deleted
+
+ List filterList = new ArrayList<>();
+ if (retentionPolicy.hasVersion()) {
+ getVersionBasedRetentionQuery(urn, aspectName, retentionPolicy.getVersion(),
+ retentionContext.flatMap(RetentionService.RetentionContext::getMaxVersion)).ifPresent(filterList::add);
+ }
+ if (retentionPolicy.hasTime()) {
+ filterList.add(getTimeBasedRetentionQuery(retentionPolicy.getTime()));
+ }
+
+ // Only run the delete when at least one retention filter was produced
+ if (!filterList.isEmpty()) {
+ filterList.forEach(deleteQuery::add);
+ deleteQuery.endOr().delete();
+ }
+ }
+
+ private long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName) { // highest stored version, or -1 if no rows
+ List result = _server.find(EbeanAspectV2.class)
+ .where()
+ .eq(EbeanAspectV2.URN_COLUMN, urn)
+ .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName)
+ .orderBy().desc(EbeanAspectV2.VERSION_COLUMN)
+ .setMaxRows(1) // only the top row is read below; do not load every version of the aspect
+ .findList();
+ if (result.isEmpty()) {
+ return -1;
+ }
+ return result.get(0).getKey().getVersion();
+ }
+
+ private Optional getVersionBasedRetentionQuery(@Nonnull Urn urn, @Nonnull String aspectName,
+ @Nonnull final VersionBasedRetention retention, final Optional maxVersionFromUpdate) {
+ long largestVersion = maxVersionFromUpdate.orElseGet(() -> getMaxVersion(urn.toString(), aspectName)); // DB lookup only if caller gave no max
+
+ if (largestVersion < retention.getMaxVersions()) { // largest version is below the configured limit: no filter
+ return Optional.empty();
+ }
+ return Optional.of( // filter matches rows with version < largestVersion - maxVersions + 1
+ new SimpleExpression(EbeanAspectV2.VERSION_COLUMN, Op.LT, largestVersion - retention.getMaxVersions() + 1));
+ }
+
+ private Expression getTimeBasedRetentionQuery(@Nonnull final TimeBasedRetention retention) {
+ return new SimpleExpression(EbeanAspectV2.CREATED_ON_COLUMN, Op.LT, // matches rows created before (now - maxAge)
+ new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000L)); // 1000L: int math overflows past ~24.8 days
+ }
+
+ @Override
+ @WithSpan
+ public void batchApplyRetention(@Nullable String entityName, @Nullable String aspectName) {
+ log.debug("Applying retention to all records");
+ int numCandidates = queryCandidates(entityName, aspectName).findCount(); // counted once up front; bounds the paging loop
+ log.info("Found {} urn, aspect pair with more than 1 version", numCandidates);
+ Map retentionPolicyMap = getAllRetentionPolicies(); // policies loaded once, keyed by retention urn string
+
+ int start = 0;
+ while (start < numCandidates) {
+ log.info("Applying retention to pairs {} through {}", start, start + _batchSize);
+ PagedList rows = getPagedAspects(entityName, aspectName, start, _batchSize);
+
+ try (Transaction transaction = _server.beginTransaction()) { // one transaction per page
+ transaction.setBatchMode(true);
+ transaction.setBatchSize(_batchSize);
+ for (EbeanAspectV2 row : rows.getList()) {
+ // Skip rows at version 0 (presumably only the latest version exists, so there is nothing to prune)
+ if (row.getVersion() == 0) {
+ continue;
+ }
+ // Parse the stored urn string; rows whose urn cannot be parsed are logged and skipped
+ Urn urn;
+ try {
+ urn = Urn.createFromString(row.getUrn());
+ } catch (Exception e) {
+ log.error("Failed to serialize urn {}", row.getUrn(), e);
+ continue;
+ }
+ final String aspectNameFromRecord = row.getAspect();
+ // Use the first retention key (in getRetentionKeys order) that has a policy in the preloaded map
+ Optional retentionPolicy = getRetentionKeys(urn.getEntityType(), aspectNameFromRecord).stream()
+ .map(key -> retentionPolicyMap.get(key.toString()))
+ .filter(Objects::nonNull)
+ .findFirst()
+ .map(DataHubRetentionConfig::getRetention);
+ retentionPolicy.ifPresent(retention -> applyRetention(urn, aspectNameFromRecord, retention,
+ Optional.of(new RetentionContext(Optional.of(row.getVersion()))))); // row version is passed as the max version
+ }
+ transaction.commit();
+ }
+
+ start += _batchSize;
+ }
+
+ log.info("Finished applying retention to all records");
+ }
+
+ private Map getAllRetentionPolicies() { // returns retention-entity urn -> parsed DataHubRetentionConfig
+ return _server.find(EbeanAspectV2.class)
+ .select(String.format("%s, %s, %s", EbeanAspectV2.URN_COLUMN, EbeanAspectV2.ASPECT_COLUMN,
+ EbeanAspectV2.METADATA_COLUMN))
+ .where()
+ .eq(EbeanAspectV2.ASPECT_COLUMN, DATAHUB_RETENTION_ASPECT)
+ .eq(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION) // only the latest version of each policy
+ .findList()
+ .stream()
+ .collect(Collectors.toMap(EbeanAspectV2::getUrn,
+ row -> RecordUtils.toRecordTemplate(DataHubRetentionConfig.class, row.getMetadata())));
+ }
+
+ private ExpressionList queryCandidates(@Nullable String entityName, @Nullable String aspectName) { // a null argument means no filter
+ ExpressionList query = _server.find(EbeanAspectV2.class)
+ .setDistinct(true)
+ .select(String.format("%s, %s, max(%s)", EbeanAspectV2.URN_COLUMN, EbeanAspectV2.ASPECT_COLUMN,
+ EbeanAspectV2.VERSION_COLUMN))
+ .where();
+ if (entityName != null) {
+ query.like(EbeanAspectV2.URN_COLUMN, String.format("urn:li:%s:%%", entityName)); // ':' ends the type so a prefix-named entity cannot match
+ }
+ if (aspectName != null) {
+ query.eq(EbeanAspectV2.ASPECT_COLUMN, aspectName);
+ }
+ return query;
+ }
+
+ private PagedList getPagedAspects(@Nullable String entityName, @Nullable String aspectName,
+ final int start, final int pageSize) {
+ return queryCandidates(entityName, aspectName).orderBy( // ordered by urn, aspect so that pages are stable
+ EbeanAspectV2.URN_COLUMN + ", " + EbeanAspectV2.ASPECT_COLUMN)
+ .setFirstRow(start) // zero-based offset of the first row in this page
+ .setMaxRows(pageSize)
+ .findPagedList();
+ }
+}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java
index ff30110214ae32..438d9e2d7f4542 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java
@@ -3,6 +3,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.AuditStamp;
+import com.linkedin.common.Status;
import com.linkedin.common.urn.CorpuserUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.ByteString;
@@ -19,6 +20,7 @@
import com.linkedin.metadata.entity.ebean.EbeanAspectDao;
import com.linkedin.metadata.entity.ebean.EbeanAspectV2;
import com.linkedin.metadata.entity.ebean.EbeanEntityService;
+import com.linkedin.metadata.entity.ebean.EbeanRetentionService;
import com.linkedin.metadata.entity.ebean.EbeanUtils;
import com.linkedin.metadata.event.EntityEventProducer;
import com.linkedin.metadata.key.CorpUserKey;
@@ -37,6 +39,9 @@
import com.linkedin.mxe.MetadataAuditOperation;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.mxe.SystemMetadata;
+import com.linkedin.retention.DataHubRetentionConfig;
+import com.linkedin.retention.Retention;
+import com.linkedin.retention.VersionBasedRetention;
import io.ebean.EbeanServer;
import io.ebean.EbeanServerFactory;
import io.ebean.config.ServerConfig;
@@ -56,6 +61,7 @@
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
@@ -71,6 +77,7 @@ public class EbeanEntityServiceTest {
private EbeanAspectDao _aspectDao;
private EbeanServer _server;
private EntityEventProducer _mockProducer;
+ private EbeanRetentionService _retentionService;
public EbeanEntityServiceTest() throws EntityRegistryException {
}
@@ -107,6 +114,8 @@ public void setupTest() {
_aspectDao = new EbeanAspectDao(_server);
_aspectDao.setConnectionValidated(true);
_entityService = new EbeanEntityService(_aspectDao, _mockProducer, _testEntityRegistry);
+ _retentionService = new EbeanRetentionService(_entityService, _server, 1000);
+ _entityService.setRetentionService(_retentionService);
}
@Test
@@ -650,6 +659,61 @@ public void testIngestListUrns() throws Exception {
assertEquals(entityUrn3.toString(), batch2.getEntities().get(0).toString());
}
+ @Test
+ public void testRetention() throws Exception {
+ Urn entityUrn = Urn.createFromString("urn:li:corpuser:test1");
+
+ SystemMetadata metadata1 = new SystemMetadata();
+ metadata1.setLastObserved(1625792689);
+ metadata1.setRunId("run-123");
+
+ String aspectName = PegasusUtils.getAspectNameFromSchema(new CorpUserInfo().schema());
+
+ // Ingest three CorpUserInfo values; the first one is expected at version 1 afterwards
+ CorpUserInfo writeAspect1 = createCorpUserInfo("email@test.com");
+ _entityService.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1);
+ CorpUserInfo writeAspect1a = createCorpUserInfo("email_a@test.com");
+ _entityService.ingestAspect(entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1);
+ CorpUserInfo writeAspect1b = createCorpUserInfo("email_b@test.com");
+ _entityService.ingestAspect(entityUrn, aspectName, writeAspect1b, TEST_AUDIT_STAMP, metadata1);
+
+ String aspectName2 = PegasusUtils.getAspectNameFromSchema(new Status().schema());
+ // Ingest three Status values; the first one is expected at version 1 afterwards
+ Status writeAspect2 = new Status().setRemoved(true);
+ _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1);
+ Status writeAspect2a = new Status().setRemoved(false);
+ _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1);
+ Status writeAspect2b = new Status().setRemoved(true);
+ _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1);
+
+ assertEquals(_entityService.getAspect(entityUrn, aspectName, 1), writeAspect1);
+ assertEquals(_entityService.getAspect(entityUrn, aspectName2, 1), writeAspect2);
+
+ _retentionService.setRetention(null, null, new DataHubRetentionConfig().setRetention( // presumably the default policy: maxVersions=2
+ new Retention().setVersion(new VersionBasedRetention().setMaxVersions(2))));
+ _retentionService.setRetention("corpuser", "status", new DataHubRetentionConfig().setRetention( // corpuser/status policy: maxVersions=4
+ new Retention().setVersion(new VersionBasedRetention().setMaxVersions(4))));
+
+ // Ingest CorpUserInfo again: under maxVersions=2 the value stored at version 1 should be pruned
+ CorpUserInfo writeAspect1c = createCorpUserInfo("email_c@test.com");
+ _entityService.ingestAspect(entityUrn, aspectName, writeAspect1c, TEST_AUDIT_STAMP, metadata1);
+ // Ingest Status again: under the corpuser/status policy (maxVersions=4) version 1 should survive
+ Status writeAspect2c = new Status().setRemoved(false);
+ _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2c, TEST_AUDIT_STAMP, metadata1);
+
+ assertNull(_entityService.getAspect(entityUrn, aspectName, 1));
+ assertEquals(_entityService.getAspect(entityUrn, aspectName2, 1), writeAspect2);
+
+ // Tighten the (null, null) policy to maxVersions=1 and remove the corpuser/status policy
+ _retentionService.setRetention(null, null, new DataHubRetentionConfig().setRetention(
+ new Retention().setVersion(new VersionBasedRetention().setMaxVersions(1))));
+ _retentionService.deleteRetention("corpuser", "status");
+ // Invoke batch apply with no entity or aspect filter
+ _retentionService.batchApplyRetention(null, null);
+ assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName, 0, 10).getTotalCount(), 1);
+ assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName2, 0, 10).getTotalCount(), 1);
+ }
+
@Nonnull
private com.linkedin.entity.Entity createCorpUserEntity(Urn entityUrn, String email) throws Exception {
CorpuserUrn corpuserUrn = CorpuserUrn.createFromUrn(entityUrn);
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java
index 20e9358b451b76..0ff4105704e931 100644
--- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java
+++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java
@@ -1,5 +1,6 @@
package com.linkedin.metadata.kafka;
+import com.codahale.metrics.Timer;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.linkedin.common.urn.Urn;
@@ -92,7 +93,7 @@ public void consume(final ConsumerRecord consumerRecord)
final GenericRecord record = consumerRecord.value();
log.debug("Got MAE");
- try {
+ try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "maeProcess").time()) {
final MetadataAuditEvent event = EventUtils.avroToPegasusMAE(record);
final RecordTemplate snapshot = RecordUtils.getSelectedRecordTemplateFromUnion(event.getNewSnapshot());
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DataHubRetentionAspect.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DataHubRetentionAspect.pdl
new file mode 100644
index 00000000000000..bb470cee158e02
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DataHubRetentionAspect.pdl
@@ -0,0 +1,12 @@
+namespace com.linkedin.metadata.aspect
+
+import com.linkedin.metadata.key.DataHubRetentionKey
+import com.linkedin.retention.DataHubRetentionConfig
+
+/**
+ * A union of all supported metadata aspects for a DataHub retention policy.
+ */
+typeref DataHubRetentionAspect = union[
+ DataHubRetentionKey,
+ DataHubRetentionConfig
+]
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubRetentionKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubRetentionKey.pdl
new file mode 100644
index 00000000000000..39c5c97cd51f48
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubRetentionKey.pdl
@@ -0,0 +1,19 @@
+namespace com.linkedin.metadata.key
+
+/**
+ * Key for a DataHub Retention
+ */
+@Aspect = {
+ "name": "dataHubRetentionKey"
+}
+record DataHubRetentionKey {
+ /**
+ * Entity name to apply retention to. * (or empty) for applying defaults.
+ */
+ entityName: string
+
+ /**
+ * Aspect name to apply retention to. * (or empty) for applying defaults.
+ */
+ aspectName: string
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DataHubRetentionSnapshot.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DataHubRetentionSnapshot.pdl
new file mode 100644
index 00000000000000..313b0f79b3b5db
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DataHubRetentionSnapshot.pdl
@@ -0,0 +1,24 @@
+namespace com.linkedin.metadata.snapshot
+
+import com.linkedin.common.Urn
+import com.linkedin.metadata.aspect.DataHubRetentionAspect
+
+/**
+ * A metadata snapshot for DataHub retention policy data.
+ */
+@Entity = {
+ "name": "dataHubRetention",
+ "keyAspect": "dataHubRetentionKey"
+}
+record DataHubRetentionSnapshot {
+
+ /**
+ * URN for the entity the metadata snapshot is associated with.
+ */
+ urn: Urn
+
+ /**
+ * The list of metadata aspects associated with the DataHub retention policy.
+ */
+ aspects: array[DataHubRetentionAspect]
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl
index 44104970acf934..91993724afbadc 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl
@@ -24,4 +24,5 @@ typeref Snapshot = union[
GlossaryNodeSnapshot,
DataHubPolicySnapshot,
SchemaFieldSnapshot,
+ DataHubRetentionSnapshot,
]
diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/DataHubRetentionConfig.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/DataHubRetentionConfig.pdl
new file mode 100644
index 00000000000000..3e1977760257ff
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/retention/DataHubRetentionConfig.pdl
@@ -0,0 +1,8 @@
+namespace com.linkedin.retention
+
+@Aspect = {
+ "name": "dataHubRetentionConfig"
+}
+record DataHubRetentionConfig {
+ retention: Retention
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/Retention.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/Retention.pdl
new file mode 100644
index 00000000000000..61bd896490c0ee
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/retention/Retention.pdl
@@ -0,0 +1,10 @@
+namespace com.linkedin.retention
+
+/**
+ * Base class that encapsulates different retention policies.
+ * Only one of the fields should be set
+ */
+record Retention {
+ version: optional VersionBasedRetention
+ time: optional TimeBasedRetention
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/TimeBasedRetention.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/TimeBasedRetention.pdl
new file mode 100644
index 00000000000000..9db4fc8fd42523
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/retention/TimeBasedRetention.pdl
@@ -0,0 +1,8 @@
+namespace com.linkedin.retention
+
+/**
+ * Keep records that are less than X seconds old
+ */
+record TimeBasedRetention {
+ maxAgeInSeconds: int
+}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/VersionBasedRetention.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/VersionBasedRetention.pdl
new file mode 100644
index 00000000000000..1a51a2ca009dc0
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/retention/VersionBasedRetention.pdl
@@ -0,0 +1,8 @@
+namespace com.linkedin.retention
+
+/**
+ * Keep max N latest records
+ */
+record VersionBasedRetention {
+ maxVersions: int
+}
diff --git a/metadata-service/auth-impl/build.gradle b/metadata-service/auth-impl/build.gradle
index f6ed0033667fa6..f87b2cd2b2c892 100644
--- a/metadata-service/auth-impl/build.gradle
+++ b/metadata-service/auth-impl/build.gradle
@@ -7,7 +7,7 @@ dependencies {
implementation 'io.jsonwebtoken:jjwt-api:0.11.2'
runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.11.2',
- 'io.jsonwebtoken:jjwt-jackson:0.11.2' // or 'io.jsonwebtoken:jjwt-gson:0.11.2' for gson
+ 'io.jsonwebtoken:jjwt-jackson:0.11.2'
compile externalDependency.lombok
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java
index 419c192175c0a3..4b844ed07048f6 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java
@@ -7,6 +7,7 @@
import io.jsonwebtoken.JwtBuilder;
import io.jsonwebtoken.Jwts;
import io.jsonwebtoken.SignatureAlgorithm;
+import java.nio.charset.StandardCharsets;
import java.security.Key;
import java.util.ArrayList;
import java.util.Base64;
@@ -104,7 +105,7 @@ public String generateAccessToken(@Nonnull final String sub, @Nonnull final Map<
if (this.iss != null) {
builder.setIssuer(this.iss);
}
- byte[] apiKeySecretBytes = Base64.getDecoder().decode(this.signingKey); // Key must be base64'd.
+ byte [] apiKeySecretBytes = this.signingKey.getBytes(StandardCharsets.UTF_8);
final Key signingKey = new SecretKeySpec(apiKeySecretBytes, this.signingAlgorithm.getJcaName());
return builder.signWith(signingKey, this.signingAlgorithm).compact();
}
@@ -118,8 +119,10 @@ public String generateAccessToken(@Nonnull final String sub, @Nonnull final Map<
public TokenClaims validateAccessToken(@Nonnull final String accessToken) throws TokenException {
Objects.requireNonNull(accessToken);
try {
+ byte [] apiKeySecretBytes = this.signingKey.getBytes(StandardCharsets.UTF_8);
+ final String base64Key = Base64.getEncoder().encodeToString(apiKeySecretBytes);
final Claims claims = (Claims) Jwts.parserBuilder()
- .setSigningKey(this.signingKey)
+ .setSigningKey(base64Key)
.build()
.parse(accessToken)
.getBody();
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java
new file mode 100644
index 00000000000000..00ab3bb1c0baa9
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java
@@ -0,0 +1,42 @@
+package com.linkedin.gms.factory.entity;
+
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.entity.RetentionService;
+import com.linkedin.metadata.entity.ebean.EbeanRetentionService;
+import io.ebean.EbeanServer;
+import javax.annotation.Nonnull;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.DependsOn;
+import org.springframework.context.annotation.PropertySource;
+
+
+@Configuration
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class RetentionServiceFactory { // Spring configuration that exposes the "retentionService" bean
+
+ @Autowired
+ @Qualifier("entityService")
+ private EntityService _entityService;
+
+ @Autowired
+ @Qualifier("ebeanServer")
+ private EbeanServer _server;
+
+ @Value("${RETENTION_APPLICATION_BATCH_SIZE:1000}") // falls back to 1000 when the property is not set
+ private Integer _batchSize;
+
+ // Builds the Ebean-backed retention service and registers it on the injected entity service.
+ @Bean(name = "retentionService")
+ @DependsOn({"ebeanServer", "entityService"})
+ @Nonnull
+ protected RetentionService createInstance() {
+ RetentionService retentionService = new EbeanRetentionService(_entityService, _server, _batchSize);
+ _entityService.setRetentionService(retentionService); // side effect: the entity service now holds this instance
+ return retentionService;
+ }
+}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java
index 1a60f6538e5273..150e1e48f39afb 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java
@@ -1,6 +1,6 @@
package com.linkedin.gms.factory.entityregistry;
-//import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader;
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
@@ -8,14 +8,14 @@
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
-//import org.springframework.context.annotation.PropertySource;
+import org.springframework.context.annotation.PropertySource;
@Configuration
-//@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
public class PluginEntityRegistryFactory {
- @Value("${ENTITY_REGISTRY_PLUGIN_PATH:/etc/datahub/plugins/models}")
+ @Value("${datahub.plugin.entityRegistry.path}")
private String pluginRegistryPath;
@Bean(name = "pluginEntityRegistry")
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapManagerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java
similarity index 77%
rename from metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapManagerFactory.java
rename to metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java
index b2a60259c0c7eb..6996e9277c14d0 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapManagerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java
@@ -1,10 +1,12 @@
-package com.linkedin.metadata.boot;
+package com.linkedin.metadata.boot.factories;
import com.google.common.collect.ImmutableList;
import com.linkedin.gms.factory.entity.EntityServiceFactory;
+import com.linkedin.metadata.boot.BootstrapManager;
import com.linkedin.metadata.boot.steps.IngestDataPlatformInstancesStep;
import com.linkedin.metadata.boot.steps.IngestDataPlatformsStep;
import com.linkedin.metadata.boot.steps.IngestPoliciesStep;
+import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep;
import com.linkedin.metadata.boot.steps.IngestRootUserStep;
import com.linkedin.metadata.entity.EntityService;
import io.ebean.EbeanServer;
@@ -29,6 +31,10 @@ public class BootstrapManagerFactory {
@Qualifier("ebeanServer")
private EbeanServer _server;
+ @Autowired
+ @Qualifier("ingestRetentionPoliciesStep")
+ private IngestRetentionPoliciesStep _ingestRetentionPoliciesStep;
+
@Bean(name = "bootstrapManager")
@Scope("singleton")
@Nonnull
@@ -38,11 +44,7 @@ protected BootstrapManager createInstance() {
final IngestDataPlatformsStep ingestDataPlatformsStep = new IngestDataPlatformsStep(_entityService);
final IngestDataPlatformInstancesStep ingestDataPlatformInstancesStep =
new IngestDataPlatformInstancesStep(_entityService, _server);
- return new BootstrapManager(
- ImmutableList.of(
- ingestRootUserStep,
- ingestPoliciesStep,
- ingestDataPlatformsStep,
- ingestDataPlatformInstancesStep));
+ return new BootstrapManager(ImmutableList.of(ingestRootUserStep, ingestPoliciesStep, ingestDataPlatformsStep,
+ ingestDataPlatformInstancesStep, _ingestRetentionPoliciesStep));
}
}
\ No newline at end of file
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java
new file mode 100644
index 00000000000000..a5525cfa2c94b0
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java
@@ -0,0 +1,39 @@
+package com.linkedin.metadata.boot.factories;
+
+import com.linkedin.gms.factory.entity.RetentionServiceFactory;
+import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
+import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep;
+import com.linkedin.metadata.entity.RetentionService;
+import javax.annotation.Nonnull;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.PropertySource;
+import org.springframework.context.annotation.Scope;
+
+
+/**
+ * Spring configuration that builds the {@code ingestRetentionPoliciesStep} bean from the retention service and
+ * the retention-related properties.
+ */
+@Configuration
+@Import({RetentionServiceFactory.class})
+@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class)
+public class IngestRetentionPoliciesStepFactory {
+
+  @Autowired
+  @Qualifier("retentionService")
+  private RetentionService _retentionService;
+
+  // Value of the entityService.retention.enabled property.
+  @Value("${entityService.retention.enabled}")
+  private Boolean _enableRetention;
+
+  // Value of the datahub.plugin.retention.path property.
+  @Value("${datahub.plugin.retention.path}")
+  private String _retentionPluginPath;
+
+  /**
+   * Creates the bootstrap step from the injected service, the enabled flag and the plugin path.
+   *
+   * @return a new {@link IngestRetentionPoliciesStep}
+   */
+  @Bean(name = "ingestRetentionPoliciesStep")
+  @Scope("singleton")
+  @Nonnull
+  protected IngestRetentionPoliciesStep createInstance() {
+    final boolean retentionEnabled = _enableRetention;
+    return new IngestRetentionPoliciesStep(_retentionService, retentionEnabled, _retentionPluginPath);
+  }
+}
\ No newline at end of file
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java
new file mode 100644
index 00000000000000..088e955203ecdd
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java
@@ -0,0 +1,151 @@
+package com.linkedin.metadata.boot.steps;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.linkedin.metadata.boot.BootstrapStep;
+import com.linkedin.metadata.dao.utils.RecordUtils;
+import com.linkedin.metadata.entity.RetentionService;
+import com.linkedin.metadata.key.DataHubRetentionKey;
+import com.linkedin.retention.DataHubRetentionConfig;
+import java.io.File;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import javax.annotation.Nonnull;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.core.io.ClassPathResource;
+
+
+/**
+ * Bootstrap step that reads retention policies from YAML and registers them with the {@link RetentionService}.
+ *
+ * <p>The policies bundled on the classpath are read first; policies found under the plugin path are then
+ * overlaid on top of them. If the service reports that any policy was updated, retention is re-applied.
+ */
+@Slf4j
+@RequiredArgsConstructor
+public class IngestRetentionPoliciesStep implements BootstrapStep {
+
+  private final RetentionService _retentionService;
+  private final boolean _enableRetention;
+  private final String pluginPath;
+
+  private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory());
+
+  /** This step is declared as {@link ExecutionMode#ASYNC}. */
+  @Nonnull
+  @Override
+  public ExecutionMode getExecutionMode() {
+    return ExecutionMode.ASYNC;
+  }
+
+  /** Returns the fixed name of this step. */
+  @Override
+  public String name() {
+    return "IngestRetentionPoliciesStep";
+  }
+
+  /**
+   * Loads the default and plugin retention policies, hands each one to the retention service and, if the service
+   * reports at least one update, triggers a batch application of retention. Only logs when retention is disabled.
+   *
+   * @throws IOException if the default config cannot be resolved to a file or a YAML file cannot be read
+   * @throws IllegalArgumentException if a YAML file does not have the expected structure
+   */
+  @Override
+  public void execute() throws IOException, URISyntaxException {
+    log.info("Ingesting default retention...");
+
+    // If retention is disabled, skip step
+    if (!_enableRetention) {
+      log.info("IngestRetentionPolicies disabled. Skipping.");
+      return;
+    }
+
+    // 1. Read default retention config. The result is copied into a fresh HashMap because parseFileOrDir may
+    //    return an immutable empty map, which cannot be the target of the overlay below.
+    // NOTE(review): ClassPathResource#getFile needs the resource to live on the file system - confirm this
+    // still resolves when the application is packaged as a jar.
+    final Map<DataHubRetentionKey, DataHubRetentionConfig> retentionPolicyMap =
+        new HashMap<>(parseFileOrDir(new ClassPathResource("./boot/retention.yaml").getFile()));
+
+    // 2. Read plugin retention config files from input path and overlay (equal keys replace the defaults)
+    retentionPolicyMap.putAll(parseFileOrDir(new File(pluginPath)));
+
+    // 3. Set the specified retention policies
+    log.info("Setting {} policies", retentionPolicyMap.size());
+    boolean hasUpdate = false;
+    for (Map.Entry<DataHubRetentionKey, DataHubRetentionConfig> policy : retentionPolicyMap.entrySet()) {
+      final DataHubRetentionKey key = policy.getKey();
+      if (_retentionService.setRetention(key.getEntityName(), key.getAspectName(), policy.getValue())) {
+        hasUpdate = true;
+      }
+    }
+
+    // 4. If there were updates on any of the retention policies, apply retention to all records
+    if (hasUpdate) {
+      log.info("Applying policies to all records");
+      _retentionService.batchApplyRetention(null, null);
+    }
+  }
+
+  /**
+   * Parses a YAML file, or every YAML file directly inside a directory, into a retention policy map.
+   *
+   * @param retentionFileOrDir file or directory to read; the path does not have to exist
+   * @return the parsed policies; empty when the path is missing, cannot be listed, or is not a YAML file
+   * @throws IOException if a YAML file cannot be read
+   */
+  private Map<DataHubRetentionKey, DataHubRetentionConfig> parseFileOrDir(File retentionFileOrDir)
+      throws IOException {
+    // If path does not exist return empty
+    if (!retentionFileOrDir.exists()) {
+      return Collections.emptyMap();
+    }
+
+    // If directory, parse the yaml files under the directory
+    if (retentionFileOrDir.isDirectory()) {
+      final Map<DataHubRetentionKey, DataHubRetentionConfig> result = new HashMap<>();
+
+      // listFiles() returns null when the directory cannot be read; treat that like an empty directory
+      final File[] children = retentionFileOrDir.listFiles();
+      if (children == null) {
+        log.warn("Could not list plugin directory {}. Skipping", retentionFileOrDir.getPath());
+        return result;
+      }
+
+      for (File retentionFile : children) {
+        if (!retentionFile.isFile()) {
+          log.info("Element {} in plugin directory {} is not a file. Skipping", retentionFile.getPath(),
+              retentionFileOrDir.getPath());
+          continue;
+        }
+        result.putAll(parseFileOrDir(retentionFile));
+      }
+      return result;
+    }
+
+    // If file, parse it only when it carries one of the two YAML extensions
+    final String path = retentionFileOrDir.getPath();
+    if (!path.endsWith(".yaml") && !path.endsWith(".yml")) {
+      log.info("File {} is not a YAML file. Skipping", path);
+      return Collections.emptyMap();
+    }
+    return parseYamlRetentionConfig(retentionFileOrDir);
+  }
+
+  /**
+   * Parse yaml retention config
+   *
+   * The structure of yaml must be a list of retention policies where each element specifies the entity, aspect
+   * to apply the policy to and the policy definition. The policy definition is converted into the
+   * {@link com.linkedin.retention.DataHubRetentionConfig} class.
+   *
+   * @param retentionConfigFile YAML file to parse
+   * @return policies keyed by the entity/aspect pair they apply to
+   * @throws IOException if the file cannot be read
+   * @throws IllegalArgumentException if the document is not an array or an element lacks a required field
+   */
+  private Map<DataHubRetentionKey, DataHubRetentionConfig> parseYamlRetentionConfig(File retentionConfigFile)
+      throws IOException {
+    final JsonNode retentionPolicies = YAML_MAPPER.readTree(retentionConfigFile);
+    if (!retentionPolicies.isArray()) {
+      throw new IllegalArgumentException("Retention config file must contain an array of retention policies");
+    }
+
+    final Map<DataHubRetentionKey, DataHubRetentionConfig> retentionPolicyMap = new HashMap<>();
+
+    for (JsonNode retentionPolicy : retentionPolicies) {
+      // Validate in the order entity, aspect, config so the first missing field is the one reported
+      if (!retentionPolicy.has("entity")) {
+        throw new IllegalArgumentException(
+            "Each element in the retention config must contain field entity. Set to * for setting defaults");
+      }
+      if (!retentionPolicy.has("aspect")) {
+        throw new IllegalArgumentException(
+            "Each element in the retention config must contain field aspect. Set to * for setting defaults");
+      }
+      if (!retentionPolicy.has("config")) {
+        throw new IllegalArgumentException("Each element in the retention config must contain field config");
+      }
+
+      final DataHubRetentionKey key = new DataHubRetentionKey();
+      key.setEntityName(retentionPolicy.get("entity").asText());
+      key.setAspectName(retentionPolicy.get("aspect").asText());
+
+      final DataHubRetentionConfig retentionInfo =
+          RecordUtils.toRecordTemplate(DataHubRetentionConfig.class, retentionPolicy.get("config").toString());
+
+      retentionPolicyMap.put(key, retentionInfo);
+    }
+    return retentionPolicyMap;
+  }
+}
diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml
index 71c80f1a846762..62faf6ad2bc1c1 100644
--- a/metadata-service/factories/src/main/resources/application.yml
+++ b/metadata-service/factories/src/main/resources/application.yml
@@ -32,6 +32,16 @@ datahub:
sslContext:
protocol: ${DATAHUB_GMS_SSL_PROTOCOL:${GMS_SSL_PROTOCOL:#{null}}}
+ plugin:
+ entityRegistry:
+ path: ${ENTITY_REGISTRY_PLUGIN_PATH:/etc/datahub/plugins/models}
+ retention:
+ path: ${RETENTION_PLUGIN_PATH:/etc/datahub/plugins/retention}
+
+entityService:
+ retention:
+ enabled: ${ENTITY_SERVICE_ENABLE_RETENTION:false}
+
graphService:
type: ${GRAPH_SERVICE_IMPL:elasticsearch}
@@ -42,9 +52,6 @@ configEntityRegistry:
# TODO: Change to read from resources on classpath.
path: ${ENTITY_REGISTRY_CONFIG_PATH:../../metadata-models/src/main/resources/entity-registry.yml}
-pluginEntityRegistry:
- path: ${ENTITY_REGISTRY_PLUGIN_PATH:$HOME/.datahub/plugins/models}
-
authorizationManager:
enabled: ${AUTH_POLICIES_ENABLED:true}
cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120}
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
index b89becccc8d317..5cdb05c3f89f4b 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
@@ -708,7 +708,7 @@
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
- "fieldType" : "URN_PARTIAL",
+ "fieldType" : "URN",
"filterNameOverride" : "Tag",
"hasValuesFieldName" : "hasTags"
}
@@ -785,7 +785,7 @@
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
- "fieldType" : "URN_PARTIAL",
+ "fieldType" : "URN",
"filterNameOverride" : "Glossary Term"
}
} ]
@@ -2027,20 +2027,21 @@
"items" : {
"type" : "record",
"name" : "SchemaField",
- "doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema",
+ "doc" : "SchemaField to describe metadata related to dataset schema.",
"fields" : [ {
"name" : "fieldPath",
"type" : "com.linkedin.dataset.SchemaFieldPath",
- "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.",
+ "doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
"fieldName" : "fieldPaths",
- "fieldType" : "TEXT_PARTIAL"
+ "fieldType" : "TEXT"
}
}, {
"name" : "jsonPath",
"type" : "string",
"doc" : "Flattened name of a field in JSON Path notation.",
- "optional" : true
+ "optional" : true,
+ "Deprecated" : true
}, {
"name" : "nullable",
"type" : "boolean",
@@ -2178,7 +2179,7 @@
"/tags/*/tag" : {
"boostScore" : 0.5,
"fieldName" : "fieldTags",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2190,7 +2191,7 @@
"/terms/*/urn" : {
"boostScore" : 0.5,
"fieldName" : "fieldGlossaryTerms",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2348,7 +2349,7 @@
"/tags/*/tag" : {
"boostScore" : 0.5,
"fieldName" : "editedFieldTags",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2360,7 +2361,7 @@
"/terms/*/urn" : {
"boostScore" : 0.5,
"fieldName" : "editedFieldGlossaryTerms",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
} ]
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
index de46ad6ed6f097..e9852ba46ede08 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
@@ -776,7 +776,7 @@
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
- "fieldType" : "URN_PARTIAL",
+ "fieldType" : "URN",
"filterNameOverride" : "Tag",
"hasValuesFieldName" : "hasTags"
}
@@ -853,7 +853,7 @@
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
- "fieldType" : "URN_PARTIAL",
+ "fieldType" : "URN",
"filterNameOverride" : "Glossary Term"
}
} ]
@@ -2505,20 +2505,21 @@
"items" : {
"type" : "record",
"name" : "SchemaField",
- "doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema",
+ "doc" : "SchemaField to describe metadata related to dataset schema.",
"fields" : [ {
"name" : "fieldPath",
"type" : "com.linkedin.dataset.SchemaFieldPath",
- "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.",
+ "doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
"fieldName" : "fieldPaths",
- "fieldType" : "TEXT_PARTIAL"
+ "fieldType" : "TEXT"
}
}, {
"name" : "jsonPath",
"type" : "string",
"doc" : "Flattened name of a field in JSON Path notation.",
- "optional" : true
+ "optional" : true,
+ "Deprecated" : true
}, {
"name" : "nullable",
"type" : "boolean",
@@ -2656,7 +2657,7 @@
"/tags/*/tag" : {
"boostScore" : 0.5,
"fieldName" : "fieldTags",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2668,7 +2669,7 @@
"/terms/*/urn" : {
"boostScore" : 0.5,
"fieldName" : "fieldGlossaryTerms",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2826,7 +2827,7 @@
"/tags/*/tag" : {
"boostScore" : 0.5,
"fieldName" : "editedFieldTags",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2838,7 +2839,7 @@
"/terms/*/urn" : {
"boostScore" : 0.5,
"fieldName" : "editedFieldGlossaryTerms",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
} ]
@@ -4463,10 +4464,93 @@
"keyAspect" : "schemaFieldKey",
"name" : "schemaField"
}
+ }, {
+ "type" : "record",
+ "name" : "DataHubRetentionSnapshot",
+ "doc" : "A metadata snapshot for DataHub Access Policy data.",
+ "fields" : [ {
+ "name" : "urn",
+ "type" : "com.linkedin.common.Urn",
+ "doc" : "URN for the entity the metadata snapshot is associated with."
+ }, {
+ "name" : "aspects",
+ "type" : {
+ "type" : "array",
+ "items" : {
+ "type" : "typeref",
+ "name" : "DataHubRetentionAspect",
+ "namespace" : "com.linkedin.metadata.aspect",
+ "doc" : "A union of all supported metadata aspects for a DataHub access policy.",
+ "ref" : [ {
+ "type" : "record",
+ "name" : "DataHubRetentionKey",
+ "namespace" : "com.linkedin.metadata.key",
+ "doc" : "Key for a DataHub Retention",
+ "fields" : [ {
+ "name" : "entityName",
+ "type" : "string",
+ "doc" : "Entity name to apply retention to. * (or empty) for applying defaults."
+ }, {
+ "name" : "aspectName",
+ "type" : "string",
+ "doc" : "Aspect name to apply retention to. * (or empty) for applying defaults."
+ } ],
+ "Aspect" : {
+ "name" : "dataHubRetentionKey"
+ }
+ }, {
+ "type" : "record",
+ "name" : "DataHubRetentionConfig",
+ "namespace" : "com.linkedin.retention",
+ "fields" : [ {
+ "name" : "retention",
+ "type" : {
+ "type" : "record",
+ "name" : "Retention",
+ "doc" : "Base class that encapsulates different retention policies.\nOnly one of the fields should be set",
+ "fields" : [ {
+ "name" : "version",
+ "type" : {
+ "type" : "record",
+ "name" : "VersionBasedRetention",
+ "doc" : "Keep max N latest records",
+ "fields" : [ {
+ "name" : "maxVersions",
+ "type" : "int"
+ } ]
+ },
+ "optional" : true
+ }, {
+ "name" : "time",
+ "type" : {
+ "type" : "record",
+ "name" : "TimeBasedRetention",
+ "doc" : "Keep records that are less than X seconds old",
+ "fields" : [ {
+ "name" : "maxAgeInSeconds",
+ "type" : "int"
+ } ]
+ },
+ "optional" : true
+ } ]
+ }
+ } ],
+ "Aspect" : {
+ "name" : "dataHubRetentionConfig"
+ }
+ } ]
+ }
+ },
+ "doc" : "The list of metadata aspects associated with the DataHub access policy."
+ } ],
+ "Entity" : {
+ "keyAspect" : "dataHubRetentionKey",
+ "name" : "dataHubRetention"
+ }
} ]
}
} ]
- }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", {
+ }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", {
"type" : "record",
"name" : "BrowseResult",
"namespace" : "com.linkedin.metadata.browse",
@@ -4550,7 +4634,7 @@
"type" : "int",
"doc" : "The total number of elements (entities + groups) directly under queried path"
} ]
- }, "com.linkedin.metadata.browse.BrowseResultEntity", "com.linkedin.metadata.browse.BrowseResultGroup", "com.linkedin.metadata.browse.BrowseResultMetadata", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", {
+ }, "com.linkedin.metadata.browse.BrowseResultEntity", "com.linkedin.metadata.browse.BrowseResultGroup", "com.linkedin.metadata.browse.BrowseResultMetadata", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", {
"type" : "record",
"name" : "AutoCompleteResult",
"namespace" : "com.linkedin.metadata.query",
@@ -4889,7 +4973,7 @@
"type" : "int",
"doc" : "The total number of entities directly under searched path"
} ]
- }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", 
"com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", {
+ }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", 
"com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", {
"type" : "record",
"name" : "SystemMetadata",
"namespace" : "com.linkedin.mxe",
@@ -4925,7 +5009,7 @@
"doc" : "Additional properties",
"optional" : true
} ]
- }, "com.linkedin.policy.DataHubActorFilter", "com.linkedin.policy.DataHubPolicyInfo", "com.linkedin.policy.DataHubResourceFilter", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ],
+ }, "com.linkedin.policy.DataHubActorFilter", "com.linkedin.policy.DataHubPolicyInfo", "com.linkedin.policy.DataHubResourceFilter", "com.linkedin.retention.DataHubRetentionConfig", "com.linkedin.retention.Retention", "com.linkedin.retention.TimeBasedRetention", "com.linkedin.retention.VersionBasedRetention", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ],
"schema" : {
"name" : "entities",
"namespace" : "com.linkedin.entity",
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
index 79480a199a7c6d..4dbbeaec6bf5dc 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
@@ -556,7 +556,7 @@
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
- "fieldType" : "URN_PARTIAL",
+ "fieldType" : "URN",
"filterNameOverride" : "Tag",
"hasValuesFieldName" : "hasTags"
}
@@ -633,7 +633,7 @@
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
- "fieldType" : "URN_PARTIAL",
+ "fieldType" : "URN",
"filterNameOverride" : "Glossary Term"
}
} ]
@@ -1862,20 +1862,21 @@
"items" : {
"type" : "record",
"name" : "SchemaField",
- "doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema",
+ "doc" : "SchemaField to describe metadata related to dataset schema.",
"fields" : [ {
"name" : "fieldPath",
"type" : "com.linkedin.dataset.SchemaFieldPath",
- "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.",
+ "doc" : "Flattened name of the field. Field is computed from jsonPath field.",
"Searchable" : {
"fieldName" : "fieldPaths",
- "fieldType" : "TEXT_PARTIAL"
+ "fieldType" : "TEXT"
}
}, {
"name" : "jsonPath",
"type" : "string",
"doc" : "Flattened name of a field in JSON Path notation.",
- "optional" : true
+ "optional" : true,
+ "Deprecated" : true
}, {
"name" : "nullable",
"type" : "boolean",
@@ -2013,7 +2014,7 @@
"/tags/*/tag" : {
"boostScore" : 0.5,
"fieldName" : "fieldTags",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2025,7 +2026,7 @@
"/terms/*/urn" : {
"boostScore" : 0.5,
"fieldName" : "fieldGlossaryTerms",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2183,7 +2184,7 @@
"/tags/*/tag" : {
"boostScore" : 0.5,
"fieldName" : "editedFieldTags",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
}, {
@@ -2195,7 +2196,7 @@
"/terms/*/urn" : {
"boostScore" : 0.5,
"fieldName" : "editedFieldGlossaryTerms",
- "fieldType" : "URN_PARTIAL"
+ "fieldType" : "URN"
}
}
} ]
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java
index e294fe4ca3692f..2fa428aa37046e 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java
@@ -355,7 +355,7 @@ public String ingestProposal(@Nonnull MetadataChangeProposal metadataChangePropo
final List additionalChanges =
AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService);
- Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp);
+ Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp).getUrn();
additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp));
return urn.toString();
}
diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java
index e61a2b818855d5..eb6c0428b5b8c1 100644
--- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java
+++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java
@@ -132,7 +132,7 @@ public Task ingestProposal(
return RestliUtil.toTask(() -> {
log.debug("Proposal: {}", metadataChangeProposal);
try {
- Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp);
+ Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp).getUrn();
additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp));
return urn.toString();
} catch (ValidationException e) {
diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java
index 10a5df70c458bb..ccbadb931482ed 100644
--- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java
+++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java
@@ -58,9 +58,22 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.maven.artifact.versioning.ComparableVersion;
-import static com.linkedin.metadata.entity.ValidationUtils.*;
-import static com.linkedin.metadata.restli.RestliConstants.*;
-import static com.linkedin.metadata.utils.PegasusUtils.*;
+import static com.linkedin.metadata.entity.ValidationUtils.validateOrThrow;
+import static com.linkedin.metadata.restli.RestliConstants.ACTION_AUTOCOMPLETE;
+import static com.linkedin.metadata.restli.RestliConstants.ACTION_BROWSE;
+import static com.linkedin.metadata.restli.RestliConstants.ACTION_GET_BROWSE_PATHS;
+import static com.linkedin.metadata.restli.RestliConstants.ACTION_INGEST;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_ASPECTS;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_FIELD;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_FILTER;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_INPUT;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_LIMIT;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_PATH;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_QUERY;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_SORT;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_START;
+import static com.linkedin.metadata.restli.RestliConstants.PARAM_URN;
+import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName;
/**
diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java
index 5717d24c9b6f67..86da4b37dda156 100644
--- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java
+++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java
@@ -24,6 +24,7 @@
public class Config extends HttpServlet {
Map config = new HashMap() {{
put("noCode", "true");
+ put("retention", "true");
}};
ObjectMapper objectMapper = new ObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL);
@@ -51,8 +52,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws IO
PrintWriter out = resp.getWriter();
try {
- Map config = new HashMap<>();
- config.put("noCode", "true");
+ Map config = new HashMap<>(this.config);
Map> pluginTree =
getPluginModels(req.getServletContext());
config.put("models", pluginTree);
diff --git a/metadata-service/war/src/main/resources/boot/retention.yaml b/metadata-service/war/src/main/resources/boot/retention.yaml
new file mode 100644
index 00000000000000..e564a9124aa09e
--- /dev/null
+++ b/metadata-service/war/src/main/resources/boot/retention.yaml
@@ -0,0 +1,14 @@
+- entity: "*"
+ aspect: "*"
+ config:
+ retention:
+ version:
+ maxVersions: 20
+#- entity: dataset
+# aspect: datasetProperties
+# config:
+# retention:
+# version:
+# maxVersions: 10
+# time:
+# maxAgeInSeconds: 2592000 # 30 days
\ No newline at end of file
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java b/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java
index f8f4ffe7891b22..07b8e9dfb23b61 100644
--- a/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java
@@ -29,11 +29,11 @@ public class Constants {
public static final String CORP_USER_STATUS_ASPECT_NAME = "corpUserStatus";
public static final String CORP_USER_KEY_ASPECT_NAME = "corpUserKey";
-
/**
* User Status
*/
public static final String CORP_USER_STATUS_ACTIVE = "ACTIVE";
- private Constants() { }
+ private Constants() {
+ }
}
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java
index b6ddcb82cd707b..14b2bfd7056232 100644
--- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java
@@ -139,7 +139,7 @@ public static Urn convertEntityKeyToUrn(@Nonnull final RecordTemplate keyAspect,
final List urnParts = new ArrayList<>();
for (RecordDataSchema.Field field : keyAspect.schema().getFields()) {
Object value = keyAspect.data().get(field.getName());
- String valueString = value.toString();
+ String valueString = value == null ? "" : value.toString();
urnParts.add(valueString); // TODO: Determine whether all fields, including urns, should be URL encoded.
}
return Urn.createFromTuple(entityName, urnParts);
diff --git a/perf-test/locustfiles/ingest.py b/perf-test/locustfiles/ingest.py
index 0f7535a68476c7..0aa15a3b78b198 100644
--- a/perf-test/locustfiles/ingest.py
+++ b/perf-test/locustfiles/ingest.py
@@ -10,11 +10,15 @@
)
from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProperties
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
-from locust import HttpUser, between, task
+from locust import HttpUser, constant, task
class IngestUser(HttpUser):
- wait_time = between(1, 5)
+ wait_time = constant(1)
+
+ @task
+ def config(self):
+ self.client.get("/config")
@task
def ingest(self):
diff --git a/settings.gradle b/settings.gradle
index 36d3b3d8c499b4..f654dd1275ebbe 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -39,3 +39,4 @@ include 'metadata-perf'
include 'docs-website'
include 'metadata-models-custom'
include 'entity-registry:custom-test-model'
+include 'spark-lineage'
diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh
index 4169c1709eae56..d6c69f8b6ca134 100755
--- a/smoke-test/smoke.sh
+++ b/smoke-test/smoke.sh
@@ -23,4 +23,6 @@ datahub docker quickstart \
--quickstart-compose-file ../docker/docker-compose.dev.yml \
--dump-logs-on-failure
+(cd tests/cypress ; yarn install)
+
pytest -vv --continue-on-collection-errors --junit-xml=junit.smoke.xml
diff --git a/metadata-ingestion/src/datahub/integrations/__init__.py b/smoke-test/tests/cypress/__init__.py
similarity index 100%
rename from metadata-ingestion/src/datahub/integrations/__init__.py
rename to smoke-test/tests/cypress/__init__.py
diff --git a/smoke-test/tests/cypress/cypress.json b/smoke-test/tests/cypress/cypress.json
new file mode 100644
index 00000000000000..33003d2939f4dd
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress.json
@@ -0,0 +1,3 @@
+{
+ "baseUrl": "http://localhost:9002"
+}
diff --git a/smoke-test/tests/cypress/cypress/fixtures/example.json b/smoke-test/tests/cypress/cypress/fixtures/example.json
new file mode 100644
index 00000000000000..02e4254378e978
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/fixtures/example.json
@@ -0,0 +1,5 @@
+{
+ "name": "Using fixtures to represent data",
+ "email": "hello@cypress.io",
+ "body": "Fixtures are a great way to mock data for responses to routes"
+}
diff --git a/smoke-test/tests/cypress/cypress/integration/login/login.js b/smoke-test/tests/cypress/cypress/integration/login/login.js
new file mode 100644
index 00000000000000..d5799c52c9ab58
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/integration/login/login.js
@@ -0,0 +1,9 @@
+describe('login', () => { // UI login spec: signs in through the login form rather than via an API request
+ it('logs in', () => {
+ cy.visit('/'); // relative path; Cypress resolves it against the configured baseUrl
+ cy.get('input[placeholder=Username]').type('datahub');
+ cy.get('input[placeholder=Password]').type('datahub');
+ cy.contains('Log in').click();
+ cy.contains('Welcome back, datahub'); // the greeting text is the success check for the login
+ });
+})
diff --git a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
new file mode 100644
index 00000000000000..4a5b4eb8924c4a
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
@@ -0,0 +1,40 @@
+describe('mutations', () => {
+ it('can create and add a tag to dataset and visit new tag page', () => {
+ cy.deleteUrn('urn:li:tag:CypressTestAddTag') // delete the tag first (presumably to clear leftovers from an earlier run)
+ cy.login();
+ cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)');
+ cy.contains('cypress_logging_events');
+ // click the 'Add Tag' control on the dataset page
+ cy.contains('Add Tag').click();
+ // type the new tag name into whichever element currently has focus
+ cy.focused().type('CypressTestAddTag');
+ // choose the option offering to create the not-yet-existing tag
+ cy.contains('Create CypressTestAddTag').click();
+ // fill in the tag description
+ cy.get('textarea').type('CypressTestAddTag Test Description');
+ // click the element whose text ends with 'Create' (the regex avoids matching 'Create CypressTestAddTag')
+ cy.contains(/Create$/).click();
+
+ // go to tag page
+ cy.get('a[href="/tag/urn:li:tag:CypressTestAddTag"]').click();
+
+ // title of tag page
+ cy.contains('CypressTestAddTag');
+
+ // description of tag page
+ cy.contains('CypressTestAddTag Test Description');
+
+ // used by panel - click to search
+ cy.contains('1 Datasets').click();
+
+ // verify dataset shows up in search now
+ cy.contains('of 1 result').click();
+ cy.contains('cypress_logging_events').click();
+ cy.get('a[href="/tag/urn:li:tag:CypressTestAddTag"]').within(() => cy.get('span[aria-label=close]').click());
+ cy.contains('Yes').click();
+ // after confirming with 'Yes', the tag link must no longer be present on the page
+ cy.get('a[href="/tag/urn:li:tag:CypressTestAddTag"]').should('not.exist');
+ // delete the tag entity that this test created
+ cy.deleteUrn('urn:li:tag:CypressTestAddTag')
+ });
+})
diff --git a/smoke-test/tests/cypress/cypress/integration/search/search.js b/smoke-test/tests/cypress/cypress/integration/search/search.js
new file mode 100644
index 00000000000000..a4d0a6238da246
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/integration/search/search.js
@@ -0,0 +1,44 @@
+describe('search', () => {
+ it('can hit all entities search, see some results (testing this any more is tricky because it is cached for now)', () => {
+ cy.login();
+ cy.visit('/');
+ cy.get('input[data-testid=search-input]').type('*{enter}');
+ cy.contains('of 0 results').should('not.exist');
+ cy.contains(/of [0-9]+ results/);
+ });
+
+ it('can hit all entities search with an impossible query and find 0 results', () => {
+ cy.login();
+ cy.visit('/');
+ // random string that is unlikely to accidentally have a match
+ cy.get('input[data-testid=search-input]').type('zzzzzzzzzzzzzqqqqqqqqqqqqqzzzzzzqzqzqzqzq{enter}');
+ cy.contains('of 0 results');
+ });
+
+ it('can search, find a result, and visit the dataset page', () => {
+ cy.login();
+ cy.visit('http://localhost:9002/search?filter_entity=DATASET&filter_tags=urn%3Ali%3Atag%3ACypress&page=1&query=users_created')
+ cy.contains('of 1 result');
+
+ cy.contains('Cypress')
+
+ cy.contains('fct_cypress_users_created').click();
+
+ // platform
+ cy.contains('Hive');
+
+ // entity type
+ cy.contains('Dataset');
+
+ // entity name
+ cy.contains('fct_cypress_users_created');
+
+ // column name
+ cy.contains('user_id');
+ // column description
+ cy.contains('Id of the user');
+
+ // table description
+ cy.contains('table containing all the users created on a single day');
+ });
+})
\ No newline at end of file
diff --git a/smoke-test/tests/cypress/cypress/plugins/index.js b/smoke-test/tests/cypress/cypress/plugins/index.js
new file mode 100644
index 00000000000000..59b2bab6e4e605
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/plugins/index.js
@@ -0,0 +1,22 @@
+/// <reference types="cypress" />
+// ***********************************************************
+// This example plugins/index.js can be used to load plugins
+//
+// You can change the location of this file or turn off loading
+// the plugins file with the 'pluginsFile' configuration option.
+//
+// You can read more here:
+// https://on.cypress.io/plugins-guide
+// ***********************************************************
+
+// This function is called when a project is opened or re-opened (e.g. due to
+// the project's config changing)
+
+/** Cypress plugins entry point; the exported function body is currently empty.
+ * @type {Cypress.PluginConfig}
+ */
+// eslint-disable-next-line no-unused-vars
+module.exports = (on, config) => {
+ // `on` is used to hook into various events Cypress emits (no handlers are registered here)
+ // `config` is the resolved Cypress config (not read here; nothing is returned)
+}
diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js
new file mode 100644
index 00000000000000..74b071682c3f0d
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/support/commands.js
@@ -0,0 +1,39 @@
+// ***********************************************
+// This example commands.js shows you how to
+// create various custom commands and overwrite
+// existing commands.
+//
+// For more comprehensive examples of custom
+// commands please read more here:
+// https://on.cypress.io/custom-commands
+// ***********************************************
+//
+//
+// -- Parent command: cy.login() POSTs the datahub/datahub credentials to /logIn (no UI interaction) --
+Cypress.Commands.add('login', () => {
+ cy.request('POST', '/logIn', {
+ username: 'datahub',
+ password: 'datahub',
+ })
+})
+// cy.deleteUrn(urn): POSTs { urn } to the entities `delete` action; the host http://localhost:8080 is hard-coded, not taken from baseUrl
+Cypress.Commands.add('deleteUrn', (urn) => {
+ cy.request({ method: 'POST', url: 'http://localhost:8080/entities?action=delete', body: {
+ urn
+ }, headers: {
+ "X-RestLi-Protocol-Version": "2.0.0",
+ "Content-Type": "application/json",
+ }})
+})
+//
+//
+// -- This is a child command --
+// Cypress.Commands.add('drag', { prevSubject: 'element'}, (subject, options) => { ... })
+//
+//
+// -- This is a dual command --
+// Cypress.Commands.add('dismiss', { prevSubject: 'optional'}, (subject, options) => { ... })
+//
+//
+// -- This will overwrite an existing command --
+// Cypress.Commands.overwrite('visit', (originalFn, url, options) => { ... })
diff --git a/smoke-test/tests/cypress/cypress/support/index.js b/smoke-test/tests/cypress/cypress/support/index.js
new file mode 100644
index 00000000000000..d68db96df2697e
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/support/index.js
@@ -0,0 +1,20 @@
+// ***********************************************************
+// This example support/index.js is processed and
+// loaded automatically before your test files.
+//
+// This is a great place to put global configuration and
+// behavior that modifies Cypress.
+//
+// You can change the location of this file or turn off
+// automatically serving support files with the
+// 'supportFile' configuration option.
+//
+// You can read more here:
+// https://on.cypress.io/configuration
+// ***********************************************************
+
+// Import commands.js using ES2015 syntax:
+import './commands'
+
+// Alternatively you can use CommonJS syntax:
+// require('./commands')
diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json
new file mode 100644
index 00000000000000..22362837df6635
--- /dev/null
+++ b/smoke-test/tests/cypress/data.json
@@ -0,0 +1,1560 @@
+[
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.BrowsePaths": {
+ "paths": ["/prod/kafka/SampleKafkaDataset"]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "description": null,
+ "uri": null,
+ "tags": [],
+ "customProperties": {
+ "prop1": "fakeprop",
+ "prop2": "pikachu"
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleKafkaSchema",
+ "platform": "urn:li:dataPlatform:kafka",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleKafkaSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Kafka dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "[version=2.0].[type=boolean].field_foo_2",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Foo field description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "globalTags": {
+ "tags": [{ "tag": "urn:li:tag:NeedsDocumentation" }]
+ },
+ "recursive": false
+ },
+ {
+ "fieldPath": "[version=2.0].[type=boolean].field_bar",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Bar field description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ },
+ {
+ "fieldPath": "[version=2.0].[key=True].[type=int].id",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Id specifying which partition the message should go to"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": null,
+ "foreignKeysSpecs": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.BrowsePaths": {
+ "paths": ["/prod/hdfs/SampleCypressHdfsDataset"]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)",
+ "type": "TRANSFORMED"
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.EditableSchemaMetadata": {
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "editableSchemaFieldInfo": [
+ {
+ "fieldPath": "shipment_info",
+ "globalTags": { "tags": [{ "tag": "urn:li:tag:Legacy" }] }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleHdfsSchema",
+ "platform": "urn:li:dataPlatform:hdfs",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHdfsSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample HDFS dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "shipment_info",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "shipment_info.date",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info date description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.DateType": {}
+ }
+ },
+ "nativeDataType": "Date",
+ "recursive": false
+ },
+ {
+ "fieldPath": "shipment_info.target",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info target description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "text",
+ "recursive": false
+ },
+ {
+ "fieldPath": "shipment_info.destination",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info destination description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "shipment_info.geo_info",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info geo_info description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "shipment_info.geo_info.lat",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info geo_info lat"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "float",
+ "recursive": false
+ },
+ {
+ "fieldPath": "shipment_info.geo_info.lng",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Shipment info geo_info lng"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "float",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": null,
+ "foreignKeysSpecs": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)",
+ "type": "TRANSFORMED"
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleHiveSchema",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "field_foo",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Foo field description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false,
+ "isPartOfKey": true
+ },
+ {
+ "fieldPath": "field_bar",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Bar field description"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": null,
+ "foreignKeysSpecs": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "description": "table where each row represents a single log event",
+ "uri": null,
+ "tags": [],
+ "customProperties": {
+ "encoding": "utf-8"
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)",
+ "type": "TRANSFORMED"
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleHiveSchema",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "event_name",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Name of your logging event"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "event_data",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Data of your event"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ },
+ {
+ "fieldPath": "timestamp",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "TS the event was ingested"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ },
+ {
+ "fieldPath": "browser",
+ "jsonPath": null,
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": null,
+ "foreignKeysSpecs": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created_no_tag,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "description": "table containing all the users created on a single day",
+ "uri": null,
+ "tags": [],
+ "customProperties": {
+ "encoding": "utf-8"
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)",
+ "type": "TRANSFORMED"
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleHiveSchema",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "user_id",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Id of the user created"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "user_name",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Name of the user who signed up"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": null,
+ "foreignKeysSpecs": null,
+ "foreignKeys": [{
+ "name": "user id",
+ "foreignFields": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD),user_id)"
+ ],
+ "sourceFields": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),user_id)"
+ ],
+ "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"
+ }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "description": "table containing all the users created on a single day",
+ "uri": null,
+ "tags": [],
+ "customProperties": {
+ "encoding": "utf-8"
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)",
+ "type": "TRANSFORMED"
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleHiveSchema",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "user_id",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Id of the user created"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "user_name",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Name of the user who signed up"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": null,
+ "foreignKeysSpecs": null,
+ "foreignKeys": [{
+ "name": "user id",
+ "foreignFields": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD),user_id)"
+ ],
+ "sourceFields": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),user_id)"
+ ],
+ "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"
+ }]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "description": "table containing all the users deleted on a single day",
+ "uri": null,
+ "tags": [],
+ "customProperties": {
+ "encoding": "utf-8"
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ },
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)",
+ "type": "TRANSFORMED"
+ },
+ {
+ "auditStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)",
+ "type": "TRANSFORMED"
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.InstitutionalMemory": {
+ "elements": [
+ {
+ "url": "https://www.linkedin.com",
+ "description": "Sample doc",
+ "createStamp": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "SampleHiveSchema",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "deleted": null,
+ "dataset": null,
+ "cluster": null,
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.KafkaSchema": {
+ "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}"
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "user_name",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Name of the user who was deleted"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "timestamp",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Timestamp user was deleted at"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "long",
+ "recursive": false
+ },
+ {
+ "fieldPath": "user_id",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Id of the user deleted"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "browser_id",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Cookie attached to identify the browser"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "session_id",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Cookie attached to identify the session"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ },
+ {
+ "fieldPath": "deletion_reason",
+ "jsonPath": null,
+ "nullable": false,
+ "description": {
+ "string": "Why the user chose to deactivate"
+ },
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "varchar(100)",
+ "recursive": false
+ }
+ ],
+ "primaryKeys": ["user_name"],
+ "foreignKeysSpecs": null,
+ "foreignKeys": [{
+ "name": "user session",
+ "foreignFields": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD),user_id)"
+ ],
+ "sourceFields": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD),user_id)"
+ ],
+ "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)"
+ }]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": {
+ "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:datahub",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.datajob.DataJobInfo": {
+ "name": "User Creations",
+ "description": "Constructs the fct_users_created from logging_events",
+ "type": "SQL",
+ "flowUrn": "urn:li:dataFlow:(airflow,cypress_dag_abc,PROD)"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": {
+ "inputDatasets": [
+ "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"
+ ],
+ "outputDatasets": [
+ "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)"
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": {
+ "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_456)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:datahub",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.datajob.DataJobInfo": {
+ "name": "User Deletions",
+ "description": "Constructs the fct_users_deleted from logging_events",
+ "type": "SQL",
+ "flowUrn": "urn:li:dataFlow:(airflow,cypress_dag_abc,PROD)"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": {
+ "inputDatasets": [
+ "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"
+ ],
+ "outputDatasets": [
+ "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)"
+ ],
+ "inputDatajobs": [
+ "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)"
+ ]
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": {
+ "urn": "urn:li:dataFlow:(airflow,cypress_dag_abc,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:datahub",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:datahub",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.datajob.DataFlowInfo": {
+ "name": "Users",
+ "description": "Constructs the fct_users_deleted and fct_users_created tables",
+ "project": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
+ "urn": "urn:li:chart:(looker,cypress_baz1)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.chart.ChartInfo": {
+ "title": "Baz Chart 1",
+ "description": "Baz Chart 1",
+ "lastModified": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:datahub",
+ "impersonator": null
+ },
+ "deleted": null
+ },
+ "chartUrl": null,
+ "inputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"
+ ],
+ "type": null,
+ "access": null,
+ "lastRefreshed": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
+ "urn": "urn:li:chart:(looker,cypress_baz2)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.chart.ChartInfo": {
+ "title": "Baz Chart 2",
+ "description": "Baz Chart 2",
+ "lastModified": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:datahub",
+ "impersonator": null
+ },
+ "deleted": null
+ },
+ "chartUrl": null,
+ "inputs": {
+ "array": [
+ {
+ "string": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)"
+ }
+ ]
+ },
+ "type": null,
+ "access": null,
+ "lastRefreshed": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
+ "urn": "urn:li:dashboard:(looker,cypress_baz)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpGroup:bfoo",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
+ "title": "Baz Dashboard",
+ "description": "Baz Dashboard",
+ "customProperties": {
+ "prop1": "fakeprop",
+ "prop2": "pikachu"
+ },
+ "charts": [
+ "urn:li:chart:(looker,cypress_baz1)",
+ "urn:li:chart:(looker,cypress_baz2)"
+ ],
+ "lastModified": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:datahub",
+ "impersonator": null
+ },
+ "deleted": null
+ },
+ "dashboardUrl": null,
+ "access": null,
+ "lastRefreshed": null
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.GlobalTags": {
+ "tags": [{ "tag": "urn:li:tag:Cypress" }]
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
+ "urn": "urn:li:glossaryTerm:CypressNode.CypressTerm",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
+ "definition": "a product provided to consumers and businesses by a bank or similar depository institution such as a checking account, savings account, certificate of deposit, debit or pre-paid card, or credit card",
+ "parentNode": "urn:li:glossaryNode:CypressNode",
+ "sourceRef": "FIBO",
+ "termSource": "EXTERNAL",
+ "sourceUrl": "https://spec.edmcouncil.org/fibo/ontology/FBC/FunctionalEntities/FinancialServicesEntities/BankingProduct",
+ "customProperties": {
+ "FQDN": "SavingAccount"
+ }
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [{
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER"
+ }],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
+ "urn": "urn:li:glossaryNode:CypressNode",
+ "aspects": [{
+ "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
+ "definition": "Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparty identities"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [{
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER"
+ }],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe"
+ }
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ },
+ {
+ "auditHeader": null,
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
+ "urn": "urn:li:tag:Cypress",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.tag.TagProperties": {
+ "name": "Cypress",
+ "description": "Indicates the entity is for cypress integration test purposes"
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.common.Ownership": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:jdoe",
+ "type": "DATAOWNER",
+ "source": null
+ }
+ ],
+ "lastModified": {
+ "time": 1581407189000,
+ "actor": "urn:li:corpuser:jdoe",
+ "impersonator": null
+ }
+ }
+ }
+ ]
+ }
+ },
+ "proposedDelta": null
+ }
+]
\ No newline at end of file
diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py
new file mode 100644
index 00000000000000..2eb716fbf75b12
--- /dev/null
+++ b/smoke-test/tests/cypress/integration_test.py
@@ -0,0 +1,35 @@
+import pytest
+import subprocess
+
+from tests.utils import ingest_file_via_rest
+from tests.utils import delete_urns_from_file
+
+
+@pytest.fixture(scope="module", autouse=True)
+def ingest_cleanup_data():
+    """Pass tests/cypress/data.json to ingest_file_via_rest before this module's tests and to delete_urns_from_file after them."""
+    print("ingesting test data")
+    ingest_file_via_rest("tests/cypress/data.json")
+    yield
+    print("removing test data")
+    delete_urns_from_file("tests/cypress/data.json")
+
+
+def test_run_cypress(frontend_session, wait_for_healthchecks):
+    """Run `npx cypress run` inside tests/cypress and fail unless it exits with code 0.
+
+    Neither parameter is referenced in the body; presumably both are pytest
+    fixtures defined elsewhere that must be resolved first (TODO confirm).
+    """
+    command = "npx cypress run"
+    print('starting?')
+    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd="tests/cypress")
+    # communicate() drains stdout and stderr together. Reading stdout to EOF
+    # before touching stderr can deadlock once the child fills the stderr pipe.
+    stdout, stderr = proc.communicate()
+    return_code = proc.returncode
+    print(stdout.decode("utf-8"))
+    print('stderr output:')
+    print(stderr.decode("utf-8"))
+    print('return code', return_code)
+    assert return_code == 0
diff --git a/smoke-test/tests/cypress/package.json b/smoke-test/tests/cypress/package.json
new file mode 100644
index 00000000000000..499b4854f0aefe
--- /dev/null
+++ b/smoke-test/tests/cypress/package.json
@@ -0,0 +1,9 @@
+{
+ "name": "smoke-test",
+ "version": "1.0.0",
+ "main": "index.js",
+ "license": "MIT",
+ "devDependencies": {
+ "cypress": "^9.1.0"
+ }
+}
diff --git a/smoke-test/tests/cypress/yarn.lock b/smoke-test/tests/cypress/yarn.lock
new file mode 100644
index 00000000000000..e4a05529966139
--- /dev/null
+++ b/smoke-test/tests/cypress/yarn.lock
@@ -0,0 +1,1112 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+"@cypress/request@^2.88.7":
+ version "2.88.10"
+ resolved "https://registry.yarnpkg.com/@cypress/request/-/request-2.88.10.tgz#b66d76b07f860d3a4b8d7a0604d020c662752cce"
+ integrity sha512-Zp7F+R93N0yZyG34GutyTNr+okam7s/Fzc1+i3kcqOP8vk6OuajuE9qZJ6Rs+10/1JFtXFYMdyarnU1rZuJesg==
+ dependencies:
+ aws-sign2 "~0.7.0"
+ aws4 "^1.8.0"
+ caseless "~0.12.0"
+ combined-stream "~1.0.6"
+ extend "~3.0.2"
+ forever-agent "~0.6.1"
+ form-data "~2.3.2"
+ http-signature "~1.3.6"
+ is-typedarray "~1.0.0"
+ isstream "~0.1.2"
+ json-stringify-safe "~5.0.1"
+ mime-types "~2.1.19"
+ performance-now "^2.1.0"
+ qs "~6.5.2"
+ safe-buffer "^5.1.2"
+ tough-cookie "~2.5.0"
+ tunnel-agent "^0.6.0"
+ uuid "^8.3.2"
+
+"@cypress/xvfb@^1.2.4":
+ version "1.2.4"
+ resolved "https://registry.yarnpkg.com/@cypress/xvfb/-/xvfb-1.2.4.tgz#2daf42e8275b39f4aa53c14214e557bd14e7748a"
+ integrity sha512-skbBzPggOVYCbnGgV+0dmBdW/s77ZkAOXIC1knS8NagwDjBrNC1LuXtQJeiN6l+m7lzmHtaoUw/ctJKdqkG57Q==
+ dependencies:
+ debug "^3.1.0"
+ lodash.once "^4.1.1"
+
+"@types/node@*":
+ version "16.11.11"
+ resolved "https://registry.yarnpkg.com/@types/node/-/node-16.11.11.tgz#6ea7342dfb379ea1210835bada87b3c512120234"
+ integrity sha512-KB0sixD67CeecHC33MYn+eYARkqTheIRNuu97y2XMjR7Wu3XibO1vaY6VBV6O/a89SPI81cEUIYT87UqUWlZNw==
+
+"@types/node@^14.14.31":
+ version "14.17.34"
+ resolved "https://registry.yarnpkg.com/@types/node/-/node-14.17.34.tgz#fe4b38b3f07617c0fa31ae923fca9249641038f0"
+ integrity sha512-USUftMYpmuMzeWobskoPfzDi+vkpe0dvcOBRNOscFrGxVp4jomnRxWuVohgqBow2xyIPC0S3gjxV/5079jhmDg==
+
+"@types/sinonjs__fake-timers@^6.0.2":
+ version "6.0.4"
+ resolved "https://registry.yarnpkg.com/@types/sinonjs__fake-timers/-/sinonjs__fake-timers-6.0.4.tgz#0ecc1b9259b76598ef01942f547904ce61a6a77d"
+ integrity sha512-IFQTJARgMUBF+xVd2b+hIgXWrZEjND3vJtRCvIelcFB5SIXfjV4bOHbHJ0eXKh+0COrBRc8MqteKAz/j88rE0A==
+
+"@types/sizzle@^2.3.2":
+ version "2.3.3"
+ resolved "https://registry.yarnpkg.com/@types/sizzle/-/sizzle-2.3.3.tgz#ff5e2f1902969d305225a047c8a0fd5c915cebef"
+ integrity sha512-JYM8x9EGF163bEyhdJBpR2QX1R5naCJHC8ucJylJ3w9/CVBaskdQ8WqBf8MmQrd1kRvp/a4TS8HJ+bxzR7ZJYQ==
+
+"@types/yauzl@^2.9.1":
+ version "2.9.2"
+ resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.9.2.tgz#c48e5d56aff1444409e39fa164b0b4d4552a7b7a"
+ integrity sha512-8uALY5LTvSuHgloDVUvWP3pIauILm+8/0pDMokuDYIoNsOkSwd5AiHBTSEJjKTDcZr5z8UpgOWZkxBF4iJftoA==
+ dependencies:
+ "@types/node" "*"
+
+aggregate-error@^3.0.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a"
+ integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==
+ dependencies:
+ clean-stack "^2.0.0"
+ indent-string "^4.0.0"
+
+ansi-colors@^4.1.1:
+ version "4.1.1"
+ resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.1.tgz#cbb9ae256bf750af1eab344f229aa27fe94ba348"
+ integrity sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==
+
+ansi-escapes@^4.3.0:
+ version "4.3.2"
+ resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.2.tgz#6b2291d1db7d98b6521d5f1efa42d0f3a9feb65e"
+ integrity sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==
+ dependencies:
+ type-fest "^0.21.3"
+
+ansi-regex@^5.0.1:
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304"
+ integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==
+
+ansi-styles@^4.0.0, ansi-styles@^4.1.0:
+ version "4.3.0"
+ resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
+ integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
+ dependencies:
+ color-convert "^2.0.1"
+
+arch@^2.2.0:
+ version "2.2.0"
+ resolved "https://registry.yarnpkg.com/arch/-/arch-2.2.0.tgz#1bc47818f305764f23ab3306b0bfc086c5a29d11"
+ integrity sha512-Of/R0wqp83cgHozfIYLbBMnej79U/SVGOOyuB3VVFv1NRM/PSFMK12x9KVtiYzJqmnU5WR2qp0Z5rHb7sWGnFQ==
+
+asn1@~0.2.3:
+ version "0.2.6"
+ resolved "https://registry.yarnpkg.com/asn1/-/asn1-0.2.6.tgz#0d3a7bb6e64e02a90c0303b31f292868ea09a08d"
+ integrity sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==
+ dependencies:
+ safer-buffer "~2.1.0"
+
+assert-plus@1.0.0, assert-plus@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525"
+ integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=
+
+astral-regex@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31"
+ integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==
+
+async@^3.2.0:
+ version "3.2.2"
+ resolved "https://registry.yarnpkg.com/async/-/async-3.2.2.tgz#2eb7671034bb2194d45d30e31e24ec7e7f9670cd"
+ integrity sha512-H0E+qZaDEfx/FY4t7iLRv1W2fFI6+pyCeTw1uN20AQPiwqwM6ojPxHxdLv4z8hi2DtnW9BOckSspLucW7pIE5g==
+
+asynckit@^0.4.0:
+ version "0.4.0"
+ resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
+ integrity sha1-x57Zf380y48robyXkLzDZkdLS3k=
+
+at-least-node@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/at-least-node/-/at-least-node-1.0.0.tgz#602cd4b46e844ad4effc92a8011a3c46e0238dc2"
+ integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==
+
+aws-sign2@~0.7.0:
+ version "0.7.0"
+ resolved "https://registry.yarnpkg.com/aws-sign2/-/aws-sign2-0.7.0.tgz#b46e890934a9591f2d2f6f86d7e6a9f1b3fe76a8"
+ integrity sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=
+
+aws4@^1.8.0:
+ version "1.11.0"
+ resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59"
+ integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==
+
+balanced-match@^1.0.0:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee"
+ integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==
+
+bcrypt-pbkdf@^1.0.0:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e"
+ integrity sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=
+ dependencies:
+ tweetnacl "^0.14.3"
+
+blob-util@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/blob-util/-/blob-util-2.0.2.tgz#3b4e3c281111bb7f11128518006cdc60b403a1eb"
+ integrity sha512-T7JQa+zsXXEa6/8ZhHcQEW1UFfVM49Ts65uBkFL6fz2QmrElqmbajIDJvuA0tEhRe5eIjpV9ZF+0RfZR9voJFQ==
+
+bluebird@3.7.2:
+ version "3.7.2"
+ resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f"
+ integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==
+
+brace-expansion@^1.1.7:
+ version "1.1.11"
+ resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
+ integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==
+ dependencies:
+ balanced-match "^1.0.0"
+ concat-map "0.0.1"
+
+buffer-crc32@~0.2.3:
+ version "0.2.13"
+ resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
+ integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI=
+
+cachedir@^2.3.0:
+ version "2.3.0"
+ resolved "https://registry.yarnpkg.com/cachedir/-/cachedir-2.3.0.tgz#0c75892a052198f0b21c7c1804d8331edfcae0e8"
+ integrity sha512-A+Fezp4zxnit6FanDmv9EqXNAi3vt9DWp51/71UEhXukb7QUuvtv9344h91dyAxuTLoSYJFU299qzR3tzwPAhw==
+
+caseless@~0.12.0:
+ version "0.12.0"
+ resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
+ integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=
+
+chalk@^4.1.0:
+ version "4.1.2"
+ resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01"
+ integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==
+ dependencies:
+ ansi-styles "^4.1.0"
+ supports-color "^7.1.0"
+
+check-more-types@^2.24.0:
+ version "2.24.0"
+ resolved "https://registry.yarnpkg.com/check-more-types/-/check-more-types-2.24.0.tgz#1420ffb10fd444dcfc79b43891bbfffd32a84600"
+ integrity sha1-FCD/sQ/URNz8ebQ4kbv//TKoRgA=
+
+ci-info@^3.2.0:
+ version "3.3.0"
+ resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.3.0.tgz#b4ed1fb6818dea4803a55c623041f9165d2066b2"
+ integrity sha512-riT/3vI5YpVH6/qomlDnJow6TBee2PBKSEpx3O32EGPYbWGIRsIlGRms3Sm74wYE1JMo8RnO04Hb12+v1J5ICw==
+
+clean-stack@^2.0.0:
+ version "2.2.0"
+ resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b"
+ integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==
+
+cli-cursor@^3.1.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-3.1.0.tgz#264305a7ae490d1d03bf0c9ba7c925d1753af307"
+ integrity sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw==
+ dependencies:
+ restore-cursor "^3.1.0"
+
+cli-table3@~0.6.0:
+ version "0.6.0"
+ resolved "https://registry.yarnpkg.com/cli-table3/-/cli-table3-0.6.0.tgz#b7b1bc65ca8e7b5cef9124e13dc2b21e2ce4faee"
+ integrity sha512-gnB85c3MGC7Nm9I/FkiasNBOKjOiO1RNuXXarQms37q4QMpWdlbBgD/VnOStA2faG1dpXMv31RFApjX1/QdgWQ==
+ dependencies:
+ object-assign "^4.1.0"
+ string-width "^4.2.0"
+ optionalDependencies:
+ colors "^1.1.2"
+
+cli-truncate@^2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/cli-truncate/-/cli-truncate-2.1.0.tgz#c39e28bf05edcde5be3b98992a22deed5a2b93c7"
+ integrity sha512-n8fOixwDD6b/ObinzTrp1ZKFzbgvKZvuz/TvejnLn1aQfC6r52XEx85FmuC+3HI+JM7coBRXUvNqEU2PHVrHpg==
+ dependencies:
+ slice-ansi "^3.0.0"
+ string-width "^4.2.0"
+
+color-convert@^2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
+ integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
+ dependencies:
+ color-name "~1.1.4"
+
+color-name@~1.1.4:
+ version "1.1.4"
+ resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
+ integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
+
+colorette@^2.0.16:
+ version "2.0.16"
+ resolved "https://registry.yarnpkg.com/colorette/-/colorette-2.0.16.tgz#713b9af84fdb000139f04546bd4a93f62a5085da"
+ integrity sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g==
+
+colors@^1.1.2:
+ version "1.4.0"
+ resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78"
+ integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA==
+
+combined-stream@^1.0.6, combined-stream@~1.0.6:
+ version "1.0.8"
+ resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
+ integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
+ dependencies:
+ delayed-stream "~1.0.0"
+
+commander@^5.1.0:
+ version "5.1.0"
+ resolved "https://registry.yarnpkg.com/commander/-/commander-5.1.0.tgz#46abbd1652f8e059bddaef99bbdcb2ad9cf179ae"
+ integrity sha512-P0CysNDQ7rtVw4QIQtm+MRxV66vKFSvlsQvGYXZWR3qFU0jlMKHZZZgw8e+8DSah4UDKMqnknRDQz+xuQXQ/Zg==
+
+common-tags@^1.8.0:
+ version "1.8.2"
+ resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.2.tgz#94ebb3c076d26032745fd54face7f688ef5ac9c6"
+ integrity sha512-gk/Z852D2Wtb//0I+kRFNKKE9dIIVirjoqPoA1wJU+XePVXZfGeBpk45+A1rKO4Q43prqWBNY/MiIeRLbPWUaA==
+
+concat-map@0.0.1:
+ version "0.0.1"
+ resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
+ integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
+
+core-util-is@1.0.2:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
+ integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=
+
+cross-spawn@^7.0.0:
+ version "7.0.3"
+ resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
+ integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
+ dependencies:
+ path-key "^3.1.0"
+ shebang-command "^2.0.0"
+ which "^2.0.1"
+
+cypress@^9.1.0:
+ version "9.1.0"
+ resolved "https://registry.yarnpkg.com/cypress/-/cypress-9.1.0.tgz#5d23c1b363b7d4853009c74a422a083a8ad2601c"
+ integrity sha512-fyXcCN51vixkPrz/vO/Qy6WL3hKYJzCQFeWofOpGOFewVVXrGfmfSOGFntXpzWBXsIwPn3wzW0HOFw51jZajNQ==
+ dependencies:
+ "@cypress/request" "^2.88.7"
+ "@cypress/xvfb" "^1.2.4"
+ "@types/node" "^14.14.31"
+ "@types/sinonjs__fake-timers" "^6.0.2"
+ "@types/sizzle" "^2.3.2"
+ arch "^2.2.0"
+ blob-util "^2.0.2"
+ bluebird "3.7.2"
+ cachedir "^2.3.0"
+ chalk "^4.1.0"
+ check-more-types "^2.24.0"
+ cli-cursor "^3.1.0"
+ cli-table3 "~0.6.0"
+ commander "^5.1.0"
+ common-tags "^1.8.0"
+ dayjs "^1.10.4"
+ debug "^4.3.2"
+ enquirer "^2.3.6"
+ eventemitter2 "^6.4.3"
+ execa "4.1.0"
+ executable "^4.1.1"
+ extract-zip "2.0.1"
+ figures "^3.2.0"
+ fs-extra "^9.1.0"
+ getos "^3.2.1"
+ is-ci "^3.0.0"
+ is-installed-globally "~0.4.0"
+ lazy-ass "^1.6.0"
+ listr2 "^3.8.3"
+ lodash "^4.17.21"
+ log-symbols "^4.0.0"
+ minimist "^1.2.5"
+ ospath "^1.2.2"
+ pretty-bytes "^5.6.0"
+ proxy-from-env "1.0.0"
+ request-progress "^3.0.0"
+ supports-color "^8.1.1"
+ tmp "~0.2.1"
+ untildify "^4.0.0"
+ url "^0.11.0"
+ yauzl "^2.10.0"
+
+dashdash@^1.12.0:
+ version "1.14.1"
+ resolved "https://registry.yarnpkg.com/dashdash/-/dashdash-1.14.1.tgz#853cfa0f7cbe2fed5de20326b8dd581035f6e2f0"
+ integrity sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=
+ dependencies:
+ assert-plus "^1.0.0"
+
+dayjs@^1.10.4:
+ version "1.10.7"
+ resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.10.7.tgz#2cf5f91add28116748440866a0a1d26f3a6ce468"
+ integrity sha512-P6twpd70BcPK34K26uJ1KT3wlhpuOAPoMwJzpsIWUxHZ7wpmbdZL/hQqBDfz7hGurYSa5PhzdhDHtt319hL3ig==
+
+debug@^3.1.0:
+ version "3.2.7"
+ resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
+ integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
+ dependencies:
+ ms "^2.1.1"
+
+debug@^4.1.1, debug@^4.3.2:
+ version "4.3.3"
+ resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.3.tgz#04266e0b70a98d4462e6e288e38259213332b664"
+ integrity sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q==
+ dependencies:
+ ms "2.1.2"
+
+delayed-stream@~1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
+ integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk=
+
+ecc-jsbn@~0.1.1:
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz#3a83a904e54353287874c564b7549386849a98c9"
+ integrity sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=
+ dependencies:
+ jsbn "~0.1.0"
+ safer-buffer "^2.1.0"
+
+emoji-regex@^8.0.0:
+ version "8.0.0"
+ resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
+ integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
+
+end-of-stream@^1.1.0:
+ version "1.4.4"
+ resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0"
+ integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==
+ dependencies:
+ once "^1.4.0"
+
+enquirer@^2.3.6:
+ version "2.3.6"
+ resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d"
+ integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==
+ dependencies:
+ ansi-colors "^4.1.1"
+
+escape-string-regexp@^1.0.5:
+ version "1.0.5"
+ resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
+ integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=
+
+eventemitter2@^6.4.3:
+ version "6.4.5"
+ resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-6.4.5.tgz#97380f758ae24ac15df8353e0cc27f8b95644655"
+ integrity sha512-bXE7Dyc1i6oQElDG0jMRZJrRAn9QR2xyyFGmBdZleNmyQX0FqGYmhZIrIrpPfm/w//LTo4tVQGOGQcGCb5q9uw==
+
+execa@4.1.0:
+ version "4.1.0"
+ resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a"
+ integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA==
+ dependencies:
+ cross-spawn "^7.0.0"
+ get-stream "^5.0.0"
+ human-signals "^1.1.1"
+ is-stream "^2.0.0"
+ merge-stream "^2.0.0"
+ npm-run-path "^4.0.0"
+ onetime "^5.1.0"
+ signal-exit "^3.0.2"
+ strip-final-newline "^2.0.0"
+
+executable@^4.1.1:
+ version "4.1.1"
+ resolved "https://registry.yarnpkg.com/executable/-/executable-4.1.1.tgz#41532bff361d3e57af4d763b70582db18f5d133c"
+ integrity sha512-8iA79xD3uAch729dUG8xaaBBFGaEa0wdD2VkYLFHwlqosEj/jT66AzcreRDSgV7ehnNLBW2WR5jIXwGKjVdTLg==
+ dependencies:
+ pify "^2.2.0"
+
+extend@~3.0.2:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
+ integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
+
+extract-zip@2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
+ integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==
+ dependencies:
+ debug "^4.1.1"
+ get-stream "^5.1.0"
+ yauzl "^2.10.0"
+ optionalDependencies:
+ "@types/yauzl" "^2.9.1"
+
+extsprintf@1.3.0:
+ version "1.3.0"
+ resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
+ integrity sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=
+
+extsprintf@^1.2.0:
+ version "1.4.1"
+ resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.4.1.tgz#8d172c064867f235c0c84a596806d279bf4bcc07"
+ integrity sha512-Wrk35e8ydCKDj/ArClo1VrPVmN8zph5V4AtHwIuHhvMXsKf73UT3BOD+azBIW+3wOJ4FhEH7zyaJCFvChjYvMA==
+
+fd-slicer@~1.1.0:
+ version "1.1.0"
+ resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
+ integrity sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4=
+ dependencies:
+ pend "~1.2.0"
+
+figures@^3.2.0:
+ version "3.2.0"
+ resolved "https://registry.yarnpkg.com/figures/-/figures-3.2.0.tgz#625c18bd293c604dc4a8ddb2febf0c88341746af"
+ integrity sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg==
+ dependencies:
+ escape-string-regexp "^1.0.5"
+
+forever-agent@~0.6.1:
+ version "0.6.1"
+ resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91"
+ integrity sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=
+
+form-data@~2.3.2:
+ version "2.3.3"
+ resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.3.3.tgz#dcce52c05f644f298c6a7ab936bd724ceffbf3a6"
+ integrity sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==
+ dependencies:
+ asynckit "^0.4.0"
+ combined-stream "^1.0.6"
+ mime-types "^2.1.12"
+
+fs-extra@^9.1.0:
+ version "9.1.0"
+ resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
+ integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ==
+ dependencies:
+ at-least-node "^1.0.0"
+ graceful-fs "^4.2.0"
+ jsonfile "^6.0.1"
+ universalify "^2.0.0"
+
+fs.realpath@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
+ integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8=
+
+get-stream@^5.0.0, get-stream@^5.1.0:
+ version "5.2.0"
+ resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3"
+ integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==
+ dependencies:
+ pump "^3.0.0"
+
+getos@^3.2.1:
+ version "3.2.1"
+ resolved "https://registry.yarnpkg.com/getos/-/getos-3.2.1.tgz#0134d1f4e00eb46144c5a9c0ac4dc087cbb27dc5"
+ integrity sha512-U56CfOK17OKgTVqozZjUKNdkfEv6jk5WISBJ8SHoagjE6L69zOwl3Z+O8myjY9MEW3i2HPWQBt/LTbCgcC973Q==
+ dependencies:
+ async "^3.2.0"
+
+getpass@^0.1.1:
+ version "0.1.7"
+ resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa"
+ integrity sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=
+ dependencies:
+ assert-plus "^1.0.0"
+
+glob@^7.1.3:
+ version "7.2.0"
+ resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.0.tgz#d15535af7732e02e948f4c41628bd910293f6023"
+ integrity sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==
+ dependencies:
+ fs.realpath "^1.0.0"
+ inflight "^1.0.4"
+ inherits "2"
+ minimatch "^3.0.4"
+ once "^1.3.0"
+ path-is-absolute "^1.0.0"
+
+global-dirs@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/global-dirs/-/global-dirs-3.0.0.tgz#70a76fe84ea315ab37b1f5576cbde7d48ef72686"
+ integrity sha512-v8ho2DS5RiCjftj1nD9NmnfaOzTdud7RRnVd9kFNOjqZbISlx5DQ+OrTkywgd0dIt7oFCvKetZSHoHcP3sDdiA==
+ dependencies:
+ ini "2.0.0"
+
+graceful-fs@^4.1.6, graceful-fs@^4.2.0:
+ version "4.2.8"
+ resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.8.tgz#e412b8d33f5e006593cbd3cee6df9f2cebbe802a"
+ integrity sha512-qkIilPUYcNhJpd33n0GBXTB1MMPp14TxEsEs0pTrsSVucApsYzW5V+Q8Qxhik6KU3evy+qkAAowTByymK0avdg==
+
+has-flag@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
+ integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
+
+http-signature@~1.3.6:
+ version "1.3.6"
+ resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.3.6.tgz#cb6fbfdf86d1c974f343be94e87f7fc128662cf9"
+ integrity sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw==
+ dependencies:
+ assert-plus "^1.0.0"
+ jsprim "^2.0.2"
+ sshpk "^1.14.1"
+
+human-signals@^1.1.1:
+ version "1.1.1"
+ resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
+ integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw==
+
+indent-string@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
+ integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==
+
+inflight@^1.0.4:
+ version "1.0.6"
+ resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
+ integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=
+ dependencies:
+ once "^1.3.0"
+ wrappy "1"
+
+inherits@2:
+ version "2.0.4"
+ resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
+ integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
+
+ini@2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/ini/-/ini-2.0.0.tgz#e5fd556ecdd5726be978fa1001862eacb0a94bc5"
+ integrity sha512-7PnF4oN3CvZF23ADhA5wRaYEQpJ8qygSkbtTXWBeXWXmEVRXK+1ITciHWwHhsjv1TmW0MgacIv6hEi5pX5NQdA==
+
+is-ci@^3.0.0:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-3.0.1.tgz#db6ecbed1bd659c43dac0f45661e7674103d1867"
+ integrity sha512-ZYvCgrefwqoQ6yTyYUbQu64HsITZ3NfKX1lzaEYdkTDcfKzzCI/wthRRYKkdjHKFVgNiXKAKm65Zo1pk2as/QQ==
+ dependencies:
+ ci-info "^3.2.0"
+
+is-fullwidth-code-point@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d"
+ integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==
+
+is-installed-globally@~0.4.0:
+ version "0.4.0"
+ resolved "https://registry.yarnpkg.com/is-installed-globally/-/is-installed-globally-0.4.0.tgz#9a0fd407949c30f86eb6959ef1b7994ed0b7b520"
+ integrity sha512-iwGqO3J21aaSkC7jWnHP/difazwS7SFeIqxv6wEtLU8Y5KlzFTjyqcSIT0d8s4+dDhKytsk9PJZ2BkS5eZwQRQ==
+ dependencies:
+ global-dirs "^3.0.0"
+ is-path-inside "^3.0.2"
+
+is-path-inside@^3.0.2:
+ version "3.0.3"
+ resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283"
+ integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==
+
+is-stream@^2.0.0:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.1.tgz#fac1e3d53b97ad5a9d0ae9cef2389f5810a5c077"
+ integrity sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==
+
+is-typedarray@~1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/is-typedarray/-/is-typedarray-1.0.0.tgz#e479c80858df0c1b11ddda6940f96011fcda4a9a"
+ integrity sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=
+
+is-unicode-supported@^0.1.0:
+ version "0.1.0"
+ resolved "https://registry.yarnpkg.com/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz#3f26c76a809593b52bfa2ecb5710ed2779b522a7"
+ integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==
+
+isexe@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
+ integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=
+
+isstream@~0.1.2:
+ version "0.1.2"
+ resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
+ integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=
+
+jsbn@~0.1.0:
+ version "0.1.1"
+ resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
+ integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM=
+
+json-schema@0.4.0:
+ version "0.4.0"
+ resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.4.0.tgz#f7de4cf6efab838ebaeb3236474cbba5a1930ab5"
+ integrity sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==
+
+json-stringify-safe@~5.0.1:
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
+ integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=
+
+jsonfile@^6.0.1:
+ version "6.1.0"
+ resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae"
+ integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==
+ dependencies:
+ universalify "^2.0.0"
+ optionalDependencies:
+ graceful-fs "^4.1.6"
+
+jsprim@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/jsprim/-/jsprim-2.0.2.tgz#77ca23dbcd4135cd364800d22ff82c2185803d4d"
+ integrity sha512-gqXddjPqQ6G40VdnI6T6yObEC+pDNvyP95wdQhkWkg7crHH3km5qP1FsOXEkzEQwnz6gz5qGTn1c2Y52wP3OyQ==
+ dependencies:
+ assert-plus "1.0.0"
+ extsprintf "1.3.0"
+ json-schema "0.4.0"
+ verror "1.10.0"
+
+lazy-ass@^1.6.0:
+ version "1.6.0"
+ resolved "https://registry.yarnpkg.com/lazy-ass/-/lazy-ass-1.6.0.tgz#7999655e8646c17f089fdd187d150d3324d54513"
+ integrity sha1-eZllXoZGwX8In90YfRUNMyTVRRM=
+
+listr2@^3.8.3:
+ version "3.13.5"
+ resolved "https://registry.yarnpkg.com/listr2/-/listr2-3.13.5.tgz#105a813f2eb2329c4aae27373a281d610ee4985f"
+ integrity sha512-3n8heFQDSk+NcwBn3CgxEibZGaRzx+pC64n3YjpMD1qguV4nWus3Al+Oo3KooqFKTQEJ1v7MmnbnyyNspgx3NA==
+ dependencies:
+ cli-truncate "^2.1.0"
+ colorette "^2.0.16"
+ log-update "^4.0.0"
+ p-map "^4.0.0"
+ rfdc "^1.3.0"
+ rxjs "^7.4.0"
+ through "^2.3.8"
+ wrap-ansi "^7.0.0"
+
+lodash.once@^4.1.1:
+ version "4.1.1"
+ resolved "https://registry.yarnpkg.com/lodash.once/-/lodash.once-4.1.1.tgz#0dd3971213c7c56df880977d504c88fb471a97ac"
+ integrity sha1-DdOXEhPHxW34gJd9UEyI+0cal6w=
+
+lodash@^4.17.21:
+ version "4.17.21"
+ resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
+ integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
+
+log-symbols@^4.0.0:
+ version "4.1.0"
+ resolved "https://registry.yarnpkg.com/log-symbols/-/log-symbols-4.1.0.tgz#3fbdbb95b4683ac9fc785111e792e558d4abd503"
+ integrity sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==
+ dependencies:
+ chalk "^4.1.0"
+ is-unicode-supported "^0.1.0"
+
+log-update@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/log-update/-/log-update-4.0.0.tgz#589ecd352471f2a1c0c570287543a64dfd20e0a1"
+ integrity sha512-9fkkDevMefjg0mmzWFBW8YkFP91OrizzkW3diF7CpG+S2EYdy4+TVfGwz1zeF8x7hCx1ovSPTOE9Ngib74qqUg==
+ dependencies:
+ ansi-escapes "^4.3.0"
+ cli-cursor "^3.1.0"
+ slice-ansi "^4.0.0"
+ wrap-ansi "^6.2.0"
+
+merge-stream@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60"
+ integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
+
+mime-db@1.51.0:
+ version "1.51.0"
+ resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.51.0.tgz#d9ff62451859b18342d960850dc3cfb77e63fb0c"
+ integrity sha512-5y8A56jg7XVQx2mbv1lu49NR4dokRnhZYTtL+KGfaa27uq4pSTXkwQkFJl4pkRMyNFz/EtYDSkiiEHx3F7UN6g==
+
+mime-types@^2.1.12, mime-types@~2.1.19:
+ version "2.1.34"
+ resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.34.tgz#5a712f9ec1503511a945803640fafe09d3793c24"
+ integrity sha512-6cP692WwGIs9XXdOO4++N+7qjqv0rqxxVvJ3VHPh/Sc9mVZcQP+ZGhkKiTvWMQRr2tbHkJP/Yn7Y0npb3ZBs4A==
+ dependencies:
+ mime-db "1.51.0"
+
+mimic-fn@^2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b"
+ integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==
+
+minimatch@^3.0.4:
+ version "3.0.4"
+ resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
+ integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
+ dependencies:
+ brace-expansion "^1.1.7"
+
+minimist@^1.2.5:
+ version "1.2.5"
+ resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
+ integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
+
+ms@2.1.2:
+ version "2.1.2"
+ resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
+ integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
+
+ms@^2.1.1:
+ version "2.1.3"
+ resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
+ integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
+
+npm-run-path@^4.0.0:
+ version "4.0.1"
+ resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea"
+ integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==
+ dependencies:
+ path-key "^3.0.0"
+
+object-assign@^4.1.0:
+ version "4.1.1"
+ resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
+ integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=
+
+once@^1.3.0, once@^1.3.1, once@^1.4.0:
+ version "1.4.0"
+ resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
+ integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E=
+ dependencies:
+ wrappy "1"
+
+onetime@^5.1.0:
+ version "5.1.2"
+ resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e"
+ integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==
+ dependencies:
+ mimic-fn "^2.1.0"
+
+ospath@^1.2.2:
+ version "1.2.2"
+ resolved "https://registry.yarnpkg.com/ospath/-/ospath-1.2.2.tgz#1276639774a3f8ef2572f7fe4280e0ea4550c07b"
+ integrity sha1-EnZjl3Sj+O8lcvf+QoDg6kVQwHs=
+
+p-map@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/p-map/-/p-map-4.0.0.tgz#bb2f95a5eda2ec168ec9274e06a747c3e2904d2b"
+ integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==
+ dependencies:
+ aggregate-error "^3.0.0"
+
+path-is-absolute@^1.0.0:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
+ integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
+
+path-key@^3.0.0, path-key@^3.1.0:
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375"
+ integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
+
+pend@~1.2.0:
+ version "1.2.0"
+ resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50"
+ integrity sha1-elfrVQpng/kRUzH89GY9XI4AelA=
+
+performance-now@^2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
+ integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
+
+pify@^2.2.0:
+ version "2.3.0"
+ resolved "https://registry.yarnpkg.com/pify/-/pify-2.3.0.tgz#ed141a6ac043a849ea588498e7dca8b15330e90c"
+ integrity sha1-7RQaasBDqEnqWISY59yosVMw6Qw=
+
+pretty-bytes@^5.6.0:
+ version "5.6.0"
+ resolved "https://registry.yarnpkg.com/pretty-bytes/-/pretty-bytes-5.6.0.tgz#356256f643804773c82f64723fe78c92c62beaeb"
+ integrity sha512-FFw039TmrBqFK8ma/7OL3sDz/VytdtJr044/QUJtH0wK9lb9jLq9tJyIxUwtQJHwar2BqtiA4iCWSwo9JLkzFg==
+
+proxy-from-env@1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.0.0.tgz#33c50398f70ea7eb96d21f7b817630a55791c7ee"
+ integrity sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4=
+
+psl@^1.1.28:
+ version "1.8.0"
+ resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
+ integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==
+
+pump@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64"
+ integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==
+ dependencies:
+ end-of-stream "^1.1.0"
+ once "^1.3.1"
+
+punycode@1.3.2:
+ version "1.3.2"
+ resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d"
+ integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=
+
+punycode@^2.1.1:
+ version "2.1.1"
+ resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
+ integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
+
+qs@~6.5.2:
+ version "6.5.2"
+ resolved "https://registry.yarnpkg.com/qs/-/qs-6.5.2.tgz#cb3ae806e8740444584ef154ce8ee98d403f3e36"
+ integrity sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==
+
+querystring@0.2.0:
+ version "0.2.0"
+ resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620"
+ integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=
+
+request-progress@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/request-progress/-/request-progress-3.0.0.tgz#4ca754081c7fec63f505e4faa825aa06cd669dbe"
+ integrity sha1-TKdUCBx/7GP1BeT6qCWqBs1mnb4=
+ dependencies:
+ throttleit "^1.0.0"
+
+restore-cursor@^3.1.0:
+ version "3.1.0"
+ resolved "https://registry.yarnpkg.com/restore-cursor/-/restore-cursor-3.1.0.tgz#39f67c54b3a7a58cea5236d95cf0034239631f7e"
+ integrity sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA==
+ dependencies:
+ onetime "^5.1.0"
+ signal-exit "^3.0.2"
+
+rfdc@^1.3.0:
+ version "1.3.0"
+ resolved "https://registry.yarnpkg.com/rfdc/-/rfdc-1.3.0.tgz#d0b7c441ab2720d05dc4cf26e01c89631d9da08b"
+ integrity sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA==
+
+rimraf@^3.0.0:
+ version "3.0.2"
+ resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
+ integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
+ dependencies:
+ glob "^7.1.3"
+
+rxjs@^7.4.0:
+ version "7.4.0"
+ resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-7.4.0.tgz#a12a44d7eebf016f5ff2441b87f28c9a51cebc68"
+ integrity sha512-7SQDi7xeTMCJpqViXh8gL/lebcwlp3d831F05+9B44A4B0WfsEwUQHR64gsH1kvJ+Ep/J9K2+n1hVl1CsGN23w==
+ dependencies:
+ tslib "~2.1.0"
+
+safe-buffer@^5.0.1, safe-buffer@^5.1.2:
+ version "5.2.1"
+ resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
+ integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
+
+safer-buffer@^2.0.2, safer-buffer@^2.1.0, safer-buffer@~2.1.0:
+ version "2.1.2"
+ resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
+ integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
+
+shebang-command@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
+ integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==
+ dependencies:
+ shebang-regex "^3.0.0"
+
+shebang-regex@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
+ integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
+
+signal-exit@^3.0.2:
+ version "3.0.6"
+ resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.6.tgz#24e630c4b0f03fea446a2bd299e62b4a6ca8d0af"
+ integrity sha512-sDl4qMFpijcGw22U5w63KmD3cZJfBuFlVNbVMKje2keoKML7X2UzWbc4XrmEbDwg0NXJc3yv4/ox7b+JWb57kQ==
+
+slice-ansi@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-3.0.0.tgz#31ddc10930a1b7e0b67b08c96c2f49b77a789787"
+ integrity sha512-pSyv7bSTC7ig9Dcgbw9AuRNUb5k5V6oDudjZoMBSr13qpLBG7tB+zgCkARjq7xIUgdz5P1Qe8u+rSGdouOOIyQ==
+ dependencies:
+ ansi-styles "^4.0.0"
+ astral-regex "^2.0.0"
+ is-fullwidth-code-point "^3.0.0"
+
+slice-ansi@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-4.0.0.tgz#500e8dd0fd55b05815086255b3195adf2a45fe6b"
+ integrity sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==
+ dependencies:
+ ansi-styles "^4.0.0"
+ astral-regex "^2.0.0"
+ is-fullwidth-code-point "^3.0.0"
+
+sshpk@^1.14.1:
+ version "1.16.1"
+ resolved "https://registry.yarnpkg.com/sshpk/-/sshpk-1.16.1.tgz#fb661c0bef29b39db40769ee39fa70093d6f6877"
+ integrity sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==
+ dependencies:
+ asn1 "~0.2.3"
+ assert-plus "^1.0.0"
+ bcrypt-pbkdf "^1.0.0"
+ dashdash "^1.12.0"
+ ecc-jsbn "~0.1.1"
+ getpass "^0.1.1"
+ jsbn "~0.1.0"
+ safer-buffer "^2.0.2"
+ tweetnacl "~0.14.0"
+
+string-width@^4.1.0, string-width@^4.2.0:
+ version "4.2.3"
+ resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
+ integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
+ dependencies:
+ emoji-regex "^8.0.0"
+ is-fullwidth-code-point "^3.0.0"
+ strip-ansi "^6.0.1"
+
+strip-ansi@^6.0.0, strip-ansi@^6.0.1:
+ version "6.0.1"
+ resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
+ integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
+ dependencies:
+ ansi-regex "^5.0.1"
+
+strip-final-newline@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
+ integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==
+
+supports-color@^7.1.0:
+ version "7.2.0"
+ resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
+ integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
+ dependencies:
+ has-flag "^4.0.0"
+
+supports-color@^8.1.1:
+ version "8.1.1"
+ resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-8.1.1.tgz#cd6fc17e28500cff56c1b86c0a7fd4a54a73005c"
+ integrity sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==
+ dependencies:
+ has-flag "^4.0.0"
+
+throttleit@^1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/throttleit/-/throttleit-1.0.0.tgz#9e785836daf46743145a5984b6268d828528ac6c"
+ integrity sha1-nnhYNtr0Z0MUWlmEtiaNgoUorGw=
+
+through@^2.3.8:
+ version "2.3.8"
+ resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5"
+ integrity sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU=
+
+tmp@~0.2.1:
+ version "0.2.1"
+ resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.2.1.tgz#8457fc3037dcf4719c251367a1af6500ee1ccf14"
+ integrity sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ==
+ dependencies:
+ rimraf "^3.0.0"
+
+tough-cookie@~2.5.0:
+ version "2.5.0"
+ resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2"
+ integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==
+ dependencies:
+ psl "^1.1.28"
+ punycode "^2.1.1"
+
+tslib@~2.1.0:
+ version "2.1.0"
+ resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a"
+ integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A==
+
+tunnel-agent@^0.6.0:
+ version "0.6.0"
+ resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
+ integrity sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=
+ dependencies:
+ safe-buffer "^5.0.1"
+
+tweetnacl@^0.14.3, tweetnacl@~0.14.0:
+ version "0.14.5"
+ resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64"
+ integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=
+
+type-fest@^0.21.3:
+ version "0.21.3"
+ resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.21.3.tgz#d260a24b0198436e133fa26a524a6d65fa3b2e37"
+ integrity sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==
+
+universalify@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/universalify/-/universalify-2.0.0.tgz#75a4984efedc4b08975c5aeb73f530d02df25717"
+ integrity sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==
+
+untildify@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/untildify/-/untildify-4.0.0.tgz#2bc947b953652487e4600949fb091e3ae8cd919b"
+ integrity sha512-KK8xQ1mkzZeg9inewmFVDNkg3l5LUhoq9kN6iWYB/CC9YMG8HA+c1Q8HwDe6dEX7kErrEVNVBO3fWsVq5iDgtw==
+
+url@^0.11.0:
+ version "0.11.0"
+ resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1"
+ integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
+ dependencies:
+ punycode "1.3.2"
+ querystring "0.2.0"
+
+uuid@^8.3.2:
+ version "8.3.2"
+ resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2"
+ integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
+
+verror@1.10.0:
+ version "1.10.0"
+ resolved "https://registry.yarnpkg.com/verror/-/verror-1.10.0.tgz#3a105ca17053af55d6e270c1f8288682e18da400"
+ integrity sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=
+ dependencies:
+ assert-plus "^1.0.0"
+ core-util-is "1.0.2"
+ extsprintf "^1.2.0"
+
+which@^2.0.1:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
+ integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==
+ dependencies:
+ isexe "^2.0.0"
+
+wrap-ansi@^6.2.0:
+ version "6.2.0"
+ resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53"
+ integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==
+ dependencies:
+ ansi-styles "^4.0.0"
+ string-width "^4.1.0"
+ strip-ansi "^6.0.0"
+
+wrap-ansi@^7.0.0:
+ version "7.0.0"
+ resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
+ integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
+ dependencies:
+ ansi-styles "^4.0.0"
+ string-width "^4.1.0"
+ strip-ansi "^6.0.0"
+
+wrappy@1:
+ version "1.0.2"
+ resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
+ integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
+
+yauzl@^2.10.0:
+ version "2.10.0"
+ resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
+ integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk=
+ dependencies:
+ buffer-crc32 "~0.2.3"
+ fd-slicer "~1.1.0"
diff --git a/spark-lineage/README.md b/spark-lineage/README.md
new file mode 100644
index 00000000000000..89ef07d45df564
--- /dev/null
+++ b/spark-lineage/README.md
@@ -0,0 +1,88 @@
+# Spark lineage emitter
+The Spark lineage emitter is a Java library that provides a Spark listener implementation, "DatahubLineageEmitter". The DatahubLineageEmitter listens to events such as application start/end and SQLExecution start/end to create pipelines (i.e. DataFlow) and tasks (i.e. DataJob) in Datahub, along with lineage.
+
+## Configuring Spark emitter
+Listener configuration can be done using a config file or while creating a Spark session.
+
+### Config file for spark-submit
+When running jobs using spark-submit, the listener is to be configured in the config file.
+
+```
+spark.master spark://spark-master:7077
+
+#Configuring datahub spark listener jar
+spark.jars.packages io.acryl:spark-lineage:0.0.1
+spark.extraListeners com.linkedin.datahub.lineage.spark.interceptor.DatahubLineageEmitter
+spark.datahub.lineage.mcpEmitter.gmsUrl http://localhost:8080
+```
+
+### Configuring with SparkSession Builder for notebooks
+When running interactive jobs from a notebook, the listener can be configured while building the Spark Session.
+
+```python
+spark = SparkSession.builder \
+ .master("spark://spark-master:7077") \
+ .appName("test-application") \
+ .config("spark.jars.packages","io.acryl:spark-lineage:0.0.1") \
+ .config("spark.extraListeners","com.linkedin.datahub.lineage.spark.interceptor.DatahubLineageEmitter") \
+ .config("spark.datahub.lineage.mcpEmitter.gmsUrl", "http://localhost:8080") \
+ .enableHiveSupport() \
+ .getOrCreate()
+```
+
+## Model mapping
+A pipeline is created per Spark application.
+A task is created per unique Spark query execution within an app.
+
+### Custom properties & relating to Spark UI
+The following custom properties in pipelines and tasks relate to the Spark UI:
+- appName and appId in a pipeline can be used to determine the Spark application
+- description and SQLQueryId in a task can be used to determine the Query Execution within the application on the SQL tab of Spark UI
+
+Other custom properties of pipelines and tasks capture the start and end times of execution etc.
+The query plan is captured in the *queryPlan* property of a task.
+
+## Release notes for v0.0.1
+In this version, basic dataset-level lineage is captured using the model mapping as mentioned earlier.
+
+### Spark versions supported
+The primary version tested is Spark/Scala version 2.4.8/2_11.
+We anticipate this to work well with other Spark 2.4.x versions and Scala 2_11.
+
+Support for other Spark versions is planned in the very near future.
+
+### Environments tested with
+This initial release has been tested with the following environments:
+- spark-submit of Python/Java applications to local and remote servers
+- notebooks
+
+Note that testing for other environments such as Databricks and standalone applications is planned in the near future.
+
+### Spark commands supported
+Below is a list of Spark commands that are parsed currently:
+- InsertIntoHadoopFsRelationCommand
+- SaveIntoDataSourceCommand (jdbc)
+- CreateHiveTableAsSelectCommand
+- InsertIntoHiveTable
+
+Effectively, these support data sources/sinks corresponding to Hive, HDFS and JDBC.
+
+### Spark commands not yet supported
+- View related commands
+- Cache commands and implications on lineage
+- RDD jobs
+
+### Important notes on usage
+
+- It is advisable to set appName appropriately so that you can trace lineage from a pipeline back to your source code.
+
+- If multiple apps with the same appName run concurrently, dataset-lineage will be captured correctly but the custom-properties e.g. app-id, SQLQueryId would be unreliable. We expect this to be quite rare.
+
+- If spark execution fails, then an empty pipeline would still get created, but it may not have any tasks.
+
+- For HDFS sources, the folder (name) is regarded as the dataset (name) to align with typical storage of parquet/csv formats.
+
+## Known limitations
+- Only Postgres is supported for JDBC sources in this initial release. Support for other driver URL formats will be added in the future.
+- Behavior with cached datasets is not fully specified/defined in context of lineage.
+- There is a possibility that very short-lived jobs that run within a few milliseconds may not be captured by the listener. This should not cause an issue for realistic Spark applications.
diff --git a/spark-lineage/bin/.gitignore b/spark-lineage/bin/.gitignore
new file mode 100644
index 00000000000000..7eed456bec8db3
--- /dev/null
+++ b/spark-lineage/bin/.gitignore
@@ -0,0 +1,2 @@
+/main/
+/test/
diff --git a/spark-lineage/build.gradle b/spark-lineage/build.gradle
new file mode 100644
index 00000000000000..660969de23ab10
--- /dev/null
+++ b/spark-lineage/build.gradle
@@ -0,0 +1,138 @@
+apply plugin: 'java'
+apply plugin: 'com.github.johnrengelman.shadow'
+apply plugin: 'maven'
+apply plugin: 'signing'
+
+dependencies {
+
+ // Needed to break the tie between the Guava versions required by Spark and WireMock
+ compile(externalDependency.hadoopMapreduceClient) {
+ force = true
+ }
+
+ compile(externalDependency.hadoopCommon) {
+ force = true
+ } // required for org.apache.hadoop.util.StopWatch
+
+ compile(externalDependency.commonsIo) {
+ force = true
+ } // required for org.apache.commons.io.Charsets that is used internally
+
+ compileOnly externalDependency.lombok
+ annotationProcessor externalDependency.lombok
+
+ implementation(project(':metadata-models')) {
+ exclude group: "org.antlr"
+ exclude group: "com.google.guava" // causes issues with Guava Stopwatch constructor
+ }
+
+ implementation(externalDependency.sparkSql){
+ exclude group: "org.apache.hadoop"
+ }
+ implementation(externalDependency.sparkHive){
+ exclude group: "org.apache.hadoop"
+ }
+
+ testImplementation(externalDependency.postgresql)
+
+ testImplementation externalDependency.mockito
+
+ testImplementation(externalDependency.wiremock){
+ exclude group: "com.fasterxml.jackson.core"
+ } // older version to allow older guava
+
+ testImplementation(externalDependency.testContainersPostgresql) // older version to allow older jackson
+}
+
+
+
+shadowJar {
+ zip64=true
+ classifier=''
+ dependencies {
+ exclude(dependency("org.apache.hadoop::"))
+ exclude(dependency("org.apache.spark::"))
+ exclude(dependency(externalDependency.commonsIo))
+ }
+}
+
+
+
+test {
+ useJUnit()
+}
+
+assemble {
+ dependsOn shadowJar
+}
+
+task sourceJar(type: Jar) {
+ classifier 'sources'
+ from sourceSets.main.allJava
+}
+
+task javadocJar(type: Jar, dependsOn: javadoc) {
+ classifier 'javadoc'
+ from javadoc.destinationDir
+}
+
+artifacts {
+ archives shadowJar
+}
+
+// uploadArchives {
+// repositories {
+// mavenDeployer {
+// def ossrhUsername = System.getenv('RELEASE_USERNAME')
+// def ossrhPassword = System.getenv('RELEASE_PASSWORD')
+// beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
+
+// repository(url: "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/") {
+// authentication(userName: ossrhUsername, password: ossrhPassword)
+// }
+
+// snapshotRepository(url: "https://s01.oss.sonatype.org/content/repositories/snapshots/") {
+// authentication(userName: ossrhUsername, password: ossrhPassword)
+// }
+
+// pom.project {
+// //No need to specify name here. Name is always picked up from project name
+// //name 'spark-lineage'
+// packaging 'jar'
+// // optionally artifactId can be defined here
+// description 'Library to push data lineage from spark to datahub'
+// url 'https://datahubproject.io'
+
+// scm {
+// connection 'scm:git:git://github.com/linkedin/datahub.git'
+// developerConnection 'scm:git:ssh://github.com:linkedin/datahub.git'
+// url 'https://github.com/linkedin/datahub.git'
+// }
+
+// licenses {
+// license {
+// name 'The Apache License, Version 2.0'
+// url 'http://www.apache.org/licenses/LICENSE-2.0.txt'
+// }
+// }
+
+// developers {
+// developer {
+// id 'datahub'
+// name 'datahub'
+//
+// }
+// }
+// }
+// }
+// }
+// }
+
+
+// signing {
+// def signingKey = findProperty("signingKey")
+// def signingPassword = findProperty("signingPassword")
+// useInMemoryPgpKeys(signingKey, signingPassword)
+// sign configurations.archives
+// }
+
diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/MCPEmitter.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/MCPEmitter.java
new file mode 100644
index 00000000000000..017d75c6078375
--- /dev/null
+++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/MCPEmitter.java
@@ -0,0 +1,67 @@
+package com.linkedin.datahub.lineage.consumer.impl;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkEnv;
+
+import com.linkedin.datahub.lineage.spark.model.LineageConsumer;
+import com.linkedin.datahub.lineage.spark.model.LineageEvent;
+import com.linkedin.mxe.MetadataChangeProposal;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class MCPEmitter implements LineageConsumer {
+
+ private static final String GMS_URL_KEY = "spark.datahub.lineage.mcpEmitter.gmsUrl";
+ private static final String SENTINEL = "moot";
+
+ private ConcurrentHashMap singleton = new ConcurrentHashMap<>();
+
+ private void emit(List mcps) {
+ RESTEmitter emitter = emitter();
+ if (emitter != null) {
+ mcps.forEach(mcp -> {
+ log.debug("Emitting \n" + mcp);
+ try {
+ emitter.emit(mcp);
+ } catch (IOException e) {
+ // log error, but don't impact thread
+ StringWriter s = new StringWriter();
+ PrintWriter p = new PrintWriter(s);
+ e.printStackTrace(p);
+ log.error(s.toString());
+ p.close();
+ }
+ });
+ }
+ }
+
+ // TODO ideally the impl here should not be tied to Spark; the LineageConsumer
+ // API needs tweaking to include configs
+ private RESTEmitter emitter() {
+ singleton.computeIfAbsent(SENTINEL, x -> {
+ SparkConf conf = SparkEnv.get().conf();
+ if (conf.contains(GMS_URL_KEY)) {
+ String gmsUrl = conf.get(GMS_URL_KEY);
+ log.debug("REST emitter configured with GMS url " + gmsUrl);
+ return RESTEmitter.create(gmsUrl);
+ }
+
+ log.error("GMS URL not configured.");
+ return null;
+ });
+
+ return singleton.get(SENTINEL);
+ }
+
+ @Override
+ public void accept(LineageEvent evt) {
+ emit(evt.toMcps());
+ }
+}
diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/RESTEmitter.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/RESTEmitter.java
new file mode 100644
index 00000000000000..6fd3ffee83e3b8
--- /dev/null
+++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/RESTEmitter.java
@@ -0,0 +1,109 @@
+package com.linkedin.datahub.lineage.consumer.impl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Objects;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.linkedin.data.template.JacksonDataTemplateCodec;
+import com.linkedin.mxe.MetadataChangeProposal;
+
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Minimal HTTP client that posts metadata change proposals to the GMS endpoint
+ * {@code /aspects?action=ingestProposal} under the configured base URL.
+ */
+@Slf4j
+@RequiredArgsConstructor
+public class RESTEmitter {
+
+  private static final JacksonDataTemplateCodec DATA_TEMPLATE_CODEC = new JacksonDataTemplateCodec();
+  // Shared instance: avoids building a new ObjectMapper for every emitted proposal.
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  @Getter
+  private final String gmsUrl;
+
+  /**
+   * Serializes the proposal to JSON, drops top-level null-valued fields, wraps the
+   * result under a {@code "proposal"} key and POSTs it to GMS.
+   *
+   * @param mcp the proposal to send
+   * @throws IOException if serialization or the HTTP request fails
+   */
+  public void emit(MetadataChangeProposal mcp) throws IOException {
+    String proposalJson = DATA_TEMPLATE_CODEC.mapToString(mcp.data());
+    TypeReference<HashMap<String, Object>> typeRef = new TypeReference<HashMap<String, Object>>() {
+    };
+    HashMap<String, Object> proposal = OBJECT_MAPPER.readValue(proposalJson, typeRef);
+    proposal.values().removeIf(Objects::isNull);
+
+    String payloadJson = "{" + " \"proposal\" :" + OBJECT_MAPPER.writeValueAsString(proposal) + "}";
+    String url = this.gmsUrl + "/aspects?action=ingestProposal";
+    log.debug("Emitting payload: {}\n to URL {}", payloadJson, url);
+    makeRequest(url, "POST", payloadJson);
+  }
+
+  /**
+   * Sends one HTTP request with JSON headers and reads the whole response body.
+   *
+   * @param urlStr target URL
+   * @param method HTTP method, e.g. {@code "GET"} or {@code "POST"}
+   * @param payloadJson request body, or {@code null} to send no body
+   * @return always {@code true}; failures are reported by exception
+   * @throws IOException if connecting, writing or reading fails
+   */
+  public static boolean makeRequest(String urlStr, String method, String payloadJson) throws IOException {
+    HttpURLConnection con = (HttpURLConnection) new URL(urlStr).openConnection();
+    con.setRequestMethod(method);
+    con.setRequestProperty("Content-Type", "application/json");
+    con.setRequestProperty("X-RestLi-Protocol-Version", "2.0.0");
+    if (payloadJson != null) {
+      // Only enable output when there is a body to write.
+      con.setDoOutput(true);
+      try (OutputStream os = con.getOutputStream()) {
+        os.write(payloadJson.getBytes(StandardCharsets.UTF_8));
+      }
+    }
+    try (BufferedReader br =
+        new BufferedReader(new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
+      StringBuilder response = new StringBuilder();
+      String responseLine;
+      while ((responseLine = br.readLine()) != null) {
+        response.append(responseLine.trim());
+      }
+      log.debug("URL: {} Response: {}", urlStr, response);
+    }
+    return true;
+  }
+
+  /**
+   * Checks connectivity by issuing a GET to the {@code /config} path under the GMS URL.
+   *
+   * @return {@code true} if the request completed without an I/O error
+   */
+  public boolean testConnection() {
+    try {
+      makeRequest(this.gmsUrl + "/config", "GET", null);
+      return true;
+    } catch (IOException e) {
+      log.error("Connection test against {} failed", this.gmsUrl, e);
+      return false;
+    }
+  }
+
+  /** Creates an emitter targeting the given GMS base URL. */
+  public static RESTEmitter create(String gmsUrl) {
+    return new RESTEmitter(gmsUrl);
+  }
+}
\ No newline at end of file
diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatahubLineageEmitter.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatahubLineageEmitter.java
new file mode 100644
index 00000000000000..8c70f4f8acff65
--- /dev/null
+++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatahubLineageEmitter.java
@@ -0,0 +1,290 @@
+package com.linkedin.datahub.lineage.spark.interceptor;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+import org.apache.spark.SparkEnv;
+import org.apache.spark.scheduler.SparkListener;
+import org.apache.spark.scheduler.SparkListenerApplicationEnd;
+import org.apache.spark.scheduler.SparkListenerApplicationStart;
+import org.apache.spark.scheduler.SparkListenerEvent;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.catalyst.plans.QueryPlan;
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
+import org.apache.spark.sql.execution.QueryExecution;
+import org.apache.spark.sql.execution.SQLExecution;
+import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd;
+import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart;
+
+import com.google.common.base.Splitter;
+import com.linkedin.datahub.lineage.spark.model.AppEndEvent;
+import com.linkedin.datahub.lineage.spark.model.AppStartEvent;
+import com.linkedin.datahub.lineage.spark.model.DatasetLineage;
+import com.linkedin.datahub.lineage.spark.model.LineageConsumer;
+import com.linkedin.datahub.lineage.spark.model.SQLQueryExecEndEvent;
+import com.linkedin.datahub.lineage.spark.model.SQLQueryExecStartEvent;
+import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset;
+
+import lombok.extern.slf4j.Slf4j;
+import scala.collection.JavaConversions;
+import scala.runtime.AbstractFunction1;
+import scala.runtime.AbstractPartialFunction;
+
+@Slf4j
+public class DatahubLineageEmitter extends SparkListener {
+
+ private static final int THREAD_CNT = 10; // size of the fixed thread pool created per application
+ public static final String CONSUMER_TYPE_KEY = "spark.datahub.lineage.consumerTypes";
+ // Per-application state, keyed by Spark application name; the inner map is keyed by SQL execution id.
+ private final Map<String, AppStartEvent> appDetails = new ConcurrentHashMap<>();
+ private final Map<String, Map<Long, SQLQueryExecStartEvent>> appSqlDetails = new ConcurrentHashMap<>();
+ private final Map<String, ExecutorService> appPoolDetails = new ConcurrentHashMap<>();
+
+// private static LineageConsumer loggingConsumer() {
+// log.warn("Lineage consumer not specified. Defaulting to LoggingConsumer.");
+// return LineageUtils.LOGGING_CONSUMER;
+// }
+
+ /** Pool task that derives dataset lineage from one SQL execution's logical plan and publishes it to the consumers. */
+ private class SqlStartTask implements Runnable {
+   private final SparkListenerSQLExecutionStart sqlStart;
+   private final SparkContext ctx;
+   private final LogicalPlan plan;
+
+   public SqlStartTask(SparkListenerSQLExecutionStart sqlStart, LogicalPlan plan, SparkContext ctx) {
+     this.sqlStart = sqlStart;
+     this.plan = plan;
+     this.ctx = ctx;
+   }
+
+   @Override
+   public void run() {
+     try {
+       parsePlan();
+     } catch (Exception e) {
+       // This runs on a pool thread, so a failure must be logged here to reach the application log.
+       log.error("Lineage extraction failed for execution id " + ctx.appName() + ":" + sqlStart.executionId(), e);
+     }
+   }
+
+   /** Registers the execution, extracts its output and input datasets, then publishes the start event. */
+   private void parsePlan() {
+     Map<Long, SQLQueryExecStartEvent> executions = appSqlDetails.get(ctx.appName());
+     if (executions == null) {
+       log.error("Skipping execution id " + ctx.appName() + ":" + sqlStart.executionId()
+           + " as no application start event is tracked");
+       return;
+     }
+     executions.put(sqlStart.executionId(), new SQLQueryExecStartEvent(ctx.conf().get("spark.master"),
+         ctx.appName(), ctx.applicationId(), sqlStart.time(), sqlStart.executionId(), null));
+     log.debug("PLAN for execution id: " + ctx.appName() + ":" + sqlStart.executionId() + "\n");
+     log.debug(plan.toString());
+
+     DatasetExtractor extractor = new DatasetExtractor();
+     Optional<? extends SparkDataset> outputDS = extractor.asDataset(plan, ctx, true);
+     if (!outputDS.isPresent()) {
+       log.debug("Skipping execution as no output dataset present for execution id: " + ctx.appName() + ":"
+           + sqlStart.executionId());
+       return;
+     }
+
+     DatasetLineage lineage = new DatasetLineage(sqlStart.description(), plan.toString(), outputDS.get());
+     Collection<QueryPlan<?>> allInners = new ArrayList<>();
+     addSources(plan, "CHILD ", extractor, lineage, allInners);
+     // Plans gathered from innerChildren() during the first pass are walked in a second pass.
+     for (QueryPlan<?> qp : allInners) {
+       if (qp instanceof LogicalPlan) {
+         addSources((LogicalPlan) qp, "INNER CHILD ", extractor, lineage, null);
+       }
+     }
+
+     SQLQueryExecStartEvent evt = new SQLQueryExecStartEvent(ctx.conf().get("spark.master"), ctx.appName(),
+         ctx.applicationId(), sqlStart.time(), sqlStart.executionId(), lineage);
+     executions.put(sqlStart.executionId(), evt);
+
+     consumers().forEach(c -> c.accept(evt)); // TODO parallel stream here?
+     log.debug("LINEAGE \n" + lineage + "\n");
+     log.debug("Parsed execution id " + ctx.appName() + ":" + sqlStart.executionId());
+   }
+
+   /** Adds each dataset found under {@code root} as a lineage source; a null {@code inners} marks the second pass. */
+   private void addSources(LogicalPlan root, String label, DatasetExtractor extractor, DatasetLineage lineage,
+       Collection<QueryPlan<?>> inners) {
+     root.collect(new AbstractPartialFunction<LogicalPlan, Void>() {
+       @Override
+       public Void apply(LogicalPlan node) {
+         log.debug(label + node.getClass() + "\n" + node + "\n-------------\n");
+         Optional<? extends SparkDataset> inputDS = extractor.asDataset(node, ctx, false);
+         if (inners == null) {
+           inputDS.ifPresent(
+               x -> log.debug("source added for " + ctx.appName() + "/" + sqlStart.executionId() + ": " + x));
+         }
+         inputDS.ifPresent(x -> lineage.addSource(x));
+         if (inners != null) {
+           inners.addAll(JavaConversions.asJavaCollection(node.innerChildren()));
+         }
+         return null;
+       }
+
+       @Override
+       public boolean isDefinedAt(LogicalPlan x) {
+         return true;
+       }
+     });
+   }
+ }
+
+ /** Records the application start, notifies the consumers and creates the per-application worker pool. */
+ @Override
+ public void onApplicationStart(SparkListenerApplicationStart applicationStart) {
+   try {
+     log.debug("App started: " + applicationStart);
+     LineageUtils.findSparkCtx().foreach(new AbstractFunction1<SparkContext, Void>() {
+
+       @Override
+       public Void apply(SparkContext sc) {
+         String appName = applicationStart.appName();
+         String appId = applicationStart.appId().isDefined() ? applicationStart.appId().get() : "";
+         AppStartEvent evt = new AppStartEvent(LineageUtils.getMaster(sc), appName, appId,
+             applicationStart.time(), applicationStart.sparkUser());
+
+         consumers().forEach(x -> x.accept(evt));
+         // TODO keyed by appName; only latest will be considered. Potential
+         // inconsistencies not mapped.
+         appDetails.put(appName, evt);
+         appSqlDetails.put(appName, new ConcurrentHashMap<>());
+         ExecutorService stale = appPoolDetails.put(appName, Executors.newFixedThreadPool(THREAD_CNT));
+         if (stale != null) {
+           stale.shutdown(); // a repeated start for the same name must not leak the earlier pool's threads
+         }
+         return null;
+       }
+     });
+     super.onApplicationStart(applicationStart);
+   } catch (Exception e) {
+     // log error, but don't impact thread
+     log.error("Failed to process application start event", e);
+   }
+ }
+
+ /** Releases the per-application state and, when the matching start event is known, emits an end event. */
+ @Override
+ public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) {
+   try {
+     LineageUtils.findSparkCtx().foreach(new AbstractFunction1<SparkContext, Void>() {
+
+       @Override
+       public Void apply(SparkContext sc) {
+         log.debug("Application end event received for appId :" + sc.appName());
+         AppStartEvent start = appDetails.remove(sc.appName());
+         ExecutorService pool = appPoolDetails.remove(sc.appName());
+         if (pool != null) {
+           // No pool is registered when the start event was never processed; skip instead of failing with an NPE.
+           pool.shutdown();
+         }
+         appSqlDetails.remove(sc.appName());
+         if (start == null) {
+           log.error(
+               "Application end event received, but start event missing for appId " + sc.applicationId());
+         } else {
+           AppEndEvent evt = new AppEndEvent(LineageUtils.getMaster(sc), sc.appName(), sc.applicationId(),
+               applicationEnd.time(), start);
+           consumers().forEach(x -> x.accept(evt));
+         }
+         return null;
+       }
+     });
+     super.onApplicationEnd(applicationEnd);
+   } catch (Exception e) {
+     // log error, but don't impact thread
+     log.error("Failed to process application end event", e);
+   }
+ }
+
+ /**
+  * Dispatches SQL execution start/end events to their handlers; all other events are ignored.
+  * Any exception is logged and swallowed so that it does not propagate to the caller.
+  */
+ @Override
+ public void onOtherEvent(SparkListenerEvent event) {
+   try {
+     if (event instanceof SparkListenerSQLExecutionStart) {
+       SparkListenerSQLExecutionStart sqlEvt = (SparkListenerSQLExecutionStart) event;
+       log.debug("SQL Exec start event with id " + sqlEvt.executionId());
+       processExecution(sqlEvt);
+     } else if (event instanceof SparkListenerSQLExecutionEnd) {
+       SparkListenerSQLExecutionEnd sqlEvt = (SparkListenerSQLExecutionEnd) event;
+       log.debug("SQL Exec end event with id " + sqlEvt.executionId());
+       processExecutionEnd(sqlEvt);
+     }
+   } catch (Exception e) {
+     // log error, but don't impact thread
+     log.error("Failed to process SQL execution event", e);
+   }
+ }
+
+ /** Emits an end event for a SQL execution whose start event carried lineage; logs when no start event is tracked. */
+ public void processExecutionEnd(SparkListenerSQLExecutionEnd sqlEnd) {
+   LineageUtils.findSparkCtx().foreach(new AbstractFunction1<SparkContext, Void>() {
+
+     @Override
+     public Void apply(SparkContext sc) {
+       // The per-application map is absent when no start event was processed for this application name.
+       Map<Long, SQLQueryExecStartEvent> executions = appSqlDetails.get(sc.appName());
+       SQLQueryExecStartEvent start = executions == null ? null : executions.remove(sqlEnd.executionId());
+       if (start == null) {
+         log.error("Execution end event received, but start event missing for appId/sql exec Id " + sc.applicationId()
+             + ":" + sqlEnd.executionId());
+       } else if (start.getDatasetLineage() != null) {
+         SQLQueryExecEndEvent evt = new SQLQueryExecEndEvent(LineageUtils.getMaster(sc), sc.appName(),
+             sc.applicationId(),
+             sqlEnd.time(), sqlEnd.executionId(), start);
+
+         consumers().forEach(x -> x.accept(evt));
+       }
+       return null;
+     }
+   });
+ }
+
+ // TODO sqlEvt.details() unused
+ /** Looks up the query execution for the event and queues a SqlStartTask on the application's pool. */
+ private void processExecution(SparkListenerSQLExecutionStart sqlStart) {
+   QueryExecution queryExec = SQLExecution.getQueryExecution(sqlStart.executionId());
+   if (queryExec == null) {
+     log.error("Skipping processing for sql exec Id " + sqlStart.executionId() + " as Query execution context could not be read from current spark state");
+     return;
+   }
+   SparkContext ctx = queryExec.sparkSession().sparkContext();
+   ExecutorService pool = java.util.Objects.requireNonNull(appPoolDetails.get(ctx.appName()),
+       "No worker pool registered for application " + ctx.appName() + "; was its start event processed?");
+   pool.execute(new SqlStartTask(sqlStart, queryExec.optimizedPlan(), ctx));
+ }
+
+ /** Resolves the consumers named in CONSUMER_TYPE_KEY (comma separated), defaulting to "mcpEmitter". */
+ private static List<LineageConsumer> consumers() {
+   SparkConf conf = SparkEnv.get().conf();
+   if (!conf.contains(CONSUMER_TYPE_KEY)) {
+     return Collections.singletonList(LineageUtils.getConsumer("mcpEmitter"));
+   }
+   // Names for which getConsumer returns null are dropped from the result.
+   return StreamSupport.stream(Splitter.on(",").trimResults().split(conf.get(CONSUMER_TYPE_KEY)).spliterator(), false)
+       .map(x -> LineageUtils.getConsumer(x))
+       .filter(x -> x != null)
+       .collect(Collectors.toList());
+ }
+
+}
diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatasetExtractor.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatasetExtractor.java
new file mode 100644
index 00000000000000..5b4578588fe57b
--- /dev/null
+++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatasetExtractor.java
@@ -0,0 +1,156 @@
+package com.linkedin.datahub.lineage.spark.interceptor;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.spark.SparkContext;
+import org.apache.spark.sql.catalyst.catalog.HiveTableRelation;
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
+import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand;
+import org.apache.spark.sql.execution.datasources.HadoopFsRelation;
+import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand;
+import org.apache.spark.sql.execution.datasources.LogicalRelation;
+import org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand;
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions;
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation;
+import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand;
+import org.apache.spark.sql.hive.execution.InsertIntoHiveTable;
+import org.apache.spark.sql.sources.BaseRelation;
+
+import com.google.common.collect.ImmutableSet;
+import com.linkedin.datahub.lineage.spark.model.dataset.CatalogTableDataset;
+import com.linkedin.datahub.lineage.spark.model.dataset.HdfsPathDataset;
+import com.linkedin.datahub.lineage.spark.model.dataset.JdbcDataset;
+import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset;
+
+import scala.Option;
+import scala.collection.JavaConversions;
+
+public class DatasetExtractor {
+ // Converters keyed by concrete plan-node class and by concrete relation class (populated in the static block).
+ private static final Map<Class<? extends LogicalPlan>, PlanToDataset> PLAN_TO_DATASET = new HashMap<>();
+ private static final Map<Class<? extends BaseRelation>, RelationToDataset> REL_TO_DATASET = new HashMap<>();
+ private static final Set<Class<? extends LogicalPlan>> OUTPUT_CMD = ImmutableSet.of(
+     InsertIntoHadoopFsRelationCommand.class, SaveIntoDataSourceCommand.class,
+     CreateDataSourceTableAsSelectCommand.class, CreateHiveTableAsSelectCommand.class, InsertIntoHiveTable.class);
+ // TODO InsertIntoHiveDirCommand, InsertIntoDataSourceDirCommand
+
+ private interface PlanToDataset { // converts a logical plan node into the dataset it refers to, if any
+   Optional<? extends SparkDataset> fromPlanNode(LogicalPlan plan, SparkContext ctx);
+ }
+
+ private interface RelationToDataset { // converts a data-source relation into the dataset it refers to, if any
+   Optional<? extends SparkDataset> fromRelation(BaseRelation rel, SparkContext ctx);
+ }
+
+ // Registers one converter per supported plan-node class and per supported relation class.
+ static {
+   // File-based insert: report the catalog table when one is attached, otherwise the output path.
+   PLAN_TO_DATASET.put(InsertIntoHadoopFsRelationCommand.class, (p, ctx) -> {
+     InsertIntoHadoopFsRelationCommand cmd = (InsertIntoHadoopFsRelationCommand) p;
+     if (cmd.catalogTable().isDefined()) {
+       return Optional.of(new CatalogTableDataset(cmd.catalogTable().get()));
+     }
+     return Optional.of(new HdfsPathDataset(cmd.outputPath()));
+   });
+
+   // Generic relation: delegate to the converter registered for the relation's concrete class, if any.
+   PLAN_TO_DATASET.put(LogicalRelation.class, (p, ctx) -> {
+     BaseRelation baseRel = ((LogicalRelation) p).relation();
+     RelationToDataset converter = REL_TO_DATASET.get(baseRel.getClass());
+     if (converter == null) {
+       return Optional.empty();
+     }
+     return converter.fromRelation(baseRel, ctx);
+   });
+
+   // Data-source save: only targets whose "url" option mentions jdbc are reported.
+   PLAN_TO_DATASET.put(SaveIntoDataSourceCommand.class, (p, ctx) -> {
+     SaveIntoDataSourceCommand cmd = (SaveIntoDataSourceCommand) p;
+
+     Map<String, String> options = JavaConversions.mapAsJavaMap(cmd.options());
+     String url = options.get("url"); // e.g. jdbc:postgresql://localhost:5432/sparktestdb
+     // The option map has no "url" entry for some sources; treat that as "no dataset" instead of an NPE.
+     if (url == null || !url.contains("jdbc")) {
+       return Optional.empty();
+     }
+
+     String tbl = options.get("dbtable");
+     return Optional.of(new JdbcDataset(url, tbl));
+   });
+
+   PLAN_TO_DATASET.put(CreateDataSourceTableAsSelectCommand.class, (p, ctx) -> {
+     CreateDataSourceTableAsSelectCommand cmd = (CreateDataSourceTableAsSelectCommand) p;
+     // TODO what of cmd.mode()
+     return Optional.of(new CatalogTableDataset(cmd.table()));
+   });
+   PLAN_TO_DATASET.put(CreateHiveTableAsSelectCommand.class, (p, ctx) -> {
+     CreateHiveTableAsSelectCommand cmd = (CreateHiveTableAsSelectCommand) p;
+     return Optional.of(new CatalogTableDataset(cmd.tableDesc()));
+   });
+   PLAN_TO_DATASET.put(InsertIntoHiveTable.class, (p, ctx) -> {
+     InsertIntoHiveTable cmd = (InsertIntoHiveTable) p;
+     return Optional.of(new CatalogTableDataset(cmd.table()));
+   });
+
+   PLAN_TO_DATASET.put(HiveTableRelation.class, (p, ctx) -> {
+     HiveTableRelation cmd = (HiveTableRelation) p;
+     return Optional.of(new CatalogTableDataset(cmd.tableMeta()));
+   });
+
+   // File relation: root paths are normalised to directories and de-duplicated; the first one is reported.
+   REL_TO_DATASET.put(HadoopFsRelation.class, (r, ctx) -> {
+     List<Path> res = JavaConversions.asJavaCollection(((HadoopFsRelation) r).location().rootPaths()).stream()
+         .map(p -> getDirectoryPath(p, ctx.hadoopConfiguration()))
+         .distinct()
+         .collect(Collectors.toList());
+     if (res.isEmpty()) {
+       return Optional.empty(); // no root path to report; avoids IndexOutOfBoundsException on get(0)
+     }
+
+     // TODO mapping to URN TBD
+     return Optional.of(new HdfsPathDataset(res.get(0)));
+   });
+   REL_TO_DATASET.put(JDBCRelation.class, (r, ctx) -> {
+     JDBCRelation rel = (JDBCRelation) r;
+     Option<String> tbl = rel.jdbcOptions().parameters().get(JDBCOptions.JDBC_TABLE_NAME());
+     if (tbl.isEmpty()) {
+       return Optional.empty();
+     }
+
+     return Optional.of(new JdbcDataset(rel.jdbcOptions().url(), tbl.get()));
+   });
+ }
+
+ /** Maps a plan node to its dataset, if supported; output commands are ignored unless {@code outputNode} is true. */
+ Optional<? extends SparkDataset> asDataset(LogicalPlan logicalPlan, SparkContext ctx, boolean outputNode) {
+   Class<?> nodeType = logicalPlan.getClass();
+   if (!outputNode && OUTPUT_CMD.contains(nodeType)) {
+     return Optional.empty();
+   }
+
+   // Lookup is by exact class, so subclasses of registered node types are not matched.
+   PlanToDataset converter = PLAN_TO_DATASET.get(nodeType);
+   return converter == null ? Optional.empty() : converter.fromPlanNode(logicalPlan, ctx);
+ }
+
+ /** Returns the parent of {@code p} when {@code p} is a file, otherwise (or when its status cannot be read) {@code p} itself. */
+ private static Path getDirectoryPath(Path p, Configuration hadoopConf) {
+   boolean regularFile;
+   try {
+     regularFile = p.getFileSystem(hadoopConf).getFileStatus(p).isFile();
+   } catch (IOException e) {
+     // Best effort: when the file system or file status cannot be read, fall back to the path as given.
+     // log.warn("Unable to get file system for path ", e);
+     return p;
+   }
+   return regularFile ? p.getParent() : p;
+ }
+}
diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/LineageUtils.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/LineageUtils.java
new file mode 100644
index 00000000000000..aab25fc85e2ef5
--- /dev/null
+++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/LineageUtils.java
@@ -0,0 +1,125 @@
+package com.linkedin.datahub.lineage.spark.interceptor;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import javax.annotation.Nonnull;
+
+import org.apache.spark.SparkContext;
+import org.apache.spark.SparkContext$;
+import org.apache.spark.sql.SparkSession;
+
+import com.linkedin.common.urn.DataFlowUrn;
+import com.linkedin.data.ByteString;
+import com.linkedin.data.template.JacksonDataTemplateCodec;
+import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.datahub.lineage.consumer.impl.MCPEmitter;
+import com.linkedin.datahub.lineage.spark.model.LineageConsumer;
+import com.linkedin.mxe.GenericAspect;
+
+import lombok.extern.slf4j.Slf4j;
+import scala.Option;
+import scala.runtime.AbstractFunction0;
+import scala.runtime.AbstractFunction1;
+
+@Slf4j
+public class LineageUtils {
+ private static final JacksonDataTemplateCodec DATA_TEMPLATE_CODEC = new JacksonDataTemplateCodec();
+ // Registry of lineage consumers keyed by consumer type name.
+ private static Map<String, LineageConsumer> consumers = new ConcurrentHashMap<>();
+
+ public static final LineageConsumer LOGGING_CONSUMER = (x -> log.info(x.toString()));
+
+ // hook for replacing paths during testing. Not the cleanest way, TODO improve.
+ /* This is for generating urn from a hash of the plan */
+ // private static Function PATH_REPLACER = (x -> x);
+
+ static {
+   // system defined consumers
+   registerConsumer("mcpEmitter", new MCPEmitter());
+ }
+
+ private LineageUtils() {
+ // intentionally empty: the private constructor keeps other classes from instantiating this holder of static helpers
+ }
+
+ // Registers consumer under consumerType; overwrites existing consumer entry of same type
+ public static void registerConsumer(String consumerType, LineageConsumer consumer) {
+ consumers.put(consumerType, consumer);
+ }
+
+ public static LineageConsumer getConsumer(String consumerType) { // lookup in the consumer registry
+ return consumers.get(consumerType); // null when nothing is registered under consumerType
+ }
+
+ public static DataFlowUrn flowUrn(String master, String appName) { // each run of ':', '/' or '_' in master becomes a single '_'
+   return new DataFlowUrn("spark", appName, master.replaceAll("[:/_]+", "_"));
+ }
+
+ // Taken from GenericAspectUtils
+ /** Renders the aspect's data map to a string with the codec and wraps its UTF-8 bytes in a GenericAspect. */
+ public static GenericAspect serializeAspect(@Nonnull RecordTemplate aspect) {
+   final String aspectJson;
+   try {
+     aspectJson = DATA_TEMPLATE_CODEC.mapToString(aspect.data());
+   } catch (IOException e) {
+     throw new RuntimeException(e);
+   }
+   // The content type is always declared as application/json.
+   GenericAspect genericAspect = new GenericAspect();
+   genericAspect.setValue(ByteString.unsafeWrap(aspectJson.getBytes(StandardCharsets.UTF_8)));
+   genericAspect.setContentType("application/json");
+   return genericAspect;
+ }
+
+ public static Option findSparkCtx() {
+ return SparkSession.getActiveSession()
+ .map(new AbstractFunction1() {
+
+ @Override
+ public SparkContext apply(SparkSession sess) {
+ return sess.sparkContext();
+ }
+ })
+ .orElse(new AbstractFunction0