diff --git a/.gitignore b/.gitignore index 631630d64c7fac..331dd6ee7fc76d 100644 --- a/.gitignore +++ b/.gitignore @@ -39,7 +39,17 @@ MANIFEST # Mac OS **/.DS_Store +#spark-lineage +**/spark-lineage/metastore_db/ +**/spark-lineage/**/derby.log +**/spark-lineage/**/hive/ +**/spark-lineage/**/out.csv/ .vscode +# cypress integration test generated files +**/cypress/videos +**/cypress/screenshots +**/cypress/node_modules + # Metadata Ingestion Generated metadata-ingestion/generated/** diff --git a/build.gradle b/build.gradle index 77c30b3680b427..dc9ca7e85a0509 100644 --- a/build.gradle +++ b/build.gradle @@ -3,7 +3,6 @@ buildscript { ext.gmaVersion = '0.2.81' ext.pegasusVersion = '28.3.7' ext.mavenVersion = '3.6.3' - apply from: './repositories.gradle' buildscript.repositories.addAll(project.repositories) dependencies { @@ -11,6 +10,7 @@ buildscript { classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4' classpath 'com.commercehub.gradle.plugin:gradle-avro-plugin:0.8.1' classpath 'org.springframework.boot:spring-boot-gradle-plugin:2.1.4.RELEASE' + classpath 'com.github.jengelman.gradle.plugins:shadow:5.2.0' } } @@ -68,6 +68,8 @@ project.ext.externalDependency = [ 'guava': 'com.google.guava:guava:27.0.1-jre', 'h2': 'com.h2database:h2:1.4.196', 'hadoopClient': 'org.apache.hadoop:hadoop-client:3.1.1', + 'hadoopCommon':'org.apache.hadoop:hadoop-common:2.7.2', + 'hadoopMapreduceClient':'org.apache.hadoop:hadoop-mapreduce-client-core:2.7.2', 'hibernateCore': 'org.hibernate:hibernate-core:5.2.16.Final', 'httpClient': 'org.apache.httpcomponents:httpclient:4.5.9', 'iStackCommons': 'com.sun.istack:istack-commons-runtime:4.0.1', @@ -114,6 +116,8 @@ project.ext.externalDependency = [ 'rythmEngine': 'org.rythmengine:rythm-engine:1.3.0', 'servletApi': 'javax.servlet:javax.servlet-api:3.1.0', 'shiroCore': 'org.apache.shiro:shiro-core:1.7.1', + 'sparkSql' : 'org.apache.spark:spark-sql_2.11:2.4.8', + 'sparkHive' : 'org.apache.spark:spark-hive_2.11:2.4.8', 'springBeans': 
'org.springframework:spring-beans:5.2.3.RELEASE', 'springContext': 'org.springframework:spring-context:5.2.3.RELEASE', 'springCore': 'org.springframework:spring-core:5.2.3.RELEASE', @@ -129,7 +133,9 @@ project.ext.externalDependency = [ 'testng': 'org.testng:testng:7.3.0', 'testContainers': 'org.testcontainers:testcontainers:1.15.1', 'testContainersJunit': 'org.testcontainers:junit-jupiter:1.15.1', + 'testContainersPostgresql':'org.testcontainers:postgresql:1.2.0', 'testContainersElasticsearch': 'org.testcontainers:elasticsearch:1.15.3', + 'wiremock':'com.github.tomakehurst:wiremock:2.10.0', 'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14' ] diff --git a/datahub-web-react/src/images/metabaselogo.png b/datahub-web-react/src/images/metabaselogo.png new file mode 100644 index 00000000000000..c158f33e020bbc Binary files /dev/null and b/datahub-web-react/src/images/metabaselogo.png differ diff --git a/docker/elasticsearch/env/docker.env b/docker/elasticsearch/env/docker.env index 4239de21e50689..511e627dd152b3 100644 --- a/docker/elasticsearch/env/docker.env +++ b/docker/elasticsearch/env/docker.env @@ -1,3 +1,3 @@ discovery.type=single-node xpack.security.enabled=false -ES_JAVA_OPTS=-Xms256m -Xmx256m +ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true diff --git a/docker/monitoring/grafana/dashboards/datahub_dashboard.json b/docker/monitoring/grafana/dashboards/datahub_dashboard.json index 9d19a1e76f2a9c..b25148bd1fd3d5 100644 --- a/docker/monitoring/grafana/dashboards/datahub_dashboard.json +++ b/docker/monitoring/grafana/dashboards/datahub_dashboard.json @@ -1,2080 +1,2161 @@ { - "__inputs": [], - "__requires": [ + "annotations": { + "list": [ { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "8.1.2" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - 
"iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "links": [], - "panels": [ - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 37, - "panels": [], - "title": "Get", - "type": "row" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 37, + "panels": [], + "title": "Get", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { 
+ "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 0, - "y": 1 - }, - "id": 40, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Count{}[1m])/60", - "interval": "", - "legendFormat": "Get QPS", - "refId": "A" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Get Failure", - "refId": "B" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "BatchGet QPS", - "refId": "C" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "BatchGet Failure", - "refId": "D" - } - ], - "title": "Get QPS", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 1 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"targets": [ + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Count{}[1m])/60", + "interval": "", + "legendFormat": "Get QPS", + "refId": "A" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_failed_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Get Failure", + "refId": "B" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "BatchGet QPS", + "refId": "C" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_failed_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "BatchGet Failure", + "refId": "D" + } + ], + "title": "Get QPS", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 7, - "y": 1 - }, - "id": 41, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Mean{}", - "interval": "", - "legendFormat": "Get Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "Get P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "Get P95", - "refId": "C" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "BatchGet Avg", - "refId": "D" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "BatchGet P75", - "refId": "E" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "BatchGet P95", - "refId": "F" - } - ], - "title": "Get Latency", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 6, - "panels": [], - 
"title": "Ingest", - "type": "row" + "gridPos": { + "h": 8, + "w": 7, + "x": 7, + "y": 1 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 41, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Mean{}", + "interval": "", + "legendFormat": "Get Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "Get P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "Get P95", + "refId": "C" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "BatchGet Avg", + "refId": "D" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "BatchGet P75", + "refId": "E" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "BatchGet P95", + "refId": "F" + } + ], + "title": "Get Latency", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 6, + "panels": [], + "title": "Ingest", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 
0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 0, - "y": 10 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Ingest Count", - "refId": "E" - }, - { - "exemplar": false, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_Count{}[1m])/60", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "BatchIngest Count", - "refId": "B" - }, - { - "exemplar": true, - "expr": 
"increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_failed_Count[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Ingest Failure", - "refId": "C" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_failed_Count[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "BatchIngest Failure", - "refId": "D" - } - ], - "title": "Ingest QPS", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 10 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Ingest Count", + "refId": "E" + }, + { + "exemplar": false, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_Count{}[1m])/60", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "BatchIngest Count", + "refId": "B" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_failed_Count[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Ingest Failure", + "refId": "C" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_failed_Count[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "BatchIngest Failure", + "refId": "D" + } + ], + "title": "Ingest QPS", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 7, - "y": 10 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_95thPercentile{}", - "hide": false, - "interval": "", - 
"legendFormat": "P95", - "refId": "C" - } - ], - "title": "Ingest Latency", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 7, + "y": 10 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "Ingest Latency", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" 
}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 14, - "y": 10 - }, - "id": 21, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_ingestAspectToLocalDB_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "Ingest To DB", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_produceMAE_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "Produce MAE", - "refId": "C" - } - ], - "title": "Ingest Steps", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 12, - "panels": [], - "title": "Search", - "type": "row" + "gridPos": { + "h": 8, + "w": 7, + "x": 14, + "y": 10 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_ingestAspectToLocalDB_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "Ingest To DB", + "refId": "B" + }, + { + "exemplar": true, + "expr": 
"metrics_com_linkedin_metadata_entity_ebean_EbeanEntityService_produceMAE_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "Produce MAE", + "refId": "C" + } + ], + "title": "Ingest Steps", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 0, - "y": 19 - }, - "id": 23, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Count{}[1m])/60", - "interval": "", 
- "legendFormat": "QPS", - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 18 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_Mean", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Failure", - "refId": "B" - } - ], - "title": "Search QPS", - "type": "timeseries" + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_75thPercentile", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_95thPercentile", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "MAE Process Latency", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 12, + "panels": [], + "title": "Search", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - 
"custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 7, - "y": 19 - }, - "id": 29, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "Search Latency", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 27 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Count{}[1m])/60", + "interval": "", + "legendFormat": "QPS", + "refId": "A" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_failed_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Failure", + "refId": "B" + } + ], + "title": "Search QPS", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - 
"overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 14, - "y": 19 - }, - "id": 25, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_esSearch_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "ES Search", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "Request Builder", - "refId": "D" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "Total Search", - "refId": "B" - } - ], - "title": "Search Steps", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 27, - "panels": [], - "title": "Browse", - "type": "row" + "gridPos": { + "h": 8, + "w": 7, + "x": 7, + "y": 27 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + 
"exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "Search Latency", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 0, - "y": 28 - }, - "id": 28, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": 
"increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Count{}[1m])/60", - "interval": "", - "legendFormat": "QPS", - "refId": "A" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Failure", - "refId": "B" - } - ], - "title": "Browse QPS", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 14, + "y": 27 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_esSearch_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "ES Search", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "Request Builder", + "refId": "D" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "Total Search", + "refId": "B" + } + ], + "title": "Search Steps", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 27, + "panels": [], + "title": "Browse", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", 
- "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 7, - "y": 28 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "Browse Latency", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 36 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Count{}[1m])/60", + "interval": "", + "legendFormat": "QPS", + "refId": "A" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_failed_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Failure", + "refId": "B" + } + ], + "title": "Browse QPS", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 
14, - "y": 28 - }, - "id": 35, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esGroupSearch_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "ES Groups Query", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esEntitiesSearch_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "ES Entities Query", - "refId": "D" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}", - "hide": false, - "interval": "", - "legendFormat": "Total Browse", - "refId": "B" - } - ], - "title": "Browse Steps", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 36 - }, - "id": 32, - "panels": [], - "title": "Graph", - "type": "row" + "gridPos": { + "h": 8, + "w": 7, + "x": 7, + "y": 36 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": 
"metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "Browse Latency", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 0, - "y": 37 - }, - "id": 33, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Count{}[1m])/60", - "interval": 
"", - "legendFormat": "Relationships QPS", - "refId": "A" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Lineage QPS", - "refId": "B" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Relationships Failure", - "refId": "C" - }, - { - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Lineage Failure", - "refId": "D" - } - ], - "title": "Graph QPS", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 14, + "y": 36 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esGroupSearch_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "ES Groups Query", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESBrowseDAO_esEntitiesSearch_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "ES Entities Query", + "refId": "D" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Mean{}", + "hide": false, + "interval": "", + "legendFormat": "Total Browse", + "refId": "B" + } + ], + "title": "Browse Steps", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 32, + "panels": [], + "title": "Graph", + "type": "row" + }, + 
{ + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 7, - "y": 37 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": 
"B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "Graph Latency", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 45 - }, - "id": 4, - "panels": [], - "title": "Kafka", - "type": "row" + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 45 }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Count{}[1m])/60", + "interval": "", + "legendFormat": "Relationships QPS", + "refId": "A" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Lineage QPS", + "refId": "B" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_failed_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Relationships Failure", + "refId": "C" + }, + { + "exemplar": true, + "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_failed_Count{}[1m])/60", + "hide": false, + "interval": "", + "legendFormat": "Lineage Failure", + "refId": "D" + } + ], + "title": "Graph QPS", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 46 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "sum by (topic) (kafka_producer_producer_topic_metrics_record_send_rate{})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Producer Metrics", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 7, + "x": 7, + "y": 45 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": 
"metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "Graph Latency", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 4, + "panels": [], + "title": "Kafka", + "type": "row" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - 
"overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 46 - }, - "id": 19, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "sum by (client_id) (kafka_consumer_consumer_metrics_request_rate{})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Consumer Metrics", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 54 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (topic) (kafka_producer_producer_topic_metrics_record_send_rate{})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Producer Metrics", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 54 - }, - "id": 15, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "MetadataChangeEvent Topic Lag", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 54 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (client_id) (kafka_consumer_consumer_metrics_request_rate{})", + "interval": "", + 
"legendFormat": "", + "refId": "A" + } + ], + "title": "Consumer Metrics", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 54 - }, - "id": 13, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": 
"metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "MetadataChangeProposal Topic Lag", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeEventsProcessor_kafkaLag_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "MetadataChangeEvent Topic Lag", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": 
false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 63 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "MetadataAuditEvent Topic Lag", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 13, + "options": { + 
"legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeProposalsProcessor_kafkaLag_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "MetadataChangeProposal Topic Lag", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + 
"thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 63 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" - }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "MetadataChangeLog Topic Lag", - "type": "timeseries" + "overrides": [] }, - { - "datasource": null, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 71 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_kafkaLag_95thPercentile{}", + "hide": false, + 
"interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "MetadataAuditEvent Topic Lag", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 71 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + 
"exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_kafkaLag_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "MetadataChangeLog Topic Lag", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 72 - }, - "id": 16, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "targets": [ - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_Mean{}", - "interval": "", - "legendFormat": "Avg", - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 80 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "exemplar": true, - "expr": 
"metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_75thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P75", - "refId": "B" + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_Mean{}", + "interval": "", + "legendFormat": "Avg", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_95thPercentile{}", - "hide": false, - "interval": "", - "legendFormat": "P95", - "refId": "C" - } - ], - "title": "DataHubUsageEvent Topic Lag", - "type": "timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 30, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "DataHub Dashboard", - "uid": "x4fS54Vnk", - "version": 3 - } \ No newline at end of file + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_75thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P75", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "metrics_com_linkedin_metadata_kafka_DataHubUsageEventsProcessor_kafkaLag_95thPercentile{}", + "hide": false, + "interval": "", + "legendFormat": "P95", + "refId": "C" + } + ], + "title": "DataHubUsageEvent Topic Lag", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 33, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "DataHub Dashboard", + "uid": "x4fS54Vnk", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/docker/monitoring/prometheus.yaml b/docker/monitoring/prometheus.yaml index 6916f60a8af915..c28a38fef1e7fe 
100644 --- a/docker/monitoring/prometheus.yaml +++ b/docker/monitoring/prometheus.yaml @@ -3,6 +3,7 @@ scrape_configs: scrape_interval: 10s static_configs: - targets: + - 'host.docker.internal:4318' - 'datahub-gms:4318' - 'datahub-mae-consumer:4318' - 'datahub-mce-consumer:4318' \ No newline at end of file diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 7a70b77bc6f6e6..88ca38c63a6df3 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -71,7 +71,7 @@ services: environment: - discovery.type=single-node - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m + - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true healthcheck: retries: 4 start_period: 2m diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 1232e63a69add0..918fa336768f1f 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -75,7 +75,7 @@ services: environment: - discovery.type=single-node - xpack.security.enabled=false - - ES_JAVA_OPTS=-Xms256m -Xmx256m + - ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true healthcheck: retries: 4 start_period: 2m diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index e20d407991eaba..2f59da6a10f8ab 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -98,6 +98,7 @@ module.exports = { "docs/lineage/airflow", "docker/airflow/local_airflow", "docs/lineage/sample_code", + "spark-lineage/README", ], }, { @@ -227,6 +228,7 @@ module.exports = { { Advanced: [ "docs/advanced/no-code-modeling", + "docs/advanced/db-retention", "docs/advanced/aspect-versioning", "docs/advanced/es-7-upgrade", "docs/advanced/high-cardinality", diff --git a/docs/advanced/db-retention.md b/docs/advanced/db-retention.md 
new file mode 100644 index 00000000000000..41f59bfcf93eed --- /dev/null +++ b/docs/advanced/db-retention.md @@ -0,0 +1,79 @@ +# Configuring Database Retention + +## Goal + +DataHub uses a database (or key-value store) to store different versions of the aspects as they get ingested. Storing +multiple versions of the aspects allows us to look at the history of how the aspect changed and to rollback to previous +version when incorrect metadata gets ingested. However, each version takes up space in the database, while bringing less +value to the system. We need to be able to impose **retention** on these records to keep the size of the DB in check. + +Goal of the retention system is to be able to **configure and enforce retention policies** on documents in various +levels ( +global, entity-level, aspect-level) + +## What type of retention policies are supported? + +We support 3 types of retention policies. + +1. Indefinite retention: Keep all versions of aspects +2. Version-based retention: Keep the latest N versions +3. Time-based retention: Keep versions that have been ingested in the last N seconds + +Note, the latest version of each aspect (version 0) is never deleted. This is to ensure that we do not impact the core +functionality of DataHub while applying retention. + +## When is the retention policy applied? + +As of now, retention policies are applied in two places + +1. **GMS boot-up**: On boot, it runs a bootstrap step to ingest the predefined set of retention policies. If there were + no existing policies or the existing policies got updated, it will trigger an asynchronous call to apply retention + to ** + all** records in the database. +2. **Ingest**: On every ingest, if an existing aspect got updated, it applies retention to the urn, aspect pair being + ingested. + +We are planning to support a cron-based application of retention in the near future to ensure that the time-based +retention is applied correctly. + +## How to configure? 
+ +For the initial iteration, we have made this feature opt-in. Please set **ENTITY_SERVICE_ENABLE_RETENTION=true** when +creating the datahub-gms container/k8s pod. + +On GMS start up, it fetches the list of retention policies to ingest from two sources. First is the default we provide, +which adds a version-based retention to keep 20 latest aspects for all entity-aspect pairs. Second, we read YAML files +from the `/etc/datahub/plugins/retention` directory and overlay them on the default set of policies we provide. + +For docker, we set docker-compose to mount `${HOME}/.datahub/plugins` directory to `/etc/datahub/plugins` directory +within the containers, so you can customize the initial set of retention policies by creating +a `${HOME}/.datahub/plugins/retention/retention.yaml` file. + +We will support a standardized way to do this in kubernetes setup in the near future. + +The format for the YAML file is as follows. + +```yaml +- entity: "*" # denotes that policy will be applied to all entities + aspect: "*" # denotes that policy will be applied to all aspects + config: + retention: + version: + maxVersions: 20 +- entity: "dataset" + aspect: "datasetProperties" + config: + retention: + version: + maxVersions: 20 + time: + maxAgeInSeconds: 2592000 # 30 days +``` + +Note, it searches for the policies corresponding to the entity, aspect pair in the following order +1. entity, aspect +2. *, aspect +3. entity, * +4. *, * + +By restarting datahub-gms after creating the plugin yaml file, the new set of retention policies will be applied. \ No newline at end of file diff --git a/docs/quickstart.md b/docs/quickstart.md index b4256dfbbb48ca..05b083ce47b45b 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -52,6 +52,8 @@ To cleanse DataHub of all of it's state (e.g. before ingesting your own), you ca datahub docker nuke ``` +If you want to delete the containers but keep the data you can add `--keep-data` flag to the command. 
This allows you to run the `quickstart` command to get DataHub running with your data that was ingested earlier. + ## Troubleshooting ### Command not found: datahub diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 6ce0128179fad4..d19ccac08abd5d 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -49,6 +49,7 @@ Sources: | [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | | [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source | | [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ | +| [metabase](./source_docs/metabase.md) | `pip install 'acryl-datahub[metabase]` | Metabase source | | [mode](./source_docs/mode.md) | `pip install 'acryl-datahub[mode]'` | Mode Analytics source | | [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | | [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source | diff --git a/metadata-ingestion/examples/recipes/metabase_to_rest.yml b/metadata-ingestion/examples/recipes/metabase_to_rest.yml new file mode 100644 index 00000000000000..6d4a7e841b3803 --- /dev/null +++ b/metadata-ingestion/examples/recipes/metabase_to_rest.yml @@ -0,0 +1,16 @@ +# see https://datahubproject.io/docs/metadata-ingestion/source_docs/metabase for complete documentation +source: + type: "metabase" + config: + username: admin@metabase.com + password: admin12345 + connect_uri: http://localhost:3000/ + default_schema: public + database_alias_map: + h2: sample-dataset.db + +# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation +sink: + type: "datahub-rest" + config: + server: "http://localhost:8080" diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index b3a776f511f1f5..c0afd76928d814 100644 --- 
a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -112,6 +112,7 @@ def get_long_description(): "ldap": {"python-ldap>=2.4"}, "looker": looker_common, "lookml": looker_common | {"lkml>=1.1.0", "sql-metadata==2.2.2"}, + "metabase": {"requests"}, "mode": {"requests", "sqllineage"}, "mongodb": {"pymongo>=3.11"}, "mssql": sql_common | {"sqlalchemy-pytds>=0.3"}, @@ -298,6 +299,7 @@ def get_long_description(): "snowflake-usage = datahub.ingestion.source.usage.snowflake_usage:SnowflakeUsageSource", "superset = datahub.ingestion.source.superset:SupersetSource", "openapi = datahub.ingestion.source.openapi:OpenApiSource", + "metabase = datahub.ingestion.source.metabase:MetabaseSource", "trino = datahub.ingestion.source.sql.trino:TrinoSource", "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource", "nifi = datahub.ingestion.source.nifi:NifiSource", diff --git a/metadata-ingestion/source_docs/metabase.md b/metadata-ingestion/source_docs/metabase.md new file mode 100644 index 00000000000000..b62765618733d9 --- /dev/null +++ b/metadata-ingestion/source_docs/metabase.md @@ -0,0 +1,99 @@ +# Metabase + +For context on getting started with ingestion, check out our [metadata ingestion guide](../README.md). + +## Setup + +To install this plugin, run `pip install 'acryl-datahub[metabase]'`. + +See documentation for Metabase's API at https://www.metabase.com/learn/administration/metabase-api.html +for more details on Metabase's login api. + + +## Capabilities + +This plugin extracts Charts, dashboards, and associated metadata. This plugin is in beta and has only been tested +on PostgreSQL and H2 database. + +### Dashboard + +[/api/dashboard](https://www.metabase.com/docs/latest/api-documentation.html#dashboard) endpoint is used to +retrieve the following dashboard information. 
+ +- Title and description +- Last edited by +- Owner +- Link to the dashboard in Metabase +- Associated charts + +### Chart + +[/api/card](https://www.metabase.com/docs/latest/api-documentation.html#card) endpoint is used to +retrieve the following information. + +- Title and description +- Last edited by +- Owner +- Link to the chart in Metabase +- Datasource and lineage + +The following properties for a chart are ingested in DataHub. + +| Name | Description | +| ------------- | ----------------------------------------------- | +| `Dimensions` | Column names | +| `Filters` | Any filters applied to the chart | +| `Metrics` | All columns that are being used for aggregation | + + +## Quickstart recipe + +Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options. + +For general pointers on writing and running a recipe, see our [main recipe guide](../README.md#recipes). + +```yml +source: + type: metabase + config: + # Coordinates + connect_uri: http://localhost:3000 + + # Credentials + username: user + password: pass + + # Options + default_schema: public + database_alias_map: + h2: sample-dataset.db + +sink: + # sink configs +``` + +## Config details + + +| Field | Required | Default | Description | +| -------------------- | -------- | ------------------ |------------------------------------------------------------------------| +| `connect_uri` | ✅ | `"localhost:8088"` | Metabase host URL. | +| `username` | ✅ | | Metabase username. | +| `password` | ✅ | | Metabase password. | +| `database_alias_map` | | | Database name map to use when constructing dataset URN. | +| `default_schema` | | `public` | Default schema name to use when schema is not provided in an SQL query | +| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | + + +DataHub will try to determine database name from Metabase [api/database](https://www.metabase.com/docs/latest/api-documentation.html#database) +payload. 
However, the name can be overridden from `database_alias_map` for a given database connected to Metabase. + +## Compatibility + +Metabase version [v0.41.2](https://www.metabase.com/start/oss/) + + +## Questions + +If you've got any questions on configuring this source, feel free to ping us on +[our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/source_docs/snowflake.md b/metadata-ingestion/source_docs/snowflake.md index 1947998fc1bbd1..8c6989bbd548b2 100644 --- a/metadata-ingestion/source_docs/snowflake.md +++ b/metadata-ingestion/source_docs/snowflake.md @@ -139,19 +139,23 @@ sink: Note that a `.` is used to denote nested fields in the YAML recipe. -| Field | Required | Default | Description | -| ----------------- | -------- | -------------------------------------------------------------- | --------------------------------------------------------------- | -| `username` | | | Snowflake username. | -| `password` | | | Snowflake password. | -| `host_port` | ✅ | | Snowflake host URL. | -| `warehouse` | | | Snowflake warehouse. | -| `role` | | | Snowflake role. | -| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | -| `bucket_duration` | | `"DAY"` | Duration to bucket usage events by. Can be `"DAY"` or `"HOUR"`. | -| `start_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Earliest date of usage logs to consider. | -| `end_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Latest date of usage logs to consider. | -| `top_n_queries` | | `10` | Number of top queries to save to each table. | +| Field | Required | Default | Description | +| ----------------- | -------- | --------------------------------------------------------------------| --------------------------------------------------------------- | +| `username` | | | Snowflake username. | +| `password` | | | Snowflake password. | +| `host_port` | ✅ | | Snowflake host URL. 
| +| `warehouse` | | | Snowflake warehouse. | +| `role` | | | Snowflake role. | +| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | +| `bucket_duration` | | `"DAY"` | Duration to bucket usage events by. Can be `"DAY"` or `"HOUR"`. | +| `start_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Earliest date of usage logs to consider. | +| `end_time` | | Last full day in UTC (or hour, depending on `bucket_duration`) | Latest date of usage logs to consider. | +| `top_n_queries` | | `10` | Number of top queries to save to each table. | +| `database_pattern`| | `"^UTIL_DB$" `
`"^SNOWFLAKE$"`
`"^SNOWFLAKE_SAMPLE_DATA$" | Allow/deny patterns for db in snowflake dataset names. | +| `schema_pattern` | | | Allow/deny patterns for schema in snowflake dataset names. | +| `view_pattern` | | | Allow/deny patterns for views in snowflake dataset names. | +| `table_pattern` | | | Allow/deny patterns for tables in snowflake dataset names. | ### Compatibility Coming soon! diff --git a/metadata-ingestion/src/datahub/cli/docker.py b/metadata-ingestion/src/datahub/cli/docker.py index 1c44c477481704..c94f50dd2e5fe8 100644 --- a/metadata-ingestion/src/datahub/cli/docker.py +++ b/metadata-ingestion/src/datahub/cli/docker.py @@ -365,7 +365,14 @@ def ingest_sample_data(path: Optional[str]) -> None: @docker.command() @telemetry.with_telemetry -def nuke() -> None: +@click.option( + "--keep-data", + type=bool, + is_flag=True, + default=False, + help="Delete data volumes", +) +def nuke(keep_data: bool) -> None: """Remove all Docker containers, networks, and volumes associated with DataHub.""" with get_client_with_error() as (client, error): @@ -381,11 +388,14 @@ def nuke() -> None: ): container.remove(v=True, force=True) - click.echo("Removing volumes in the datahub project") - for volume in client.volumes.list( - filters={"label": "com.docker.compose.project=datahub"} - ): - volume.remove(force=True) + if keep_data: + click.echo("Skipping deleting data volumes in the datahub project") + else: + click.echo("Removing volumes in the datahub project") + for volume in client.volumes.list( + filters={"label": "com.docker.compose.project=datahub"} + ): + volume.remove(force=True) click.echo("Removing networks in the datahub project") for network in client.networks.list( diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index f33546358e2201..b3d974f8704ddd 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -32,15 +32,9 @@ def 
get_sys_time() -> int: return int(time.time() * 1000) -def _check_data_platform_name(platform_name: str) -> None: - if not platform_name.isalpha(): - logger.warning(f"improperly formatted data platform: {platform_name}") - - def make_data_platform_urn(platform: str) -> str: if platform.startswith("urn:li:dataPlatform:"): return platform - _check_data_platform_name(platform) return f"urn:li:dataPlatform:{platform}" @@ -94,13 +88,11 @@ def make_data_job_urn( def make_dashboard_urn(platform: str, name: str) -> str: # FIXME: dashboards don't currently include data platform urn prefixes. - _check_data_platform_name(platform) return f"urn:li:dashboard:({platform},{name})" def make_chart_urn(platform: str, name: str) -> str: # FIXME: charts don't currently include data platform urn prefixes. - _check_data_platform_name(platform) return f"urn:li:chart:({platform},{name})" diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index cc6fadeb982adc..ec03b0f0779f3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -6,7 +6,6 @@ from avrogen.dict_wrapper import DictWrapper from requests.models import HTTPError -from requests.sessions import Session from datahub.configuration.common import ConfigModel, OperationalError from datahub.emitter.rest_emitter import DatahubRestEmitter @@ -40,11 +39,10 @@ def __init__(self, config: DatahubClientConfig) -> None: ca_certificate_path=self.config.ca_certificate_path, ) self.test_connection() - self.g_session = Session() def _get_generic(self, url: str) -> Dict: try: - response = self.g_session.get(url) + response = self._session.get(url) response.raise_for_status() return response.json() except HTTPError as e: @@ -67,7 +65,7 @@ def get_aspect( aspect_type: Type[Aspect], ) -> Optional[Aspect]: url = 
f"{self._gms_server}/aspects/{urllib.parse.quote(entity_urn)}?aspect={aspect}&version=0" - response = self.g_session.get(url) + response = self._session.get(url) if response.status_code == 404: # not found return None @@ -103,7 +101,7 @@ def get_usage_aspects_from_urn( url = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues" try: usage_aspects: List[DatasetUsageStatisticsClass] = [] - response = self.g_session.post( + response = self._session.post( url, data=json.dumps(payload), headers=headers ) if response.status_code != 200: @@ -135,7 +133,7 @@ def list_all_entity_urns( "Content-Type": "application/json", } try: - response = self.g_session.post( + response = self._session.post( url, data=json.dumps(payload), headers=headers ) if response.status_code != 200: diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py new file mode 100644 index 00000000000000..87331addf1ba70 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -0,0 +1,516 @@ +from functools import lru_cache +from typing import Dict, Iterable, Optional + +import dateutil.parser as dp +import requests +from pydantic import validator +from requests.models import HTTPError +from sqllineage.runner import LineageRunner + +import datahub.emitter.mce_builder as builder +from datahub.configuration.common import ConfigModel +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.com.linkedin.pegasus2avro.common import ( + AuditStamp, + ChangeAuditStamps, +) +from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( + ChartSnapshot, + DashboardSnapshot, +) +from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent +from datahub.metadata.schema_classes import ( + ChartInfoClass, + ChartQueryClass, + 
ChartQueryTypeClass, + ChartTypeClass, + DashboardInfoClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, +) +from datahub.utilities import config_clean + + +class MetabaseConfig(ConfigModel): + # See the Metabase /api/session endpoint for details + # https://www.metabase.com/docs/latest/api-documentation.html#post-apisession + connect_uri: str = "localhost:3000" + username: Optional[str] = None + password: Optional[str] = None + database_alias_map: Optional[dict] = None + default_schema: str = "public" + env: str = builder.DEFAULT_ENV + + @validator("connect_uri") + def remove_trailing_slash(cls, v): + return config_clean.remove_trailing_slashes(v) + + +class MetabaseSource(Source): + config: MetabaseConfig + report: SourceReport + platform = "metabase" + + def __hash__(self): + return id(self) + + def __init__(self, ctx: PipelineContext, config: MetabaseConfig): + super().__init__(ctx) + self.config = config + self.report = SourceReport() + + login_response = requests.post( + f"{self.config.connect_uri}/api/session", + None, + { + "username": self.config.username, + "password": self.config.password, + }, + ) + + login_response.raise_for_status() + self.access_token = login_response.json().get("id", "") + + self.session = requests.session() + self.session.headers.update( + { + "X-Metabase-Session": f"{self.access_token}", + "Content-Type": "application/json", + "Accept": "*/*", + } + ) + + # Test the connection + try: + test_response = self.session.get( + f"{self.config.connect_uri}/api/user/current" + ) + test_response.raise_for_status() + except HTTPError as e: + self.report.report_failure( + key="metabase-session", + reason=f"Unable to retrieve user {self.config.username} information. 
%s" + % str(e), + ) + + def close(self) -> None: + response = requests.delete( + f"{self.config.connect_uri}/api/session", + headers={"X-Metabase-Session": self.access_token}, + ) + if response.status_code not in (200, 204): + self.report.report_failure( + key="metabase-session", + reason=f"Unable to logout for user {self.config.username}", + ) + + def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: + try: + dashboard_response = self.session.get( + f"{self.config.connect_uri}/api/dashboard" + ) + dashboard_response.raise_for_status() + dashboards = dashboard_response.json() + + for dashboard_info in dashboards: + dashboard_snapshot = self.construct_dashboard_from_api_data( + dashboard_info + ) + if dashboard_snapshot is not None: + mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot) + wu = MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce) + self.report.report_workunit(wu) + yield wu + + except HTTPError as http_error: + self.report.report_failure( + key="metabase-dashboard", + reason=f"Unable to retrieve dashboards. " f"Reason: {str(http_error)}", + ) + + def construct_dashboard_from_api_data( + self, dashboard_info: dict + ) -> Optional[DashboardSnapshot]: + + dashboard_id = dashboard_info.get("id", "") + dashboard_url = f"{self.config.connect_uri}/api/dashboard/{dashboard_id}" + try: + dashboard_response = self.session.get(dashboard_url) + dashboard_response.raise_for_status() + dashboard_details = dashboard_response.json() + except HTTPError as http_error: + self.report.report_failure( + key=f"metabase-dashboard-{dashboard_id}", + reason=f"Unable to retrieve dashboard. 
" f"Reason: {str(http_error)}", + ) + return None + + dashboard_urn = builder.make_dashboard_urn( + self.platform, dashboard_details.get("id", "") + ) + dashboard_snapshot = DashboardSnapshot( + urn=dashboard_urn, + aspects=[], + ) + last_edit_by = dashboard_details.get("last-edit-info") or {} + modified_actor = builder.make_user_urn(last_edit_by.get("email", "unknown")) + modified_ts = int( + dp.parse(f"{last_edit_by.get('timestamp', 'now')}").timestamp() * 1000 + ) + title = dashboard_details.get("name", "") or "" + description = dashboard_details.get("description", "") or "" + last_modified = ChangeAuditStamps( + created=AuditStamp(time=modified_ts, actor=modified_actor), + lastModified=AuditStamp(time=modified_ts, actor=modified_actor), + ) + + chart_urns = [] + cards_data = dashboard_details.get("ordered_cards", "{}") + for card_info in cards_data: + chart_urn = builder.make_chart_urn(self.platform, card_info.get("id", "")) + chart_urns.append(chart_urn) + + dashboard_info_class = DashboardInfoClass( + description=description, + title=title, + charts=chart_urns, + lastModified=last_modified, + dashboardUrl=f"{self.config.connect_uri}/dashboard/{dashboard_id}", + customProperties={}, + ) + dashboard_snapshot.aspects.append(dashboard_info_class) + + # Ownership + ownership = self._get_ownership(dashboard_details.get("creator_id", "")) + if ownership is not None: + dashboard_snapshot.aspects.append(ownership) + + return dashboard_snapshot + + @lru_cache(maxsize=None) + def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]: + user_info_url = f"{self.config.connect_uri}/api/user/{creator_id}" + try: + user_info_response = self.session.get(user_info_url) + user_info_response.raise_for_status() + user_details = user_info_response.json() + except HTTPError as http_error: + self.report.report_failure( + key=f"metabase-user-{creator_id}", + reason=f"Unable to retrieve User info. 
" f"Reason: {str(http_error)}", + ) + return None + + owner_urn = builder.make_user_urn(user_details.get("email", "")) + if owner_urn is not None: + ownership: OwnershipClass = OwnershipClass( + owners=[ + OwnerClass( + owner=owner_urn, + type=OwnershipTypeClass.DATAOWNER, + ) + ] + ) + return ownership + + return None + + def emit_card_mces(self) -> Iterable[MetadataWorkUnit]: + try: + card_response = self.session.get(f"{self.config.connect_uri}/api/card") + card_response.raise_for_status() + cards = card_response.json() + + for card_info in cards: + chart_snapshot = self.construct_card_from_api_data(card_info) + if chart_snapshot is not None: + mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot) + wu = MetadataWorkUnit(id=chart_snapshot.urn, mce=mce) + self.report.report_workunit(wu) + yield wu + + except HTTPError as http_error: + self.report.report_failure( + key="metabase-cards", + reason=f"Unable to retrieve cards. " f"Reason: {str(http_error)}", + ) + return None + + def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapshot]: + card_id = card_data.get("id", "") + card_url = f"{self.config.connect_uri}/api/card/{card_id}" + try: + card_response = self.session.get(card_url) + card_response.raise_for_status() + card_details = card_response.json() + except HTTPError as http_error: + self.report.report_failure( + key=f"metabase-card-{card_id}", + reason=f"Unable to retrieve Card info. 
" f"Reason: {str(http_error)}", + ) + return None + + chart_urn = builder.make_chart_urn(self.platform, card_id) + chart_snapshot = ChartSnapshot( + urn=chart_urn, + aspects=[], + ) + + last_edit_by = card_details.get("last-edit-info") or {} + modified_actor = builder.make_user_urn(last_edit_by.get("email", "unknown")) + modified_ts = int( + dp.parse(f"{last_edit_by.get('timestamp', 'now')}").timestamp() * 1000 + ) + last_modified = ChangeAuditStamps( + created=AuditStamp(time=modified_ts, actor=modified_actor), + lastModified=AuditStamp(time=modified_ts, actor=modified_actor), + ) + + chart_type = self._get_chart_type( + card_details.get("id", ""), card_details.get("display") + ) + description = card_details.get("description") or "" + title = card_details.get("name") or "" + datasource_urn = self.get_datasource_urn(card_details) + custom_properties = self.construct_card_custom_properties(card_details) + + chart_info = ChartInfoClass( + type=chart_type, + description=description, + title=title, + lastModified=last_modified, + chartUrl=f"{self.config.connect_uri}/card/{card_id}", + inputs=datasource_urn, + customProperties=custom_properties, + ) + chart_snapshot.aspects.append(chart_info) + + if card_details.get("query_type", "") == "native": + raw_query = ( + card_details.get("dataset_query", {}).get("native", {}).get("query", "") + ) + chart_query_native = ChartQueryClass( + rawQuery=raw_query, + type=ChartQueryTypeClass.SQL, + ) + chart_snapshot.aspects.append(chart_query_native) + + # Ownership + ownership = self._get_ownership(card_details.get("creator_id", "")) + if ownership is not None: + chart_snapshot.aspects.append(ownership) + + return chart_snapshot + + def _get_chart_type(self, card_id: int, display_type: str) -> Optional[str]: + type_mapping = { + "table": ChartTypeClass.TABLE, + "bar": ChartTypeClass.BAR, + "line": ChartTypeClass.LINE, + "row": ChartTypeClass.BAR, + "area": ChartTypeClass.AREA, + "pie": ChartTypeClass.PIE, + "funnel": 
ChartTypeClass.BAR, + "scatter": ChartTypeClass.SCATTER, + "scalar": ChartTypeClass.TEXT, + "smartscalar": ChartTypeClass.TEXT, + "pivot": ChartTypeClass.TABLE, + "waterfall": ChartTypeClass.BAR, + "progress": None, + "combo": None, + "gauge": None, + "map": None, + } + if not display_type: + self.report.report_warning( + key=f"metabase-card-{card_id}", + reason=f"Card type {display_type} is missing. Setting to None", + ) + return None + try: + chart_type = type_mapping[display_type] + except KeyError: + self.report.report_warning( + key=f"metabase-card-{card_id}", + reason=f"Chart type {display_type} not supported. Setting to None", + ) + chart_type = None + + return chart_type + + def construct_card_custom_properties(self, card_details: dict) -> Dict: + result_metadata = card_details.get("result_metadata", []) + metrics, dimensions = [], [] + for meta_data in result_metadata: + display_name = meta_data.get("display_name", "") or "" + metrics.append(display_name) if "aggregation" in meta_data.get( + "field_ref", "" + ) else dimensions.append(display_name) + + filters = (card_details.get("dataset_query", {}).get("query", {})).get( + "filter", [] + ) + + custom_properties = { + "Metrics": ", ".join(metrics), + "Filters": f"{filters}" if len(filters) else "", + "Dimensions": ", ".join(dimensions), + } + + return custom_properties + + def get_datasource_urn(self, card_details): + platform, database_name = self.get_datasource_from_id( + card_details.get("database_id", "") + ) + query_type = card_details.get("dataset_query", {}).get("type", {}) + source_paths = set() + + if query_type == "query": + source_table_id = ( + card_details.get("dataset_query", {}) + .get("query", {}) + .get("source-table", {}) + ) + schema_name, table_name = self.get_source_table_from_id(source_table_id) + if table_name: + source_paths.add( + f"{schema_name + '.' 
if schema_name else ''}{table_name}" + ) + else: + try: + raw_query = ( + card_details.get("dataset_query", {}) + .get("native", {}) + .get("query", "") + ) + parser = LineageRunner(raw_query) + + for table in parser.source_tables: + sources = str(table).split(".") + source_schema, source_table = sources[-2], sources[-1] + if source_schema == "": + source_schema = str(self.config.default_schema) + + source_paths.add(f"{source_schema}.{source_table}") + except Exception as e: + self.report.report_failure( + key="metabase-query", + reason=f"Unable to retrieve lineage from query. " + f"Query: {raw_query} " + f"Reason: {str(e)} ", + ) + return None + + # Create dataset URNs + dataset_urn = [] + dbname = f"{database_name + '.' if database_name else ''}" + source_tables = list(map(lambda tbl: f"{dbname}{tbl}", source_paths)) + dataset_urn = [ + builder.make_dataset_urn(platform, name, self.config.env) + for name in source_tables + ] + + return dataset_urn + + @lru_cache(maxsize=None) + def get_source_table_from_id(self, table_id): + try: + dataset_response = self.session.get( + f"{self.config.connect_uri}/api/table/{table_id}" + ) + dataset_response.raise_for_status() + dataset_json = dataset_response.json() + schema = dataset_json.get("schema", "") + name = dataset_json.get("name", "") + return schema, name + + except HTTPError as http_error: + self.report.report_failure( + key=f"metabase-table-{table_id}", + reason=f"Unable to retrieve source table. " + f"Reason: {str(http_error)}", + ) + + return None, None + + @lru_cache(maxsize=None) + def get_datasource_from_id(self, datasource_id): + try: + dataset_response = self.session.get( + f"{self.config.connect_uri}/api/database/{datasource_id}" + ) + dataset_response.raise_for_status() + dataset_json = dataset_response.json() + except HTTPError as http_error: + self.report.report_failure( + key=f"metabase-datasource-{datasource_id}", + reason=f"Unable to retrieve Datasource. 
" f"Reason: {str(http_error)}", + ) + return None, None + + # Map engine names to what datahub expects in + # https://github.com/linkedin/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json + engine = dataset_json.get("engine", "") + platform = engine + + engine_mapping = { + "sparksql": "spark", + "mongo": "mongodb", + "presto-jdbc": "presto", + "sqlserver": "mssql", + "bigquery-cloud-sdk": "bigquery", + } + if engine in engine_mapping: + platform = engine_mapping[engine] + else: + self.report.report_warning( + key=f"metabase-platform-{datasource_id}", + reason=f"Platform was not found in DataHub. Using {platform} name as is", + ) + + field_for_dbname_mapping = { + "postgres": "dbname", + "sparksql": "dbname", + "mongo": "dbname", + "redshift": "db", + "snowflake": "db", + "presto-jdbc": "catalog", + "presto": "catalog", + "mysql": "dbname", + "sqlserver": "db", + } + + dbname = ( + dataset_json.get("details", {}).get(field_for_dbname_mapping[engine]) + if engine in field_for_dbname_mapping + else None + ) + + if ( + self.config.database_alias_map is not None + and platform in self.config.database_alias_map + ): + dbname = self.config.database_alias_map[platform] + else: + self.report.report_warning( + key=f"metabase-dbname-{datasource_id}", + reason=f"Cannot determine database name for platform: {platform}", + ) + + return platform, dbname + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: + config = MetabaseConfig.parse_obj(config_dict) + return cls(ctx, config) + + def get_workunits(self) -> Iterable[MetadataWorkUnit]: + yield from self.emit_dashboard_mces() + yield from self.emit_card_mces() + + def get_report(self) -> SourceReport: + return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py index 692eaba595c9a8..93cc1ce281ae2c 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py @@ -203,6 +203,8 @@ def _get_upstream_lineage_info( for lineage_entry in lineage: # Update the table-lineage upstream_table_name = lineage_entry[0] + if not self._is_dataset_allowed(upstream_table_name): + continue upstream_table = UpstreamClass( dataset=builder.make_dataset_urn( self.platform, upstream_table_name, self.config.env @@ -229,8 +231,9 @@ def _get_upstream_lineage_info( ) column_lineage[column_lineage_key] = column_lineage_value logger.debug(f"{column_lineage_key}:{column_lineage_value}") - - return UpstreamLineage(upstreams=upstream_tables), column_lineage + if upstream_tables: + return UpstreamLineage(upstreams=upstream_tables), column_lineage + return None # Override the base class method. def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: @@ -288,3 +291,18 @@ def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: # Emit the work unit from super. yield wu + + def _is_dataset_allowed(self, dataset_name: Optional[str]) -> bool: + # View lineages is not supported. Add the allow/deny pattern for that when it is supported. 
+ if dataset_name is None: + return True + dataset_params = dataset_name.split(".") + if len(dataset_params) != 3: + return True + if ( + not self.config.database_pattern.allowed(dataset_params[0]) + or not self.config.schema_pattern.allowed(dataset_params[1]) + or not self.config.table_pattern.allowed(dataset_params[2]) + ): + return False + return True diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 28af83929cd7e1..470e28a8a163f3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -123,6 +123,7 @@ def make_sqlalchemy_uri( class SQLSourceReport(SourceReport): tables_scanned: int = 0 views_scanned: int = 0 + entities_profiled: int = 0 filtered: List[str] = field(default_factory=list) query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None @@ -138,6 +139,9 @@ def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: else: raise KeyError(f"Unknown entity {ent_type}.") + def report_entity_profiled(self, name: str) -> None: + self.entities_profiled += 1 + def report_dropped(self, ent_name: str) -> None: self.filtered.append(ent_name) @@ -642,12 +646,12 @@ def loop_profiler_requests( dataset_name = self.get_identifier( schema=schema, entity=table, inspector=inspector ) - self.report.report_entity_scanned(f"profile of {dataset_name}") if not sql_config.profile_pattern.allowed(dataset_name): self.report.report_dropped(f"profile of {dataset_name}") continue + self.report.report_entity_profiled(dataset_name) yield GEProfilerRequest( pretty_name=dataset_name, batch_kwargs=self.prepare_profiler_args(schema=schema, table=table), diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py index 6c8fd051c2c352..2c616b879802c3 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py @@ -12,6 +12,7 @@ from sqlalchemy.engine import Engine import datahub.emitter.mce_builder as builder +from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import get_time_bucket from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -33,7 +34,7 @@ query_history.query_text, query_history.query_type, access_history.base_objects_accessed, - -- access_history.direct_objects_accessed, -- might be useful in the future + access_history.direct_objects_accessed, -- when dealing with views, direct objects will show the view while base will show the underlying table -- query_history.execution_status, -- not really necessary, but should equal "SUCCESS" -- query_history.warehouse_name, access_history.user_name, @@ -82,6 +83,7 @@ class SnowflakeJoinedAccessEvent(PermissiveModel): query_text: str query_type: str base_objects_accessed: List[SnowflakeObjectAccessEntry] + direct_objects_accessed: List[SnowflakeObjectAccessEntry] user_name: str first_name: Optional[str] @@ -94,6 +96,13 @@ class SnowflakeJoinedAccessEvent(PermissiveModel): class SnowflakeUsageConfig(BaseSnowflakeConfig, BaseUsageConfig): env: str = builder.DEFAULT_ENV options: dict = {} + database_pattern: AllowDenyPattern = AllowDenyPattern( + deny=[r"^UTIL_DB$", r"^SNOWFLAKE$", r"^SNOWFLAKE_SAMPLE_DATA$"] + ) + schema_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() + table_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() + view_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() + apply_view_usage_to_tables: bool = False @pydantic.validator("role", always=True) def role_accountadmin(cls, v): @@ -161,14 +170,61 @@ def _get_snowflake_history(self) -> Iterable[SnowflakeJoinedAccessEvent]: if event_dict["query_text"] is None: 
continue - def is_unsupported_base_object_accessed(obj: Dict[str, Any]) -> bool: + def is_unsupported_object_accessed(obj: Dict[str, Any]) -> bool: unsupported_keys = ["locations"] return any([obj.get(key) is not None for key in unsupported_keys]) + def is_dataset_pattern_allowed( + dataset_name: Optional[Any], dataset_type: Optional[Any] + ) -> bool: + # TODO: support table/view patterns for usage logs by pulling that information as well from the usage query + if not dataset_type or not dataset_name: + return True + + table_or_view_pattern: Optional[ + AllowDenyPattern + ] = AllowDenyPattern.allow_all() + # Test domain type = external_table and then add it + table_or_view_pattern = ( + self.config.table_pattern + if dataset_type.lower() in {"table"} + else ( + self.config.view_pattern + if dataset_type.lower() in {"view", "materialized_view"} + else None + ) + ) + if table_or_view_pattern is None: + return True + + dataset_params = dataset_name.split(".") + assert len(dataset_params) == 3 + if ( + not self.config.database_pattern.allowed(dataset_params[0]) + or not self.config.schema_pattern.allowed(dataset_params[1]) + or not table_or_view_pattern.allowed(dataset_params[2]) + ): + return False + return True + + def is_object_valid(obj: Dict[str, Any]) -> bool: + if is_unsupported_object_accessed( + obj + ) or not is_dataset_pattern_allowed( + obj.get("objectName"), obj.get("objectDomain") + ): + return False + return True + event_dict["base_objects_accessed"] = [ obj for obj in json.loads(event_dict["base_objects_accessed"]) - if not is_unsupported_base_object_accessed(obj) + if is_object_valid(obj) + ] + event_dict["direct_objects_accessed"] = [ + obj + for obj in json.loads(event_dict["direct_objects_accessed"]) + if is_object_valid(obj) ] event_dict["query_start_time"] = ( event_dict["query_start_time"] @@ -195,9 +251,13 @@ def _aggregate_access_events( event.query_start_time, self.config.bucket_duration ) - for object in event.base_objects_accessed: + 
accessed_data = ( + event.base_objects_accessed + if self.config.apply_view_usage_to_tables + else event.direct_objects_accessed + ) + for object in accessed_data: resource = object.objectName - agg_bucket = datasets[floored_ts].setdefault( resource, AggregatedDataset(bucket_start_time=floored_ts, resource=resource), diff --git a/metadata-ingestion/src/datahub/integrations/airflow/__init__.py b/metadata-ingestion/src/datahub/integrations/airflow/__init__.py deleted file mode 100644 index a6cb3c58d24ee5..00000000000000 --- a/metadata-ingestion/src/datahub/integrations/airflow/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -import warnings - -try: - from datahub_provider.lineage.datahub import ( - DatahubLineageBackend as DatahubAirflowLineageBackend, - ) -except ModuleNotFoundError: - # Compat for older versions of Airflow. - pass - -warnings.warn( - "importing from datahub.integrations.airflow.* is deprecated; " - "use datahub_provider.{hooks,operators,lineage}.datahub instead" -) diff --git a/metadata-ingestion/src/datahub/integrations/airflow/entities.py b/metadata-ingestion/src/datahub/integrations/airflow/entities.py deleted file mode 100644 index b9471c24f02275..00000000000000 --- a/metadata-ingestion/src/datahub/integrations/airflow/entities.py +++ /dev/null @@ -1,8 +0,0 @@ -import warnings - -from datahub_provider.entities import Dataset, _Entity # noqa: F401 - -warnings.warn( - "importing from datahub.integrations.airflow.* is deprecated; " - "use datahub_provider.{hooks,operators,lineage}.datahub instead" -) diff --git a/metadata-ingestion/src/datahub/integrations/airflow/hooks.py b/metadata-ingestion/src/datahub/integrations/airflow/hooks.py deleted file mode 100644 index 31008b2d6bc07c..00000000000000 --- a/metadata-ingestion/src/datahub/integrations/airflow/hooks.py +++ /dev/null @@ -1,12 +0,0 @@ -import warnings - -from datahub_provider.hooks.datahub import ( # noqa: F401 - DatahubGenericHook, - DatahubKafkaHook, - DatahubRestHook, -) - -warnings.warn( - 
"importing from datahub.integrations.airflow.* is deprecated; " - "use datahub_provider.{hooks,operators,lineage}.datahub instead" -) diff --git a/metadata-ingestion/src/datahub/integrations/airflow/operators.py b/metadata-ingestion/src/datahub/integrations/airflow/operators.py deleted file mode 100644 index f889dca581d34a..00000000000000 --- a/metadata-ingestion/src/datahub/integrations/airflow/operators.py +++ /dev/null @@ -1,11 +0,0 @@ -import warnings - -from datahub_provider.operators.datahub import ( # noqa: F401 - DatahubBaseOperator, - DatahubEmitterOperator, -) - -warnings.warn( - "importing from datahub.integrations.airflow.* is deprecated; " - "use datahub_provider.{hooks,operators,lineage}.datahub instead" -) diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json new file mode 100644 index 00000000000000..127988ba381d7b --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json @@ -0,0 +1,206 @@ +[ +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(metabase,1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "externalUrl": null, + "title": "Dashboard 1", + "description": "", + "charts": [ + "urn:li:chart:(metabase,1)", + "urn:li:chart:(metabase,2)" + ], + "lastModified": { + "created": { + "time": 1639417721742, + "actor": "urn:li:corpuser:admin@metabase.com", + "impersonator": null + }, + "lastModified": { + "time": 1639417721742, + "actor": "urn:li:corpuser:admin@metabase.com", + "impersonator": null + }, + "deleted": null + }, + "dashboardUrl": "http://localhost:3000/dashboard/1", + "access": null, + "lastRefreshed": null + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:admin@metabase.com", + "type": 
"DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test", + "registryName": null, + "registryVersion": null, + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(metabase,1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "customProperties": { + "Metrics": "", + "Filters": "", + "Dimensions": "customer_id, first_name, last_name, amount, payment_date, rental_id" + }, + "externalUrl": null, + "title": "Customer Payment", + "description": "", + "lastModified": { + "created": { + "time": 1639417592792, + "actor": "urn:li:corpuser:admin@metabase.com", + "impersonator": null + }, + "lastModified": { + "time": 1639417592792, + "actor": "urn:li:corpuser:admin@metabase.com", + "impersonator": null + }, + "deleted": null + }, + "chartUrl": "http://localhost:3000/card/1", + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.payment,PROD)" + } + ], + "type": "TABLE", + "access": null, + "lastRefreshed": null + } + }, + { + "com.linkedin.pegasus2avro.chart.ChartQuery": { + "rawQuery": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "type": "SQL" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:admin@metabase.com", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 
1636614000000, + "runId": "metabase-test", + "registryName": null, + "registryVersion": null, + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(metabase,2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "customProperties": { + "Metrics": "Count", + "Filters": "['=', ['field', 136, None], 2006]", + "Dimensions": "Rating" + }, + "externalUrl": null, + "title": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", + "description": "", + "lastModified": { + "created": { + "time": 1639417717110, + "actor": "urn:li:corpuser:admin@metabase.com", + "impersonator": null + }, + "lastModified": { + "time": 1639417717110, + "actor": "urn:li:corpuser:admin@metabase.com", + "impersonator": null + }, + "deleted": null + }, + "chartUrl": "http://localhost:3000/card/2", + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.film,PROD)" + } + ], + "type": "BAR", + "access": null, + "lastRefreshed": null + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:admin@metabase.com", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test", + "registryName": null, + "registryVersion": null, + "properties": null + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/metabase_to_file.yml b/metadata-ingestion/tests/integration/metabase/metabase_to_file.yml new file mode 100644 index 00000000000000..3a5984d7ad36b5 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/metabase_to_file.yml @@ -0,0 +1,15 @@ +run_id: metabase-test + +source: + type: 
"metabase" + config: + username: admin@metabase.com + password: admin12345 + connect_uri: http://localhost:3000/ + database_alias_map: + h2: sample-dataset.db + +sink: + type: file + config: + filename: "./metabase_mces.json" diff --git a/metadata-ingestion/tests/integration/metabase/setup/card.json b/metadata-ingestion/tests/integration/metabase/setup/card.json new file mode 100644 index 00000000000000..439edbf60014fa --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/card.json @@ -0,0 +1,307 @@ +[{ + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [{ + "name": "customer_id", + "display_name": "customer_id", + "base_type": "type/Integer", + "effective_type": "type/Integer", + "field_ref": ["field", "customer_id", { + "base-type": "type/Integer" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 517, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 1.0, + "q1": 127.95550051624855, + "q3": 457.48181481488376, + "max": 599.0, + "sd": 183.35453319901166, + "avg": 293.316 + } + } + } + }, { + "name": "first_name", + "display_name": "first_name", + "base_type": "type/Text", + "effective_type": "type/Text", + "field_ref": ["field", "first_name", { + "base-type": "type/Text" + }], + "semantic_type": "type/Name", + "fingerprint": { + "global": { + "distinct-count": 509, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0035, + "average-length": 5.629 + } + } + } + }, { + "name": "last_name", + "display_name": "last_name", + "base_type": "type/Text", + "effective_type": "type/Text", + "field_ref": ["field", "last_name", { + "base-type": "type/Text" + }], + "semantic_type": "type/Name", + "fingerprint": { + "global": { + "distinct-count": 517, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + 
"percent-state": 0.0015, + "average-length": 6.126 + } + } + } + }, { + "name": "amount", + "display_name": "amount", + "base_type": "type/Decimal", + "effective_type": "type/Decimal", + "field_ref": ["field", "amount", { + "base-type": "type/Decimal" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 0.99, + "q1": 2.399411317392306, + "q3": 5.52734176879965, + "max": 10.99, + "sd": 2.352151368009511, + "avg": 4.1405 + } + } + } + }, { + "name": "payment_date", + "display_name": "payment_date", + "base_type": "type/DateTime", + "effective_type": "type/DateTime", + "field_ref": ["field", "payment_date", { + "base-type": "type/DateTime" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 1998, + "nil%": 0.0 + }, + "type": { + "type/DateTime": { + "earliest": "2007-02-14T21:21:59.996577Z", + "latest": "2007-02-21T19:27:46.996577Z" + } + } + } + }, { + "name": "rental_id", + "display_name": "rental_id", + "base_type": "type/Integer", + "effective_type": "type/Integer", + "field_ref": ["field", "rental_id", { + "base-type": "type/Integer" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 2000, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 1158.0, + "q1": 1731.7967120913397, + "q3": 2871.359273326854, + "max": 4591.0, + "sd": 660.7468728104022, + "avg": 2303.4565 + } + } + } + }], + "creator": { + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_login": "2021-12-13T18:51:32.999", + "is_qbnewb": true, + "is_superuser": true, + "id": 1, + "last_name": "LastName", + "date_joined": "2021-12-13T07:34:21.806", + "common_name": "FirstName LastName" + }, + "database_id": 2, + "enable_embedding": false, + "collection_id": null, + "query_type": "native", + "name": "Customer Payment", + "creator_id": 1, + "updated_at": "2021-12-13T17:48:40.478", + "made_public_by_id": null, + 
"embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "template-tags": {} + }, + "database": 2 + }, + "id": 1, + "display": "table", + "last-edit-info": { + "id": 1, + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_name": "LastName", + "timestamp": "2021-12-13T17:46:32.792" + }, + "visualization_settings": { + "table.pivot_column": "amount", + "table.cell_column": "customer_id" + }, + "collection": null, + "favorite": false, + "created_at": "2021-12-13T17:46:32.77", + "public_uuid": null +}, { + "description": null, + "archived": false, + "collection_position": null, + "table_id": 21, + "result_metadata": [{ + "semantic_type": "type/Category", + "coercion_strategy": null, + "name": "rating", + "field_ref": ["field", 131, null], + "effective_type": "type/*", + "id": 131, + "display_name": "Rating", + "fingerprint": { + "global": { + "distinct-count": 5, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0, + "average-length": 2.926 + } + } + }, + "base_type": "type/PostgresEnum" + }, { + "name": "count", + "display_name": "Count", + "base_type": "type/BigInteger", + "effective_type": "type/BigInteger", + "semantic_type": "type/Quantity", + "field_ref": ["aggregation", 0], + "fingerprint": { + "global": { + "distinct-count": 5, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 178.0, + "q1": 190.0, + "q3": 213.25, + "max": 223.0, + "sd": 17.131841699011815, + "avg": 200.0 + } + } + } + }], + "creator": { + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_login": "2021-12-13T18:51:32.999", + "is_qbnewb": true, + "is_superuser": true, + "id": 1, + 
"last_name": "LastName", + "date_joined": "2021-12-13T07:34:21.806", + "common_name": "FirstName LastName" + }, + "database_id": 2, + "enable_embedding": false, + "collection_id": null, + "query_type": "query", + "name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", + "creator_id": 1, + "updated_at": "2021-12-13T17:48:39.999", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "query": { + "source-table": 21, + "breakout": [ + ["field", 131, null] + ], + "aggregation": [ + ["count"] + ], + "order-by": [ + ["desc", ["aggregation", 0]] + ], + "filter": ["=", ["field", 136, null], 2006] + }, + "database": 2, + "type": "query" + }, + "id": 2, + "display": "row", + "last-edit-info": { + "id": 1, + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_name": "LastName", + "timestamp": "2021-12-13T17:48:37.11" + }, + "visualization_settings": { + "graph.series_labels": ["number"], + "graph.dimensions": ["rating"], + "graph.colors": ["#509EE3"], + "graph.metrics": ["count"] + }, + "collection": null, + "favorite": false, + "created_at": "2021-12-13T17:48:37.102", + "public_uuid": null +}] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/card_1.json b/metadata-ingestion/tests/integration/metabase/setup/card_1.json new file mode 100644 index 00000000000000..11f1adee39c660 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/card_1.json @@ -0,0 +1,201 @@ +{ + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [{ + "name": "customer_id", + "display_name": "customer_id", + "base_type": "type/Integer", + "effective_type": "type/Integer", + "field_ref": ["field", "customer_id", { + "base-type": "type/Integer" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 517, + "nil%": 0.0 + }, + "type": { + 
"type/Number": { + "min": 1.0, + "q1": 127.95550051624855, + "q3": 457.48181481488376, + "max": 599.0, + "sd": 183.35453319901166, + "avg": 293.316 + } + } + } + }, { + "name": "first_name", + "display_name": "first_name", + "base_type": "type/Text", + "effective_type": "type/Text", + "field_ref": ["field", "first_name", { + "base-type": "type/Text" + }], + "semantic_type": "type/Name", + "fingerprint": { + "global": { + "distinct-count": 509, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0035, + "average-length": 5.629 + } + } + } + }, { + "name": "last_name", + "display_name": "last_name", + "base_type": "type/Text", + "effective_type": "type/Text", + "field_ref": ["field", "last_name", { + "base-type": "type/Text" + }], + "semantic_type": "type/Name", + "fingerprint": { + "global": { + "distinct-count": 517, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0015, + "average-length": 6.126 + } + } + } + }, { + "name": "amount", + "display_name": "amount", + "base_type": "type/Decimal", + "effective_type": "type/Decimal", + "field_ref": ["field", "amount", { + "base-type": "type/Decimal" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 0.99, + "q1": 2.399411317392306, + "q3": 5.52734176879965, + "max": 10.99, + "sd": 2.352151368009511, + "avg": 4.1405 + } + } + } + }, { + "name": "payment_date", + "display_name": "payment_date", + "base_type": "type/DateTime", + "effective_type": "type/DateTime", + "field_ref": ["field", "payment_date", { + "base-type": "type/DateTime" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 1998, + "nil%": 0.0 + }, + "type": { + "type/DateTime": { + "earliest": "2007-02-14T21:21:59.996577Z", + "latest": 
"2007-02-21T19:27:46.996577Z" + } + } + } + }, { + "name": "rental_id", + "display_name": "rental_id", + "base_type": "type/Integer", + "effective_type": "type/Integer", + "field_ref": ["field", "rental_id", { + "base-type": "type/Integer" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 2000, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 1158.0, + "q1": 1731.7967120913397, + "q3": 2871.359273326854, + "max": 4591.0, + "sd": 660.7468728104022, + "avg": 2303.4565 + } + } + } + }], + "creator": { + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_login": "2021-12-13T22:25:45.761", + "is_qbnewb": true, + "is_superuser": true, + "id": 1, + "last_name": "LastName", + "date_joined": "2021-12-13T07:34:21.806", + "common_name": "FirstName LastName" + }, + "can_write": true, + "database_id": 2, + "enable_embedding": false, + "collection_id": null, + "query_type": "native", + "name": "Customer Payment", + "last_query_start": "2021-12-13T17:48:39.802905Z", + "dashboard_count": 1, + "average_query_time": 662, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2021-12-13T17:48:40.478", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "template-tags": {} + }, + "database": 2 + }, + "id": 1, + "display": "table", + "last-edit-info": { + "id": 1, + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_name": "LastName", + "timestamp": "2021-12-13T17:46:32.792" + }, + "visualization_settings": { + "table.pivot_column": "amount", + "table.cell_column": "customer_id" + }, + "collection": null, + "created_at": "2021-12-13T17:46:32.77", + "public_uuid": null +} \ No 
newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/card_2.json b/metadata-ingestion/tests/integration/metabase/setup/card_2.json new file mode 100644 index 00000000000000..979178cd24fb0d --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/card_2.json @@ -0,0 +1,115 @@ +{ + "description": null, + "archived": false, + "collection_position": null, + "table_id": 21, + "result_metadata": [{ + "semantic_type": "type/Category", + "coercion_strategy": null, + "name": "rating", + "field_ref": ["field", 131, null], + "effective_type": "type/*", + "id": 131, + "display_name": "Rating", + "fingerprint": { + "global": { + "distinct-count": 5, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0, + "average-length": 2.926 + } + } + }, + "base_type": "type/PostgresEnum" + }, { + "name": "count", + "display_name": "Count", + "base_type": "type/BigInteger", + "effective_type": "type/BigInteger", + "semantic_type": "type/Quantity", + "field_ref": ["aggregation", 0], + "fingerprint": { + "global": { + "distinct-count": 5, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 178.0, + "q1": 190.0, + "q3": 213.25, + "max": 223.0, + "sd": 17.131841699011815, + "avg": 200.0 + } + } + } + }], + "creator": { + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_login": "2021-12-13T22:24:03.913", + "is_qbnewb": true, + "is_superuser": true, + "id": 1, + "last_name": "LastName", + "date_joined": "2021-12-13T07:34:21.806", + "common_name": "FirstName LastName" + }, + "can_write": true, + "database_id": 2, + "enable_embedding": false, + "collection_id": null, + "query_type": "query", + "name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", + "last_query_start": "2021-12-13T17:48:39.93677Z", + "dashboard_count": 1, + "average_query_time": 64, + "creator_id": 1, + 
"moderation_reviews": [], + "updated_at": "2021-12-13T17:48:39.999", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "query": { + "source-table": 21, + "breakout": [ + ["field", 131, null] + ], + "aggregation": [ + ["count"] + ], + "order-by": [ + ["desc", ["aggregation", 0]] + ], + "filter": ["=", ["field", 136, null], 2006] + }, + "database": 2, + "type": "query" + }, + "id": 2, + "display": "row", + "last-edit-info": { + "id": 1, + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_name": "LastName", + "timestamp": "2021-12-13T17:48:37.11" + }, + "visualization_settings": { + "graph.series_labels": ["number"], + "graph.dimensions": ["rating"], + "graph.colors": ["#509EE3"], + "graph.metrics": ["count"] + }, + "collection": null, + "created_at": "2021-12-13T17:48:37.102", + "public_uuid": null +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard.json new file mode 100644 index 00000000000000..095abf1bbdc6d5 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard.json @@ -0,0 +1,40 @@ +[{ + "description": null, + "archived": false, + "collection_position": null, + "creator": { + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_login": "2021-12-13T18:51:32.999", + "is_qbnewb": true, + "is_superuser": true, + "id": 1, + "last_name": "LastName", + "date_joined": "2021-12-13T07:34:21.806", + "common_name": "FirstName LastName" + }, + "enable_embedding": false, + "collection_id": null, + "show_in_getting_started": false, + "name": "Dashboard 1", + "caveats": null, + "creator_id": 1, + "updated_at": "2021-12-13T17:48:41.735", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "id": 1, + "position": null, + "last-edit-info": { + "id": 1, + "email": "admin@metabase.com", + "first_name": "FirstName", + 
"last_name": "LastName", + "timestamp": "2021-12-13T17:48:41.742" + }, + "parameters": [], + "favorite": false, + "created_at": "2021-12-13T17:46:48.185", + "public_uuid": null, + "points_of_interest": null +}] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json new file mode 100644 index 00000000000000..0b232cd220045c --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json @@ -0,0 +1,333 @@ +{ + "description": null, + "archived": false, + "collection_position": null, + "ordered_cards": [{ + "sizeX": 4, + "series": [], + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [{ + "name": "customer_id", + "display_name": "customer_id", + "base_type": "type/Integer", + "effective_type": "type/Integer", + "field_ref": ["field", "customer_id", { + "base-type": "type/Integer" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 517, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 1.0, + "q1": 127.95550051624855, + "q3": 457.48181481488376, + "max": 599.0, + "sd": 183.35453319901166, + "avg": 293.316 + } + } + } + }, { + "name": "first_name", + "display_name": "first_name", + "base_type": "type/Text", + "effective_type": "type/Text", + "field_ref": ["field", "first_name", { + "base-type": "type/Text" + }], + "semantic_type": "type/Name", + "fingerprint": { + "global": { + "distinct-count": 509, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0035, + "average-length": 5.629 + } + } + } + }, { + "name": "last_name", + "display_name": "last_name", + "base_type": "type/Text", + "effective_type": "type/Text", + "field_ref": ["field", "last_name", { + "base-type": "type/Text" + }], 
+ "semantic_type": "type/Name", + "fingerprint": { + "global": { + "distinct-count": 517, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0015, + "average-length": 6.126 + } + } + } + }, { + "name": "amount", + "display_name": "amount", + "base_type": "type/Decimal", + "effective_type": "type/Decimal", + "field_ref": ["field", "amount", { + "base-type": "type/Decimal" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 0.99, + "q1": 2.399411317392306, + "q3": 5.52734176879965, + "max": 10.99, + "sd": 2.352151368009511, + "avg": 4.1405 + } + } + } + }, { + "name": "payment_date", + "display_name": "payment_date", + "base_type": "type/DateTime", + "effective_type": "type/DateTime", + "field_ref": ["field", "payment_date", { + "base-type": "type/DateTime" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 1998, + "nil%": 0.0 + }, + "type": { + "type/DateTime": { + "earliest": "2007-02-14T21:21:59.996577Z", + "latest": "2007-02-21T19:27:46.996577Z" + } + } + } + }, { + "name": "rental_id", + "display_name": "rental_id", + "base_type": "type/Integer", + "effective_type": "type/Integer", + "field_ref": ["field", "rental_id", { + "base-type": "type/Integer" + }], + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 2000, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 1158.0, + "q1": 1731.7967120913397, + "q3": 2871.359273326854, + "max": 4591.0, + "sd": 660.7468728104022, + "avg": 2303.4565 + } + } + } + }], + "database_id": 2, + "enable_embedding": false, + "collection_id": null, + "query_type": "native", + "name": "Customer Payment", + "query_average_duration": 820, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2021-12-13T17:48:40.478", + "made_public_by_id": null, + "embedding_params": null, + 
"cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "template-tags": {} + }, + "database": 2 + }, + "id": 1, + "display": "table", + "visualization_settings": { + "table.pivot_column": "amount", + "table.cell_column": "customer_id" + }, + "created_at": "2021-12-13T17:46:32.77", + "public_uuid": null + }, + "updated_at": "2021-12-13T17:48:41.68", + "col": 0, + "id": 1, + "parameter_mappings": [], + "card_id": 1, + "visualization_settings": {}, + "dashboard_id": 1, + "created_at": "2021-12-13T17:46:52.278", + "sizeY": 4, + "row": 0 + }, { + "sizeX": 4, + "series": [], + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": 21, + "result_metadata": [{ + "semantic_type": "type/Category", + "coercion_strategy": null, + "name": "rating", + "field_ref": ["field", 131, null], + "effective_type": "type/*", + "id": 131, + "display_name": "Rating", + "fingerprint": { + "global": { + "distinct-count": 5, + "nil%": 0.0 + }, + "type": { + "type/Text": { + "percent-json": 0.0, + "percent-url": 0.0, + "percent-email": 0.0, + "percent-state": 0.0, + "average-length": 2.926 + } + } + }, + "base_type": "type/PostgresEnum" + }, { + "name": "count", + "display_name": "Count", + "base_type": "type/BigInteger", + "effective_type": "type/BigInteger", + "semantic_type": "type/Quantity", + "field_ref": ["aggregation", 0], + "fingerprint": { + "global": { + "distinct-count": 5, + "nil%": 0.0 + }, + "type": { + "type/Number": { + "min": 178.0, + "q1": 190.0, + "q3": 213.25, + "max": 223.0, + "sd": 17.131841699011815, + "avg": 200.0 + } + } + } + }], + "database_id": 2, + "enable_embedding": false, + "collection_id": null, + "query_type": "query", + 
"name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", + "query_average_duration": 25, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2021-12-13T17:48:39.999", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "query": { + "source-table": 21, + "breakout": [ + ["field", 131, null] + ], + "aggregation": [ + ["count"] + ], + "order-by": [ + ["desc", ["aggregation", 0]] + ], + "filter": ["=", ["field", 136, null], 2006] + }, + "database": 2, + "type": "query" + }, + "id": 2, + "display": "row", + "visualization_settings": { + "graph.series_labels": ["number"], + "graph.dimensions": ["rating"], + "graph.colors": ["#509EE3"], + "graph.metrics": ["count"] + }, + "created_at": "2021-12-13T17:48:37.102", + "public_uuid": null + }, + "updated_at": "2021-12-13T17:48:41.682", + "col": 4, + "id": 2, + "parameter_mappings": [], + "card_id": 2, + "visualization_settings": {}, + "dashboard_id": 1, + "created_at": "2021-12-13T17:48:41.62", + "sizeY": 4, + "row": 0 + }], + "param_values": null, + "can_write": true, + "enable_embedding": false, + "collection_id": null, + "show_in_getting_started": false, + "name": "Dashboard 1", + "caveats": null, + "collection_authority_level": null, + "creator_id": 1, + "updated_at": "2021-12-13T17:48:41.735", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "id": 1, + "position": null, + "param_fields": null, + "last-edit-info": { + "id": 1, + "email": "admin@metabase.com", + "first_name": "FirstName", + "last_name": "LastName", + "timestamp": "2021-12-13T17:48:41.742" + }, + "parameters": [], + "created_at": "2021-12-13T17:46:48.185", + "public_uuid": null, + "points_of_interest": null +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/database.json b/metadata-ingestion/tests/integration/metabase/setup/database.json new file mode 100644 index 
00000000000000..cd1cbdaf6624f1 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/database.json @@ -0,0 +1,47 @@ +{ + "description": null, + "features": ["full-join", "basic-aggregations", "standard-deviation-aggregations", "expression-aggregations", "percentile-aggregations", "foreign-keys", "right-join", "left-join", "native-parameters", "nested-queries", "expressions", "set-timezone", "regex", "case-sensitivity-string-filter-options", "binning", "inner-join", "advanced-math-expressions"], + "cache_field_values_schedule": "0 0 5 * * ? *", + "timezone": "UTC", + "auto_run_queries": true, + "metadata_sync_schedule": "0 15 * * * ? *", + "name": "DVD Rental", + "caveats": null, + "is_full_sync": true, + "updated_at": "2021-12-13T17:45:04.871", + "cache_ttl": null, + "details": { + "host": "172.17.0.1", + "port": 5432, + "dbname": "dvdrental", + "user": "postgres", + "password": "**MetabasePass**", + "ssl": true, + "additional-options": null, + "tunnel-enabled": false + }, + "is_sample": false, + "id": 2, + "is_on_demand": false, + "options": null, + "schedules": { + "cache_field_values": { + "schedule_minute": 0, + "schedule_day": null, + "schedule_frame": null, + "schedule_hour": 5, + "schedule_type": "daily" + }, + "metadata_sync": { + "schedule_minute": 15, + "schedule_day": null, + "schedule_frame": null, + "schedule_hour": null, + "schedule_type": "hourly" + } + }, + "engine": "postgres", + "refingerprint": null, + "created_at": "2021-12-13T17:45:04.72", + "points_of_interest": null +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/session.json b/metadata-ingestion/tests/integration/metabase/setup/session.json new file mode 100644 index 00000000000000..576a52874d21c5 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/session.json @@ -0,0 +1 @@ +{"id": "12345abv6789"} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/table_21.json 
b/metadata-ingestion/tests/integration/metabase/setup/table_21.json new file mode 100644 index 00000000000000..db748d0102fc16 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/table_21.json @@ -0,0 +1,50 @@ +{ + "description": null, + "entity_type": "entity/GenericTable", + "schema": "public", + "db": { + "description": null, + "features": ["full-join", "basic-aggregations", "standard-deviation-aggregations", "expression-aggregations", "percentile-aggregations", "foreign-keys", "right-join", "left-join", "native-parameters", "nested-queries", "expressions", "set-timezone", "regex", "case-sensitivity-string-filter-options", "binning", "inner-join", "advanced-math-expressions"], + "cache_field_values_schedule": "0 0 5 * * ? *", + "timezone": "UTC", + "auto_run_queries": true, + "metadata_sync_schedule": "0 15 * * * ? *", + "name": "DVD Rental", + "caveats": null, + "is_full_sync": true, + "updated_at": "2021-12-13T17:45:04.871", + "cache_ttl": null, + "details": { + "host": "172.17.0.1", + "port": 5432, + "dbname": "dvdrental", + "user": "postgres", + "password": "**MetabasePass**", + "ssl": true, + "additional-options": null, + "tunnel-enabled": false + }, + "is_sample": false, + "id": 2, + "is_on_demand": false, + "options": null, + "engine": "postgres", + "refingerprint": null, + "created_at": "2021-12-13T17:45:04.72", + "points_of_interest": null + }, + "show_in_getting_started": false, + "name": "film", + "caveats": null, + "updated_at": "2021-12-13T17:45:10.52", + "pk_field": 128, + "entity_name": null, + "active": true, + "id": 21, + "db_id": 2, + "visibility_type": null, + "field_order": "database", + "display_name": "Film", + "created_at": "2021-12-13T17:45:04.991", + "points_of_interest": null +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/user.json b/metadata-ingestion/tests/integration/metabase/setup/user.json new file mode 100644 index 00000000000000..5cb63eabb8de8f --- /dev/null +++ 
b/metadata-ingestion/tests/integration/metabase/setup/user.json @@ -0,0 +1,18 @@ +{ + "email": "admin@metabase.com", + "ldap_auth": false, + "first_name": "FirstName", + "locale": null, + "last_login": "2021-12-13T18:51:32.999", + "is_active": true, + "is_qbnewb": true, + "updated_at": "2021-12-13T18:51:32.999", + "group_ids": [1, 2], + "is_superuser": true, + "login_attributes": null, + "id": 1, + "last_name": "LastName", + "date_joined": "2021-12-13T07:34:21.806", + "common_name": "FirstName LastName", + "google_auth": false +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/test_metabase.py b/metadata-ingestion/tests/integration/metabase/test_metabase.py new file mode 100644 index 00000000000000..8ee2f67bc532f0 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/test_metabase.py @@ -0,0 +1,165 @@ +import json +from unittest.mock import patch + +from freezegun import freeze_time +from requests.models import HTTPError + +from datahub.configuration.common import PipelineExecutionError +from datahub.ingestion.run.pipeline import Pipeline +from tests.test_helpers import mce_helpers + +FROZEN_TIME = "2021-11-11 07:00:00" + +JSON_RESPONSE_MAP = { + "http://localhost:3000/api/session": "session.json", + "http://localhost:3000/api/user/current": "user.json", + "http://localhost:3000/api/dashboard": "dashboard.json", + "http://localhost:3000/api/dashboard/1": "dashboard_1.json", + "http://localhost:3000/api/user/1": "user.json", + "http://localhost:3000/api/card": "card.json", + "http://localhost:3000/api/database/2": "database.json", + "http://localhost:3000/api/card/1": "card_1.json", + "http://localhost:3000/api/card/2": "card_2.json", + "http://localhost:3000/api/table/21": "table_21.json", +} + +RESPONSE_ERROR_LIST = ["http://localhost:3000/api/dashboard"] + +test_resources_dir = None + + +class MockResponse: + def __init__(self, url, data=None, jsond=None, error_list=None): + self.json_data = data + self.url = url 
+ self.jsond = jsond + self.error_list = error_list + self.headers = {} + self.auth = None + self.status_code = 200 + + def json(self): + response_json_path = ( + f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(self.url)}" + ) + with open(response_json_path) as file: + data = json.loads(file.read()) + self.json_data = data + return self.json_data + + def get(self, url): + self.url = url + return self + + def raise_for_status(self): + if self.error_list is not None and self.url in self.error_list: + http_error_msg = "%s Client Error: %s for url: %s" % ( + 400, + "Simulate error", + self.url, + ) + raise HTTPError(http_error_msg, response=self) + + +def mocked_requests_sucess(*args, **kwargs): + return MockResponse(None) + + +def mocked_requests_failure(*args, **kwargs): + return MockResponse(None, error_list=RESPONSE_ERROR_LIST) + + +def mocked_requests_session_post(url, data, json): + return MockResponse(url, data, json) + + +def mocked_requests_session_delete(url, headers): + return MockResponse(url, data=None, jsond=headers) + + +@freeze_time(FROZEN_TIME) +def test_mode_ingest_success(pytestconfig, tmp_path): + with patch( + "datahub.ingestion.source.metabase.requests.session", + side_effect=mocked_requests_sucess, + ), patch( + "datahub.ingestion.source.metabase.requests.post", + side_effect=mocked_requests_session_post, + ), patch( + "datahub.ingestion.source.metabase.requests.delete", + side_effect=mocked_requests_session_delete, + ): + global test_resources_dir + test_resources_dir = pytestconfig.rootpath / "tests/integration/metabase" + + pipeline = Pipeline.create( + { + "run_id": "metabase-test", + "source": { + "type": "metabase", + "config": { + "username": "xxxx", + "password": "xxxx", + "connect_uri": "http://localhost:3000/", + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/metabase_mces.json", + }, + }, + } + ) + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + 
output_path=f"{tmp_path}/metabase_mces.json", + golden_path=test_resources_dir / "metabase_mces_golden.json", + ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS, + ) + + +@freeze_time(FROZEN_TIME) +def test_mode_ingest_failure(pytestconfig, tmp_path): + with patch( + "datahub.ingestion.source.metabase.requests.session", + side_effect=mocked_requests_failure, + ), patch( + "datahub.ingestion.source.metabase.requests.post", + side_effect=mocked_requests_session_post, + ), patch( + "datahub.ingestion.source.metabase.requests.delete", + side_effect=mocked_requests_session_delete, + ): + global test_resources_dir + test_resources_dir = pytestconfig.rootpath / "tests/integration/metabase" + + pipeline = Pipeline.create( + { + "run_id": "metabase-test", + "source": { + "type": "metabase", + "config": { + "username": "xxxx", + "password": "xxxx", + "connect_uri": "http://localhost:3000/", + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/metabase_mces.json", + }, + }, + } + ) + pipeline.run() + try: + pipeline.raise_from_status() + except PipelineExecutionError as exec_error: + assert exec_error.args[0] == "Source reported errors" + assert len(exec_error.args[1].failures) == 1 + assert list(exec_error.args[1].failures.keys())[0] == "metabase-dashboard" diff --git a/metadata-ingestion/tests/unit/test_usage_common.py b/metadata-ingestion/tests/unit/test_usage_common.py index 3c21bd98d6dcc8..021a988422688a 100644 --- a/metadata-ingestion/tests/unit/test_usage_common.py +++ b/metadata-ingestion/tests/unit/test_usage_common.py @@ -12,9 +12,9 @@ ) from datahub.metadata.schema_classes import DatasetUsageStatisticsClass -TestTableRef = str +_TestTableRef = str -TestAggregatedDataset = GenericAggregatedDataset[TestTableRef] +_TestAggregatedDataset = GenericAggregatedDataset[_TestTableRef] def test_add_one_query_without_columns(): @@ -27,7 +27,7 @@ def test_add_one_query_without_columns(): resource = "test_db.test_schema.test_table" - ta = 
TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) + ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) ta.add_read_entry( test_email, test_query, @@ -52,7 +52,7 @@ def test_multiple_query_without_columns(): resource = "test_db.test_schema.test_table" - ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) + ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) ta.add_read_entry( test_email, test_query, @@ -88,7 +88,7 @@ def test_make_usage_workunit(): resource = "test_db.test_schema.test_table" - ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) + ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) ta.add_read_entry( test_email, test_query, @@ -117,7 +117,7 @@ def test_query_trimming(): resource = "test_db.test_schema.test_table" - ta = TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) + ta = _TestAggregatedDataset(bucket_start_time=floored_ts, resource=resource) ta.total_budget_for_query_list = total_budget_for_query_list ta.add_read_entry( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java index b074d89050e961..30eb17f6911e60 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.entity; +import com.codahale.metrics.Timer; import com.google.common.collect.ImmutableList; import com.google.common.collect.Streams; import com.linkedin.common.AuditStamp; @@ -26,6 +27,7 @@ import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericAspectUtils; import com.linkedin.metadata.utils.PegasusUtils; +import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.MetadataAuditOperation; import 
com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; @@ -38,9 +40,13 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; +import lombok.Setter; +import lombok.Value; import lombok.extern.slf4j.Slf4j; import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; @@ -88,7 +94,10 @@ public abstract class EntityService { private final EntityEventProducer _producer; private final EntityRegistry _entityRegistry; private final Map> _entityToValidAspects; - private Boolean _emitAspectSpecificAuditEvent = false; + @Getter + @Setter + private RetentionService retentionService; + private Boolean _alwaysEmitAuditEvent = false; public static final String DEFAULT_RUN_ID = "no-run-id-provided"; public static final String BROWSE_PATHS = "browsePaths"; public static final String DATA_PLATFORM_INSTANCE = "dataPlatformInstance"; @@ -107,7 +116,7 @@ protected EntityService(@Nonnull final EntityEventProducer producer, @Nonnull fi * @param aspectNames aspects to fetch for each urn in urns set * @return a map of provided {@link Urn} to a List containing the requested aspects. */ - protected abstract Map> getLatestAspects(@Nonnull final Set urns, + public abstract Map> getLatestAspects(@Nonnull final Set urns, @Nonnull final Set aspectNames); /** @@ -146,6 +155,22 @@ public abstract VersionedAspect getVersionedAspect(@Nonnull final Urn urn, @Nonn public abstract ListResult listLatestAspects(@Nonnull final String entityName, @Nonnull final String aspectName, final int start, int count); + /** + * Checks whether there is an actual update to the aspect by applying the updateLambda + * If there is an update, push the new version into the local DB. + * Otherwise, do not push the new version, but just update the system metadata. 
+ * + * @param urn an urn associated with the new aspect + * @param aspectName name of the aspect being inserted + * @param updateLambda Function to apply to the latest version of the aspect to get the updated version + * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time * @param providedSystemMetadata + * @return Details about the new and old version of the aspect + */ + @Nonnull + protected abstract UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnull final String aspectName, + @Nonnull final Function, RecordTemplate> updateLambda, + @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata); + /** * Ingests (inserts) a new version of an entity aspect & emits a {@link com.linkedin.mxe.MetadataAuditEvent}. * @@ -159,8 +184,47 @@ public abstract ListResult listLatestAspects(@Nonnull final Stri * @param systemMetadata * @return the {@link RecordTemplate} representation of the written aspect object */ - public abstract RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, - @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, SystemMetadata systemMetadata); + public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, + @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, SystemMetadata systemMetadata) { + + log.debug("Invoked ingestAspect with urn: {}, aspectName: {}, newValue: {}", urn, aspectName, newValue); + + if (!urn.toString().trim().equals(urn.toString())) { + throw new IllegalArgumentException("Error: cannot provide an URN with leading or trailing whitespace"); + } + + Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectToLocalDB").time(); + UpdateAspectResult result = ingestAspectToLocalDB(urn, aspectName, ignored -> newValue, auditStamp, systemMetadata); + ingestToLocalDBTimer.stop(); + + final RecordTemplate 
oldValue = result.getOldValue(); + final RecordTemplate updatedValue = result.getNewValue(); + + // Apply retention policies asynchronously if there was an update to existing aspect value + if (oldValue != updatedValue && oldValue != null && retentionService != null) { + retentionService.applyRetention(urn, aspectName, + Optional.of(new RetentionService.RetentionContext(Optional.of(result.maxVersion)))); + } + + // Produce MAE after a successful update + if (oldValue != updatedValue || _alwaysEmitAuditEvent) { + log.debug(String.format("Producing MetadataAuditEvent for ingested aspect %s, urn %s", aspectName, urn)); + Timer.Context produceMAETimer = MetricUtils.timer(this.getClass(), "produceMAE").time(); + if (aspectName.equals(getKeyAspectName(urn))) { + produceMetadataAuditEventForKey(urn, result.getNewSystemMetadata()); + } else { + produceMetadataAuditEvent(urn, oldValue, updatedValue, result.getOldSystemMetadata(), + result.getNewSystemMetadata(), MetadataAuditOperation.UPDATE); + } + produceMAETimer.stop(); + } else { + log.debug( + String.format("Skipped producing MetadataAuditEvent for ingested aspect %s, urn %s. 
Aspect has not changed.", + aspectName, urn)); + } + + return updatedValue; + } public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp) { @@ -171,6 +235,105 @@ public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String return ingestAspect(urn, aspectName, newValue, auditStamp, generatedSystemMetadata); } + public IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal metadataChangeProposal, AuditStamp auditStamp) { + + log.debug("entity type = {}", metadataChangeProposal.getEntityType()); + EntitySpec entitySpec = getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); + log.debug("entity spec = {}", entitySpec); + + Urn entityUrn = EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec()); + + if (metadataChangeProposal.getChangeType() != ChangeType.UPSERT) { + throw new UnsupportedOperationException("Only upsert operation is supported"); + } + + if (!metadataChangeProposal.hasAspectName() || !metadataChangeProposal.hasAspect()) { + throw new UnsupportedOperationException("Aspect and aspect name is required for create and update operations"); + } + + AspectSpec aspectSpec = entitySpec.getAspectSpec(metadataChangeProposal.getAspectName()); + + if (aspectSpec == null) { + throw new RuntimeException( + String.format("Unknown aspect %s for entity %s", metadataChangeProposal.getAspectName(), + metadataChangeProposal.getEntityType())); + } + + log.debug("aspect spec = {}", aspectSpec); + + RecordTemplate aspect; + try { + aspect = GenericAspectUtils.deserializeAspect(metadataChangeProposal.getAspect().getValue(), + metadataChangeProposal.getAspect().getContentType(), aspectSpec); + ValidationUtils.validateOrThrow(aspect); + } catch (ModelConversionException e) { + throw new RuntimeException( + String.format("Could not deserialize {} for aspect {}", 
metadataChangeProposal.getAspect().getValue(), + metadataChangeProposal.getAspectName())); + } + log.debug("aspect = {}", aspect); + + SystemMetadata systemMetadata = metadataChangeProposal.getSystemMetadata(); + if (systemMetadata == null) { + systemMetadata = new SystemMetadata(); + systemMetadata.setRunId(DEFAULT_RUN_ID); + systemMetadata.setLastObserved(System.currentTimeMillis()); + } + systemMetadata.setRegistryName(aspectSpec.getRegistryName()); + systemMetadata.setRegistryVersion(aspectSpec.getRegistryVersion().toString()); + + RecordTemplate oldAspect = null; + SystemMetadata oldSystemMetadata = null; + RecordTemplate newAspect = aspect; + SystemMetadata newSystemMetadata = systemMetadata; + + if (!aspectSpec.isTimeseries()) { + Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestProposalToLocalDB").time(); + UpdateAspectResult result = + ingestAspectToLocalDB(entityUrn, metadataChangeProposal.getAspectName(), ignored -> aspect, auditStamp, + systemMetadata); + ingestToLocalDBTimer.stop(); + oldAspect = result.getOldValue(); + oldSystemMetadata = result.getOldSystemMetadata(); + newAspect = result.getNewValue(); + newSystemMetadata = result.getNewSystemMetadata(); + // Apply retention policies asynchronously if there was an update to existing aspect value + if (oldAspect != newAspect && oldAspect != null && retentionService != null) { + retentionService.applyRetention(entityUrn, aspectSpec.getName(), + Optional.of(new RetentionService.RetentionContext(Optional.of(result.maxVersion)))); + } + } + + if (oldAspect != newAspect || getAlwaysEmitAuditEvent()) { + log.debug(String.format("Producing MetadataChangeLog for ingested aspect %s, urn %s", + metadataChangeProposal.getAspectName(), entityUrn)); + + final MetadataChangeLog metadataChangeLog = new MetadataChangeLog(metadataChangeProposal.data()); + if (oldAspect != null) { + metadataChangeLog.setPreviousAspectValue(GenericAspectUtils.serializeAspect(oldAspect)); + } + if 
(oldSystemMetadata != null) { + metadataChangeLog.setPreviousSystemMetadata(oldSystemMetadata); + } + if (newAspect != null) { + metadataChangeLog.setAspect(GenericAspectUtils.serializeAspect(newAspect)); + } + if (newSystemMetadata != null) { + metadataChangeLog.setSystemMetadata(newSystemMetadata); + } + + log.debug(String.format("Serialized MCL event: %s", metadataChangeLog)); + // Since only timeseries aspects are ingested as of now, simply produce mae event for it + produceMetadataChangeLog(entityUrn, aspectSpec, metadataChangeLog); + } else { + log.debug( + String.format("Skipped producing MetadataAuditEvent for ingested aspect %s, urn %s. Aspect has not changed.", + metadataChangeProposal.getAspectName(), entityUrn)); + } + + return new IngestProposalResult(entityUrn, oldAspect != newAspect); + } + /** * Updates a particular version of an aspect & optionally emits a {@link com.linkedin.mxe.MetadataAuditEvent}. * @@ -483,12 +646,12 @@ private Map> buildEntityToValidAspects(final EntityRegistry entry -> entry.getAspectSpecs().stream().map(AspectSpec::getName).collect(Collectors.toSet()))); } - public Boolean getEmitAspectSpecificAuditEvent() { - return _emitAspectSpecificAuditEvent; + public Boolean getAlwaysEmitAuditEvent() { + return _alwaysEmitAuditEvent; } - public void setEmitAspectSpecificAuditEvent(Boolean emitAspectSpecificAuditEvent) { - _emitAspectSpecificAuditEvent = emitAspectSpecificAuditEvent; + public void setAlwaysEmitAuditEvent(Boolean alwaysEmitAuditEvent) { + _alwaysEmitAuditEvent = alwaysEmitAuditEvent; } public EntityRegistry getEntityRegistry() { @@ -505,8 +668,6 @@ protected Set getEntityAspectNames(final String entityName) { public abstract void setWritable(boolean canWrite); - public abstract Urn ingestProposal(MetadataChangeProposal metadataChangeProposal, AuditStamp auditStamp); - public RollbackRunResult rollbackRun(List aspectRows, String runId) { return rollbackWithConditions(aspectRows, Collections.singletonMap("runId", runId)); 
} @@ -517,4 +678,21 @@ public abstract RollbackRunResult rollbackWithConditions(List public abstract RollbackRunResult deleteUrn(Urn urn); public abstract Boolean exists(Urn urn); + + @Value + public static class UpdateAspectResult { + Urn urn; + RecordTemplate oldValue; + RecordTemplate newValue; + SystemMetadata oldSystemMetadata; + SystemMetadata newSystemMetadata; + MetadataAuditOperation operation; + long maxVersion; + } + + @Value + public static class IngestProposalResult { + Urn urn; + boolean didUpdate; + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java new file mode 100644 index 00000000000000..8c57c9de7e54f4 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java @@ -0,0 +1,186 @@ +package com.linkedin.metadata.entity; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.key.DataHubRetentionKey; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericAspectUtils; +import com.linkedin.mxe.GenericAspect; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.retention.DataHubRetentionConfig; +import com.linkedin.retention.Retention; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.SneakyThrows; +import lombok.Value; + + +/** + * Service coupled with an entity service to handle retention + */ +public 
abstract class RetentionService { + protected static final String ALL = "*"; + protected static final String DATAHUB_RETENTION_ENTITY = "dataHubRetention"; + protected static final String DATAHUB_RETENTION_ASPECT = "dataHubRetentionConfig"; + protected static final String DATAHUB_RETENTION_KEY_ASPECT = "dataHubRetentionKey"; + + protected abstract EntityService getEntityService(); + + /** + * Fetch retention policies given the entityName and aspectName + * Uses the entity service to fetch the latest retention policies set for the input entity and aspect + * + * @param entityName Name of the entity + * @param aspectName Name of the aspect + * @return retention policies to apply to the input entity and aspect + */ + public Retention getRetention(@Nonnull String entityName, @Nonnull String aspectName) { + // Prioritized list of retention keys to fetch + List retentionUrns = getRetentionKeys(entityName, aspectName); + Map> fetchedAspects = + getEntityService().getLatestAspects(new HashSet<>(retentionUrns), ImmutableSet.of(DATAHUB_RETENTION_ASPECT)); + // Find the first retention info that is set among the prioritized list of retention keys above + Optional retentionInfo = retentionUrns.stream() + .flatMap(urn -> fetchedAspects.getOrDefault(urn, Collections.emptyList()) + .stream() + .filter(aspect -> aspect instanceof DataHubRetentionConfig)) + .map(retention -> (DataHubRetentionConfig) retention) + .findFirst(); + return retentionInfo.map(DataHubRetentionConfig::getRetention).orElse(new Retention()); + } + + // Get list of datahub retention keys that match the input entity name and aspect name + protected List getRetentionKeys(@Nonnull String entityName, @Nonnull String aspectName) { + return ImmutableList.of(new DataHubRetentionKey().setEntityName(entityName).setAspectName(aspectName), + new DataHubRetentionKey().setEntityName(entityName).setAspectName(ALL), + new DataHubRetentionKey().setEntityName(ALL).setAspectName(aspectName), + new 
DataHubRetentionKey().setEntityName(ALL).setAspectName(ALL)) + .stream() + .map(key -> EntityKeyUtils.convertEntityKeyToUrn(key, DATAHUB_RETENTION_ENTITY)) + .collect(Collectors.toList()); + } + + /** + * Set retention policy for given entity and aspect. If entity or aspect names are null, the policy is set as default + * + * @param entityName Entity name to apply policy to. If null, set as "*", + * meaning it will be the default for any entities without specified policy + * @param aspectName Aspect name to apply policy to. If null, set as "*", + * meaning it will be the default for any aspects without specified policy + * @param retentionConfig Retention policy + */ + @SneakyThrows + public boolean setRetention(@Nullable String entityName, @Nullable String aspectName, + @Nonnull DataHubRetentionConfig retentionConfig) { + validateRetention(retentionConfig.getRetention()); + DataHubRetentionKey retentionKey = new DataHubRetentionKey(); + retentionKey.setEntityName(entityName != null ? entityName : ALL); + retentionKey.setAspectName(aspectName != null ? 
aspectName : ALL); + Urn retentionUrn = EntityKeyUtils.convertEntityKeyToUrn(retentionKey, DATAHUB_RETENTION_ENTITY); + MetadataChangeProposal keyProposal = new MetadataChangeProposal(); + GenericAspect keyAspect = GenericAspectUtils.serializeAspect(retentionKey); + keyProposal.setAspect(keyAspect); + keyProposal.setAspectName(DATAHUB_RETENTION_KEY_ASPECT); + keyProposal.setEntityType(DATAHUB_RETENTION_ENTITY); + keyProposal.setChangeType(ChangeType.UPSERT); + keyProposal.setEntityUrn(retentionUrn); + AuditStamp auditStamp = + new AuditStamp().setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); + getEntityService().ingestProposal(keyProposal, auditStamp); + MetadataChangeProposal aspectProposal = keyProposal.clone(); + GenericAspect retentionAspect = GenericAspectUtils.serializeAspect(retentionConfig); + aspectProposal.setAspect(retentionAspect); + aspectProposal.setAspectName(DATAHUB_RETENTION_ASPECT); + return getEntityService().ingestProposal(aspectProposal, auditStamp).isDidUpdate(); + } + + /** + * Delete the retention policy set for given entity and aspect. + * + * @param entityName Entity name to apply policy to. If null, set as "*", + * meaning it will delete the default policy for any entities without specified policy + * @param aspectName Aspect name to apply policy to. If null, set as "*", + * meaning it will delete the default policy for any aspects without specified policy + */ + public void deleteRetention(@Nullable String entityName, @Nullable String aspectName) { + DataHubRetentionKey retentionKey = new DataHubRetentionKey(); + retentionKey.setEntityName(entityName != null ? entityName : ALL); + retentionKey.setAspectName(aspectName != null ? 
aspectName : ALL); + Urn retentionUrn = EntityKeyUtils.convertEntityKeyToUrn(retentionKey, DATAHUB_RETENTION_ENTITY); + getEntityService().deleteUrn(retentionUrn); + } + + private void validateRetention(Retention retention) { + if (retention.hasVersion()) { + if (retention.getVersion().getMaxVersions() <= 0) { + throw new IllegalArgumentException("Invalid maxVersions: " + retention.getVersion().getMaxVersions()); + } + } + if (retention.hasTime()) { + if (retention.getTime().getMaxAgeInSeconds() <= 0) { + throw new IllegalArgumentException("Invalid maxAgeInSeconds: " + retention.getTime().getMaxAgeInSeconds()); + } + } + } + + /** + * Apply retention policies given the urn and aspect name asynchronously + * + * @param urn Urn of the entity + * @param aspectName Name of the aspect + * @param context Additional context that could be used to apply retention + */ + public void applyRetentionAsync(@Nonnull Urn urn, @Nonnull String aspectName, Optional context) { + CompletableFuture.runAsync(() -> applyRetention(urn, aspectName, context)); + } + + /** + * Apply retention policies given the urn and aspect name + * + * @param urn Urn of the entity + * @param aspectName Name of the aspect + * @param context Additional context that could be used to apply retention + */ + public void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName, Optional context) { + Retention retentionPolicy = getRetention(urn.getEntityType(), aspectName); + if (retentionPolicy.data().isEmpty()) { + return; + } + applyRetention(urn, aspectName, retentionPolicy, context); + } + + /** + * Apply retention policies given the urn and aspect name and policies + * @param urn Urn of the entity + * @param aspectName Name of the aspect + * @param retentionPolicy Retention policies to apply + * @param context Additional context that could be used to apply retention + */ + public abstract void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName, Retention retentionPolicy, + Optional context); + + 
/** + * Batch apply retention to all records that match the input entityName and aspectName + * + * @param entityName Name of the entity to apply retention to. If null, applies to all entities + * @param aspectName Name of the aspect to apply retention to. If null, applies to all aspects + */ + public abstract void batchApplyRetention(@Nullable String entityName, @Nullable String aspectName); + + @Value + public static class RetentionContext { + Optional maxVersion; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 71d5fe82bc8e28..ab3323f5ac920f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -4,10 +4,6 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.dao.exception.ModelConversionException; import com.linkedin.metadata.dao.exception.RetryLimitReached; -import com.linkedin.metadata.dao.retention.IndefiniteRetention; -import com.linkedin.metadata.dao.retention.Retention; -import com.linkedin.metadata.dao.retention.TimeBasedRetention; -import com.linkedin.metadata.dao.retention.VersionBasedRetention; import com.linkedin.metadata.dao.utils.QueryUtils; import com.linkedin.metadata.entity.AspectStorageValidationUtil; import com.linkedin.metadata.entity.ListResult; @@ -45,11 +41,8 @@ @Slf4j public class EbeanAspectDao { - private static final IndefiniteRetention INDEFINITE_RETENTION = new IndefiniteRetention(); - private final EbeanServer _server; private boolean _connectionValidated = false; - private final Map _aspectRetentionMap = new HashMap<>(); private final Clock _clock = Clock.systemUTC(); // Why 375? From tuning, this seems to be about the largest size we can get without having ebean batch issues. 
@@ -124,9 +117,6 @@ public long saveLatestAspect( // Save newValue as the latest version (v0) saveAspect(urn, aspectName, newAspectMetadata, newActor, newImpersonator, newTime, newSystemMetadata, ASPECT_LATEST_VERSION, oldAspectMetadata == null); - // Apply retention policy - applyRetention(urn, aspectName, getRetention(aspectName), largestVersion); - return largestVersion; } @@ -171,12 +161,11 @@ protected EbeanAspectV2 getLatestAspect(@Nonnull final String urn, @Nonnull fina return _server.find(EbeanAspectV2.class, key); } - @Nullable public long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName) { - validateConnection(); List result = _server.find(EbeanAspectV2.class) .where() - .eq("urn", urn).eq("aspect", aspectName) + .eq("urn", urn) + .eq("aspect", aspectName) .orderBy() .desc("version") .findList(); @@ -443,15 +432,6 @@ public ListResult listAspectMetadata( return toListResult(aspects, listResultMetadata, pagedList, start); } - @Nonnull - public Retention getRetention(@Nonnull final String aspectName) { - return _aspectRetentionMap.getOrDefault(aspectName, INDEFINITE_RETENTION); - } - - public void setRetention(@Nonnull final String aspectName, @Nonnull final Retention retention) { - _aspectRetentionMap.put(aspectName, retention); - } - @Nonnull public T runInTransactionWithRetry(@Nonnull final Supplier block, final int maxTransactionRetry) { validateConnection(); @@ -477,58 +457,6 @@ public T runInTransactionWithRetry(@Nonnull final Supplier block, final i return result; } - - private void applyRetention( - @Nonnull final String urn, - @Nonnull final String aspectName, - @Nonnull final Retention retention, - long largestVersion) { - if (retention instanceof IndefiniteRetention) { - return; - } - - if (retention instanceof VersionBasedRetention) { - applyVersionBasedRetention(urn, aspectName, (VersionBasedRetention) retention, largestVersion); - return; - } - - if (retention instanceof TimeBasedRetention) { - 
applyTimeBasedRetention(urn, aspectName, (TimeBasedRetention) retention, _clock.millis()); - return; - } - } - - protected void applyVersionBasedRetention( - @Nonnull final String urn, - @Nonnull final String aspectName, - @Nonnull final VersionBasedRetention retention, - long largestVersion) { - validateConnection(); - - _server.find(EbeanAspectV2.class) - .where() - .eq(EbeanAspectV2.URN_COLUMN, urn) - .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName) - .ne(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION) - .le(EbeanAspectV2.VERSION_COLUMN, largestVersion - retention.getMaxVersionsToRetain() + 1) - .delete(); - } - - protected void applyTimeBasedRetention( - @Nonnull final String urn, - @Nonnull final String aspectName, - @Nonnull final TimeBasedRetention retention, - long currentTime) { - validateConnection(); - - _server.find(EbeanAspectV2.class) - .where() - .eq(EbeanAspectV2.URN_COLUMN, urn.toString()) - .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName) - .lt(EbeanAspectV2.CREATED_ON_COLUMN, new Timestamp(currentTime - retention.getMaxAgeToRetain())) - .delete(); - } - private long getNextVersion(@Nonnull final String urn, @Nonnull final String aspectName) { validateConnection(); final List result = _server.find(EbeanAspectV2.class) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java index 912fa05a12f62f..16c9072f3b4d47 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanEntityService.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.entity.ebean; -import com.codahale.metrics.Timer; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; @@ -13,28 +12,20 @@ import com.linkedin.events.metadata.ChangeType; import 
com.linkedin.metadata.aspect.Aspect; import com.linkedin.metadata.aspect.VersionedAspect; -import com.linkedin.metadata.dao.exception.ModelConversionException; import com.linkedin.metadata.dao.utils.RecordUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ListResult; import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.RollbackRunResult; -import com.linkedin.metadata.entity.ValidationUtils; import com.linkedin.metadata.event.EntityEventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListUrnsResult; import com.linkedin.metadata.run.AspectRowSummary; -import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericAspectUtils; import com.linkedin.metadata.utils.PegasusUtils; -import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.MetadataAuditOperation; -import com.linkedin.mxe.MetadataChangeLog; -import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; -import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.sql.Timestamp; import java.util.ArrayList; @@ -50,11 +41,12 @@ import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.Value; import lombok.extern.slf4j.Slf4j; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.entity.ebean.EbeanUtils.*; +import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; +import static com.linkedin.metadata.entity.ebean.EbeanUtils.parseSystemMetadata; +import static com.linkedin.metadata.entity.ebean.EbeanUtils.toAspectRecord; +import static com.linkedin.metadata.entity.ebean.EbeanUtils.toJsonAspect; /** @@ -68,8 +60,6 @@ public class EbeanEntityService extends 
EntityService { private final EbeanAspectDao _entityDao; private final JacksonDataTemplateCodec _dataTemplateCodec = new JacksonDataTemplateCodec(); - private Boolean _alwaysEmitAuditEvent = false; - public EbeanEntityService(@Nonnull final EbeanAspectDao entityDao, @Nonnull final EntityEventProducer eventProducer, @Nonnull final EntityRegistry entityRegistry) { @@ -206,50 +196,9 @@ public ListResult listLatestAspects(@Nonnull final String entity @Override @Nonnull - @WithSpan - public RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, - @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, - @Nonnull final SystemMetadata systemMetadata) { - - log.debug("Invoked ingestAspect with urn: {}, aspectName: {}, newValue: {}", urn, aspectName, newValue); - - if (!urn.toString().trim().equals(urn.toString())) { - throw new IllegalArgumentException("Error: cannot provide an URN with leading or trailing whitespace"); - } - - Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectToLocalDB").time(); - UpdateAspectResult result = ingestAspectToLocalDB(urn, aspectName, ignored -> newValue, auditStamp, systemMetadata, - DEFAULT_MAX_TRANSACTION_RETRY); - ingestToLocalDBTimer.stop(); - - final RecordTemplate oldValue = result.getOldValue(); - final RecordTemplate updatedValue = result.getNewValue(); - - // 5. 
Produce MAE after a successful update - if (oldValue != updatedValue || _alwaysEmitAuditEvent) { - log.debug(String.format("Producing MetadataAuditEvent for ingested aspect %s, urn %s", aspectName, urn)); - Timer.Context produceMAETimer = MetricUtils.timer(this.getClass(), "produceMAE").time(); - if (aspectName.equals(getKeyAspectName(urn))) { - produceMetadataAuditEventForKey(urn, result.getNewSystemMetadata()); - } else { - produceMetadataAuditEvent(urn, oldValue, updatedValue, result.getOldSystemMetadata(), - result.getNewSystemMetadata(), MetadataAuditOperation.UPDATE); - } - produceMAETimer.stop(); - } else { - log.debug( - String.format("Skipped producing MetadataAuditEvent for ingested aspect %s, urn %s. Aspect has not changed.", - aspectName, urn)); - } - - return updatedValue; - } - - @Nonnull - private UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnull final String aspectName, + protected UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull final Function, RecordTemplate> updateLambda, - @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata, - final int maxTransactionRetry) { + @Nonnull final AuditStamp auditStamp, @Nonnull final SystemMetadata providedSystemMetadata) { return _entityDao.runInTransactionWithRetry(() -> { @@ -274,12 +223,12 @@ private UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnul return new UpdateAspectResult(urn, oldValue, oldValue, EbeanUtils.parseSystemMetadata(latest.getSystemMetadata()), latestSystemMetadata, - MetadataAuditOperation.UPDATE); + MetadataAuditOperation.UPDATE, 0); } // 4. Save the newValue as the latest version log.debug(String.format("Ingesting aspect with name %s, urn %s", aspectName, urn)); - _entityDao.saveLatestAspect(urn.toString(), aspectName, latest == null ? 
null : toJsonAspect(oldValue), + long versionOfOld = _entityDao.saveLatestAspect(urn.toString(), aspectName, latest == null ? null : toJsonAspect(oldValue), latest == null ? null : latest.getCreatedBy(), latest == null ? null : latest.getCreatedFor(), latest == null ? null : latest.getCreatedOn(), latest == null ? null : latest.getSystemMetadata(), toJsonAspect(newValue), auditStamp.getActor().toString(), @@ -288,8 +237,8 @@ private UpdateAspectResult ingestAspectToLocalDB(@Nonnull final Urn urn, @Nonnul return new UpdateAspectResult(urn, oldValue, newValue, latest == null ? null : EbeanUtils.parseSystemMetadata(latest.getSystemMetadata()), providedSystemMetadata, - MetadataAuditOperation.UPDATE); - }, maxTransactionRetry); + MetadataAuditOperation.UPDATE, versionOfOld); + }, DEFAULT_MAX_TRANSACTION_RETRY); } @Override @@ -329,7 +278,7 @@ private RecordTemplate updateAspect(@Nonnull final Urn urn, @Nonnull final Strin new Timestamp(auditStamp.getTime()), toJsonAspect(newSystemMetadata), version, oldAspect == null); return new UpdateAspectResult(urn, oldValue, value, oldSystemMetadata, newSystemMetadata, - MetadataAuditOperation.UPDATE); + MetadataAuditOperation.UPDATE, version); }, maxTransactionRetry); final RecordTemplate oldValue = result.getOldValue(); @@ -337,8 +286,8 @@ private RecordTemplate updateAspect(@Nonnull final Urn urn, @Nonnull final Strin if (emitMae) { log.debug(String.format("Producing MetadataAuditEvent for updated aspect %s, urn %s", aspectName, urn)); - produceMetadataChangeLog(urn, entityName, aspectName, aspectSpec, oldValue, newValue, result.oldSystemMetadata, - result.newSystemMetadata, ChangeType.UPSERT); + produceMetadataChangeLog(urn, entityName, aspectName, aspectSpec, oldValue, newValue, + result.getOldSystemMetadata(), result.getNewSystemMetadata(), ChangeType.UPSERT); } else { log.debug(String.format("Skipped producing MetadataAuditEvent for updated aspect %s, urn %s. 
emitMAE is false.", aspectName, urn)); @@ -347,114 +296,11 @@ private RecordTemplate updateAspect(@Nonnull final Urn urn, @Nonnull final Strin return newValue; } - public Boolean getAlwaysEmitAuditEvent() { - return _alwaysEmitAuditEvent; - } - - public void setAlwaysEmitAuditEvent(Boolean alwaysEmitAuditEvent) { - _alwaysEmitAuditEvent = alwaysEmitAuditEvent; - } - public void setWritable(boolean canWrite) { log.debug("Enabling writes"); _entityDao.setWritable(canWrite); } - @Override - public Urn ingestProposal(@Nonnull MetadataChangeProposal metadataChangeProposal, AuditStamp auditStamp) { - - log.debug("entity type = {}", metadataChangeProposal.getEntityType()); - EntitySpec entitySpec = getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); - log.debug("entity spec = {}", entitySpec); - - Urn entityUrn = EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec()); - - if (metadataChangeProposal.getChangeType() != ChangeType.UPSERT) { - throw new UnsupportedOperationException("Only upsert operation is supported"); - } - - if (!metadataChangeProposal.hasAspectName() || !metadataChangeProposal.hasAspect()) { - throw new UnsupportedOperationException("Aspect and aspect name is required for create and update operations"); - } - - AspectSpec aspectSpec = entitySpec.getAspectSpec(metadataChangeProposal.getAspectName()); - - if (aspectSpec == null) { - throw new RuntimeException( - String.format("Unknown aspect %s for entity %s", metadataChangeProposal.getAspectName(), - metadataChangeProposal.getEntityType())); - } - - log.debug("aspect spec = {}", aspectSpec); - - RecordTemplate aspect; - try { - aspect = GenericAspectUtils.deserializeAspect(metadataChangeProposal.getAspect().getValue(), - metadataChangeProposal.getAspect().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(aspect); - } catch (ModelConversionException e) { - throw new RuntimeException( - String.format("Could not deserialize {} for aspect 
{}", metadataChangeProposal.getAspect().getValue(), - metadataChangeProposal.getAspectName())); - } - log.debug("aspect = {}", aspect); - - SystemMetadata systemMetadata = metadataChangeProposal.getSystemMetadata(); - if (systemMetadata == null) { - systemMetadata = new SystemMetadata(); - systemMetadata.setRunId(DEFAULT_RUN_ID); - systemMetadata.setLastObserved(System.currentTimeMillis()); - } - systemMetadata.setRegistryName(aspectSpec.getRegistryName()); - systemMetadata.setRegistryVersion(aspectSpec.getRegistryVersion().toString()); - - RecordTemplate oldAspect = null; - SystemMetadata oldSystemMetadata = null; - RecordTemplate newAspect = aspect; - SystemMetadata newSystemMetadata = systemMetadata; - - if (!aspectSpec.isTimeseries()) { - Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestProposalToLocalDB").time(); - UpdateAspectResult result = - ingestAspectToLocalDB(entityUrn, metadataChangeProposal.getAspectName(), ignored -> aspect, auditStamp, - systemMetadata, DEFAULT_MAX_TRANSACTION_RETRY); - ingestToLocalDBTimer.stop(); - oldAspect = result.oldValue; - oldSystemMetadata = result.oldSystemMetadata; - newAspect = result.newValue; - newSystemMetadata = result.newSystemMetadata; - } - - if (oldAspect != newAspect || _alwaysEmitAuditEvent) { - log.debug(String.format("Producing MetadataChangeLog for ingested aspect %s, urn %s", - metadataChangeProposal.getAspectName(), entityUrn)); - - final MetadataChangeLog metadataChangeLog = new MetadataChangeLog(metadataChangeProposal.data()); - if (oldAspect != null) { - metadataChangeLog.setPreviousAspectValue(GenericAspectUtils.serializeAspect(oldAspect)); - } - if (oldSystemMetadata != null) { - metadataChangeLog.setPreviousSystemMetadata(oldSystemMetadata); - } - if (newAspect != null) { - metadataChangeLog.setAspect(GenericAspectUtils.serializeAspect(newAspect)); - } - if (newSystemMetadata != null) { - metadataChangeLog.setSystemMetadata(newSystemMetadata); - } - - 
log.debug(String.format("Serialized MCL event: %s", metadataChangeLog)); - // Since only timeseries aspects are ingested as of now, simply produce mae event for it - produceMetadataChangeLog(entityUrn, aspectSpec, metadataChangeLog); - } else { - log.debug( - String.format("Skipped producing MetadataAuditEvent for ingested aspect %s, urn %s. Aspect has not changed.", - metadataChangeProposal.getAspectName(), entityUrn)); - } - - return entityUrn; - } - private boolean filterMatch(SystemMetadata systemMetadata, Map conditions) { String runIdCondition = conditions.getOrDefault("runId", null); if (runIdCondition != null) { @@ -585,7 +431,7 @@ public RollbackResult deleteAspect(String urn, String aspectName, Map aspectRows, String runId) { @@ -694,14 +540,4 @@ public ListUrnsResult listUrns(@Nonnull final String entityName, final int start result.setEntities(entityUrns); return result; } - - @Value - private static class UpdateAspectResult { - Urn urn; - RecordTemplate oldValue; - RecordTemplate newValue; - SystemMetadata oldSystemMetadata; - SystemMetadata newSystemMetadata; - MetadataAuditOperation operation; - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java new file mode 100644 index 00000000000000..f119787b570061 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -0,0 +1,194 @@ +package com.linkedin.metadata.entity.ebean; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.dao.utils.RecordUtils; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.RetentionService; +import com.linkedin.retention.DataHubRetentionConfig; +import com.linkedin.retention.Retention; +import com.linkedin.retention.TimeBasedRetention; +import com.linkedin.retention.VersionBasedRetention; +import io.ebean.EbeanServer; +import 
io.ebean.Expression; +import io.ebean.ExpressionList; +import io.ebean.PagedList; +import io.ebean.Transaction; +import io.ebeaninternal.server.expression.Op; +import io.ebeaninternal.server.expression.SimpleExpression; +import io.opentelemetry.extension.annotations.WithSpan; +import java.sql.Timestamp; +import java.time.Clock; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; + + +@Slf4j +@RequiredArgsConstructor +public class EbeanRetentionService extends RetentionService { + private final EntityService _entityService; + private final EbeanServer _server; + private final int _batchSize; + + private final Clock _clock = Clock.systemUTC(); + + public EntityService getEntityService() { + return _entityService; + } + + @Override + @WithSpan + public void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName, Retention retentionPolicy, + Optional retentionContext) { + log.debug("Applying retention to urn {}, aspectName {}", urn, aspectName); + // If no policies are set or has indefinite policy set, do not apply any retention + if (retentionPolicy.data().isEmpty()) { + return; + } + ExpressionList deleteQuery = _server.find(EbeanAspectV2.class) + .where() + .eq(EbeanAspectV2.URN_COLUMN, urn.toString()) + .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName) + .ne(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION) + .or(); + + List filterList = new ArrayList<>(); + if (retentionPolicy.hasVersion()) { + getVersionBasedRetentionQuery(urn, aspectName, retentionPolicy.getVersion(), + retentionContext.flatMap(RetentionService.RetentionContext::getMaxVersion)).ifPresent(filterList::add); + } + if (retentionPolicy.hasTime()) { + 
filterList.add(getTimeBasedRetentionQuery(retentionPolicy.getTime())); + } + + // Only run delete if at least one of the retention policies are applicable + if (!filterList.isEmpty()) { + filterList.forEach(deleteQuery::add); + deleteQuery.endOr().delete(); + } + } + + private long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName) { + List result = _server.find(EbeanAspectV2.class) + .where() + .eq("urn", urn) + .eq("aspect", aspectName) + .orderBy() + .desc("version") + .findList(); + if (result.size() == 0) { + return -1; + } + return result.get(0).getKey().getVersion(); + } + + private Optional getVersionBasedRetentionQuery(@Nonnull Urn urn, @Nonnull String aspectName, + @Nonnull final VersionBasedRetention retention, final Optional maxVersionFromUpdate) { + long largestVersion = maxVersionFromUpdate.orElseGet(() -> getMaxVersion(urn.toString(), aspectName)); + + if (largestVersion < retention.getMaxVersions()) { + return Optional.empty(); + } + return Optional.of( + new SimpleExpression(EbeanAspectV2.VERSION_COLUMN, Op.LT, largestVersion - retention.getMaxVersions() + 1)); + } + + private Expression getTimeBasedRetentionQuery(@Nonnull final TimeBasedRetention retention) { + return new SimpleExpression(EbeanAspectV2.CREATED_ON_COLUMN, Op.LT, + new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000)); + } + + @Override + @WithSpan + public void batchApplyRetention(@Nullable String entityName, @Nullable String aspectName) { + log.debug("Applying retention to all records"); + int numCandidates = queryCandidates(entityName, aspectName).findCount(); + log.info("Found {} urn, aspect pair with more than 1 version", numCandidates); + Map retentionPolicyMap = getAllRetentionPolicies(); + + int start = 0; + while (start < numCandidates) { + log.info("Applying retention to pairs {} through {}", start, start + _batchSize); + PagedList rows = getPagedAspects(entityName, aspectName, start, _batchSize); + + try (Transaction transaction = 
_server.beginTransaction()) { + transaction.setBatchMode(true); + transaction.setBatchSize(_batchSize); + for (EbeanAspectV2 row : rows.getList()) { + // Only run for cases where there's multiple versions of the aspect + if (row.getVersion() == 0) { + continue; + } + // 1. Extract an Entity type from the entity Urn + Urn urn; + try { + urn = Urn.createFromString(row.getUrn()); + } catch (Exception e) { + log.error("Failed to serialize urn {}", row.getUrn(), e); + continue; + } + final String aspectNameFromRecord = row.getAspect(); + // Get the retention policies to apply from the local retention policy map + Optional retentionPolicy = getRetentionKeys(urn.getEntityType(), aspectNameFromRecord).stream() + .map(key -> retentionPolicyMap.get(key.toString())) + .filter(Objects::nonNull) + .findFirst() + .map(DataHubRetentionConfig::getRetention); + retentionPolicy.ifPresent(retention -> applyRetention(urn, aspectNameFromRecord, retention, + Optional.of(new RetentionContext(Optional.of(row.getVersion()))))); + } + transaction.commit(); + } + + start += _batchSize; + } + + log.info("Finished applying retention to all records"); + } + + private Map getAllRetentionPolicies() { + return _server.find(EbeanAspectV2.class) + .select(String.format("%s, %s, %s", EbeanAspectV2.URN_COLUMN, EbeanAspectV2.ASPECT_COLUMN, + EbeanAspectV2.METADATA_COLUMN)) + .where() + .eq(EbeanAspectV2.ASPECT_COLUMN, DATAHUB_RETENTION_ASPECT) + .eq(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION) + .findList() + .stream() + .collect(Collectors.toMap(EbeanAspectV2::getUrn, + row -> RecordUtils.toRecordTemplate(DataHubRetentionConfig.class, row.getMetadata()))); + } + + private ExpressionList queryCandidates(@Nullable String entityName, @Nullable String aspectName) { + ExpressionList query = _server.find(EbeanAspectV2.class) + .setDistinct(true) + .select(String.format("%s, %s, max(%s)", EbeanAspectV2.URN_COLUMN, EbeanAspectV2.ASPECT_COLUMN, + EbeanAspectV2.VERSION_COLUMN)) + .where(); + if 
(entityName != null) { + query.like(EbeanAspectV2.URN_COLUMN, String.format("urn:li:%s%%", entityName)); + } + if (aspectName != null) { + query.eq(EbeanAspectV2.ASPECT_COLUMN, aspectName); + } + return query; + } + + private PagedList getPagedAspects(@Nullable String entityName, @Nullable String aspectName, + final int start, final int pageSize) { + return queryCandidates(entityName, aspectName).orderBy( + EbeanAspectV2.URN_COLUMN + ", " + EbeanAspectV2.ASPECT_COLUMN) + .setFirstRow(start) + .setMaxRows(pageSize) + .findPagedList(); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index ff30110214ae32..438d9e2d7f4542 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; +import com.linkedin.common.Status; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.data.ByteString; @@ -19,6 +20,7 @@ import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.EbeanEntityService; +import com.linkedin.metadata.entity.ebean.EbeanRetentionService; import com.linkedin.metadata.entity.ebean.EbeanUtils; import com.linkedin.metadata.event.EntityEventProducer; import com.linkedin.metadata.key.CorpUserKey; @@ -37,6 +39,9 @@ import com.linkedin.mxe.MetadataAuditOperation; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; +import com.linkedin.retention.DataHubRetentionConfig; +import com.linkedin.retention.Retention; +import com.linkedin.retention.VersionBasedRetention; import 
io.ebean.EbeanServer; import io.ebean.EbeanServerFactory; import io.ebean.config.ServerConfig; @@ -56,6 +61,7 @@ import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; @@ -71,6 +77,7 @@ public class EbeanEntityServiceTest { private EbeanAspectDao _aspectDao; private EbeanServer _server; private EntityEventProducer _mockProducer; + private EbeanRetentionService _retentionService; public EbeanEntityServiceTest() throws EntityRegistryException { } @@ -107,6 +114,8 @@ public void setupTest() { _aspectDao = new EbeanAspectDao(_server); _aspectDao.setConnectionValidated(true); _entityService = new EbeanEntityService(_aspectDao, _mockProducer, _testEntityRegistry); + _retentionService = new EbeanRetentionService(_entityService, _server, 1000); + _entityService.setRetentionService(_retentionService); } @Test @@ -650,6 +659,61 @@ public void testIngestListUrns() throws Exception { assertEquals(entityUrn3.toString(), batch2.getEntities().get(0).toString()); } + @Test + public void testRetention() throws Exception { + Urn entityUrn = Urn.createFromString("urn:li:corpuser:test1"); + + SystemMetadata metadata1 = new SystemMetadata(); + metadata1.setLastObserved(1625792689); + metadata1.setRunId("run-123"); + + String aspectName = PegasusUtils.getAspectNameFromSchema(new CorpUserInfo().schema()); + + // Ingest CorpUserInfo Aspect + CorpUserInfo writeAspect1 = createCorpUserInfo("email@test.com"); + _entityService.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + CorpUserInfo writeAspect1a = createCorpUserInfo("email_a@test.com"); + _entityService.ingestAspect(entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1); + CorpUserInfo writeAspect1b = createCorpUserInfo("email_b@test.com"); + _entityService.ingestAspect(entityUrn, aspectName, writeAspect1b, 
TEST_AUDIT_STAMP, metadata1); + + String aspectName2 = PegasusUtils.getAspectNameFromSchema(new Status().schema()); + // Ingest Status Aspect + Status writeAspect2 = new Status().setRemoved(true); + _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1); + Status writeAspect2a = new Status().setRemoved(false); + _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1); + Status writeAspect2b = new Status().setRemoved(true); + _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1); + + assertEquals(_entityService.getAspect(entityUrn, aspectName, 1), writeAspect1); + assertEquals(_entityService.getAspect(entityUrn, aspectName2, 1), writeAspect2); + + _retentionService.setRetention(null, null, new DataHubRetentionConfig().setRetention( + new Retention().setVersion(new VersionBasedRetention().setMaxVersions(2)))); + _retentionService.setRetention("corpuser", "status", new DataHubRetentionConfig().setRetention( + new Retention().setVersion(new VersionBasedRetention().setMaxVersions(4)))); + + // Ingest CorpUserInfo Aspect again + CorpUserInfo writeAspect1c = createCorpUserInfo("email_c@test.com"); + _entityService.ingestAspect(entityUrn, aspectName, writeAspect1c, TEST_AUDIT_STAMP, metadata1); + // Ingest Status Aspect again + Status writeAspect2c = new Status().setRemoved(false); + _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2c, TEST_AUDIT_STAMP, metadata1); + + assertNull(_entityService.getAspect(entityUrn, aspectName, 1)); + assertEquals(_entityService.getAspect(entityUrn, aspectName2, 1), writeAspect2); + + // Reset retention policies + _retentionService.setRetention(null, null, new DataHubRetentionConfig().setRetention( + new Retention().setVersion(new VersionBasedRetention().setMaxVersions(1)))); + _retentionService.deleteRetention("corpuser", "status"); + // Invoke batch apply + _retentionService.batchApplyRetention(null, 
null); + assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName, 0, 10).getTotalCount(), 1); + assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName2, 0, 10).getTotalCount(), 1); + } + @Nonnull private com.linkedin.entity.Entity createCorpUserEntity(Urn entityUrn, String email) throws Exception { CorpuserUrn corpuserUrn = CorpuserUrn.createFromUrn(entityUrn); diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java index 20e9358b451b76..0ff4105704e931 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataAuditEventsProcessor.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.kafka; +import com.codahale.metrics.Timer; import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; import com.linkedin.common.urn.Urn; @@ -92,7 +93,7 @@ public void consume(final ConsumerRecord consumerRecord) final GenericRecord record = consumerRecord.value(); log.debug("Got MAE"); - try { + try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "maeProcess").time()) { final MetadataAuditEvent event = EventUtils.avroToPegasusMAE(record); final RecordTemplate snapshot = RecordUtils.getSelectedRecordTemplateFromUnion(event.getNewSnapshot()); diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DataHubRetentionAspect.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DataHubRetentionAspect.pdl new file mode 100644 index 00000000000000..bb470cee158e02 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DataHubRetentionAspect.pdl @@ -0,0 +1,12 @@ +namespace com.linkedin.metadata.aspect + +import 
com.linkedin.metadata.key.DataHubRetentionKey +import com.linkedin.retention.DataHubRetentionConfig + +/** + * A union of all supported metadata aspects for a DataHub access policy. + */ +typeref DataHubRetentionAspect = union[ + DataHubRetentionKey, + DataHubRetentionConfig +] \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubRetentionKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubRetentionKey.pdl new file mode 100644 index 00000000000000..39c5c97cd51f48 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubRetentionKey.pdl @@ -0,0 +1,19 @@ +namespace com.linkedin.metadata.key + +/** + * Key for a DataHub Retention + */ +@Aspect = { + "name": "dataHubRetentionKey" +} +record DataHubRetentionKey { + /** + * Entity name to apply retention to. * (or empty) for applying defaults. + */ + entityName: string + + /** + * Aspect name to apply retention to. * (or empty) for applying defaults. + */ + aspectName: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DataHubRetentionSnapshot.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DataHubRetentionSnapshot.pdl new file mode 100644 index 00000000000000..313b0f79b3b5db --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/DataHubRetentionSnapshot.pdl @@ -0,0 +1,24 @@ +namespace com.linkedin.metadata.snapshot + +import com.linkedin.common.Urn +import com.linkedin.metadata.aspect.DataHubRetentionAspect + +/** + * A metadata snapshot for DataHub Access Policy data. + */ +@Entity = { + "name": "dataHubRetention", + "keyAspect": "dataHubRetentionKey" +} +record DataHubRetentionSnapshot { + + /** + * URN for the entity the metadata snapshot is associated with. + */ + urn: Urn + + /** + * The list of metadata aspects associated with the DataHub access policy. 
+ */ + aspects: array[DataHubRetentionAspect] +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl index 44104970acf934..91993724afbadc 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl @@ -24,4 +24,5 @@ typeref Snapshot = union[ GlossaryNodeSnapshot, DataHubPolicySnapshot, SchemaFieldSnapshot, + DataHubRetentionSnapshot, ] diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/DataHubRetentionConfig.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/DataHubRetentionConfig.pdl new file mode 100644 index 00000000000000..3e1977760257ff --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/retention/DataHubRetentionConfig.pdl @@ -0,0 +1,8 @@ +namespace com.linkedin.retention + +@Aspect = { + "name": "dataHubRetentionConfig" +} +record DataHubRetentionConfig { + retention: Retention +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/Retention.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/Retention.pdl new file mode 100644 index 00000000000000..61bd896490c0ee --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/retention/Retention.pdl @@ -0,0 +1,10 @@ +namespace com.linkedin.retention + +/** + * Base class that encapsulates different retention policies. 
+ * Only one of the fields should be set + */ +record Retention { + version: optional VersionBasedRetention + time: optional TimeBasedRetention +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/TimeBasedRetention.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/TimeBasedRetention.pdl new file mode 100644 index 00000000000000..9db4fc8fd42523 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/retention/TimeBasedRetention.pdl @@ -0,0 +1,8 @@ +namespace com.linkedin.retention + +/** + * Keep records that are less than X seconds old + */ +record TimeBasedRetention { + maxAgeInSeconds: int +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/retention/VersionBasedRetention.pdl b/metadata-models/src/main/pegasus/com/linkedin/retention/VersionBasedRetention.pdl new file mode 100644 index 00000000000000..1a51a2ca009dc0 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/retention/VersionBasedRetention.pdl @@ -0,0 +1,8 @@ +namespace com.linkedin.retention + +/** + * Keep max N latest records + */ +record VersionBasedRetention { + maxVersions: int +} diff --git a/metadata-service/auth-impl/build.gradle b/metadata-service/auth-impl/build.gradle index f6ed0033667fa6..f87b2cd2b2c892 100644 --- a/metadata-service/auth-impl/build.gradle +++ b/metadata-service/auth-impl/build.gradle @@ -7,7 +7,7 @@ dependencies { implementation 'io.jsonwebtoken:jjwt-api:0.11.2' runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.11.2', - 'io.jsonwebtoken:jjwt-jackson:0.11.2' // or 'io.jsonwebtoken:jjwt-gson:0.11.2' for gson + 'io.jsonwebtoken:jjwt-jackson:0.11.2' compile externalDependency.lombok diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java index 419c192175c0a3..4b844ed07048f6 100644 --- 
a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/TokenService.java @@ -7,6 +7,7 @@ import io.jsonwebtoken.JwtBuilder; import io.jsonwebtoken.Jwts; import io.jsonwebtoken.SignatureAlgorithm; +import java.nio.charset.StandardCharsets; import java.security.Key; import java.util.ArrayList; import java.util.Base64; @@ -104,7 +105,7 @@ public String generateAccessToken(@Nonnull final String sub, @Nonnull final Map< if (this.iss != null) { builder.setIssuer(this.iss); } - byte[] apiKeySecretBytes = Base64.getDecoder().decode(this.signingKey); // Key must be base64'd. + byte [] apiKeySecretBytes = this.signingKey.getBytes(StandardCharsets.UTF_8); final Key signingKey = new SecretKeySpec(apiKeySecretBytes, this.signingAlgorithm.getJcaName()); return builder.signWith(signingKey, this.signingAlgorithm).compact(); } @@ -118,8 +119,10 @@ public String generateAccessToken(@Nonnull final String sub, @Nonnull final Map< public TokenClaims validateAccessToken(@Nonnull final String accessToken) throws TokenException { Objects.requireNonNull(accessToken); try { + byte [] apiKeySecretBytes = this.signingKey.getBytes(StandardCharsets.UTF_8); + final String base64Key = Base64.getEncoder().encodeToString(apiKeySecretBytes); final Claims claims = (Claims) Jwts.parserBuilder() - .setSigningKey(this.signingKey) + .setSigningKey(base64Key) .build() .parse(accessToken) .getBody(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java new file mode 100644 index 00000000000000..00ab3bb1c0baa9 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java @@ -0,0 +1,42 @@ +package com.linkedin.gms.factory.entity; + +import 
com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.RetentionService; +import com.linkedin.metadata.entity.ebean.EbeanRetentionService; +import io.ebean.EbeanServer; +import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.DependsOn; +import org.springframework.context.annotation.PropertySource; + + +@Configuration +@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) +public class RetentionServiceFactory { + + @Autowired + @Qualifier("entityService") + private EntityService _entityService; + + @Autowired + @Qualifier("ebeanServer") + private EbeanServer _server; + + @Value("${RETENTION_APPLICATION_BATCH_SIZE:1000}") + private Integer _batchSize; + + + @Bean(name = "retentionService") + @DependsOn({"ebeanServer", "entityService"}) + @Nonnull + protected RetentionService createInstance() { + RetentionService retentionService = new EbeanRetentionService(_entityService, _server, _batchSize); + _entityService.setRetentionService(retentionService); + return retentionService; + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java index 1a60f6538e5273..150e1e48f39afb 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java @@ -1,6 +1,6 
@@ package com.linkedin.gms.factory.entityregistry; -//import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; import java.io.FileNotFoundException; import java.net.MalformedURLException; @@ -8,14 +8,14 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -//import org.springframework.context.annotation.PropertySource; +import org.springframework.context.annotation.PropertySource; @Configuration -//@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) +@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) public class PluginEntityRegistryFactory { - @Value("${ENTITY_REGISTRY_PLUGIN_PATH:/etc/datahub/plugins/models}") + @Value("${datahub.plugin.entityRegistry.path}") private String pluginRegistryPath; @Bean(name = "pluginEntityRegistry") diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapManagerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java similarity index 77% rename from metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapManagerFactory.java rename to metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java index b2a60259c0c7eb..6996e9277c14d0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapManagerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java @@ -1,10 +1,12 @@ -package com.linkedin.metadata.boot; +package com.linkedin.metadata.boot.factories; import com.google.common.collect.ImmutableList; import 
com.linkedin.gms.factory.entity.EntityServiceFactory; +import com.linkedin.metadata.boot.BootstrapManager; import com.linkedin.metadata.boot.steps.IngestDataPlatformInstancesStep; import com.linkedin.metadata.boot.steps.IngestDataPlatformsStep; import com.linkedin.metadata.boot.steps.IngestPoliciesStep; +import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep; import com.linkedin.metadata.boot.steps.IngestRootUserStep; import com.linkedin.metadata.entity.EntityService; import io.ebean.EbeanServer; @@ -29,6 +31,10 @@ public class BootstrapManagerFactory { @Qualifier("ebeanServer") private EbeanServer _server; + @Autowired + @Qualifier("ingestRetentionPoliciesStep") + private IngestRetentionPoliciesStep _ingestRetentionPoliciesStep; + @Bean(name = "bootstrapManager") @Scope("singleton") @Nonnull @@ -38,11 +44,7 @@ protected BootstrapManager createInstance() { final IngestDataPlatformsStep ingestDataPlatformsStep = new IngestDataPlatformsStep(_entityService); final IngestDataPlatformInstancesStep ingestDataPlatformInstancesStep = new IngestDataPlatformInstancesStep(_entityService, _server); - return new BootstrapManager( - ImmutableList.of( - ingestRootUserStep, - ingestPoliciesStep, - ingestDataPlatformsStep, - ingestDataPlatformInstancesStep)); + return new BootstrapManager(ImmutableList.of(ingestRootUserStep, ingestPoliciesStep, ingestDataPlatformsStep, + ingestDataPlatformInstancesStep, _ingestRetentionPoliciesStep)); } } \ No newline at end of file diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java new file mode 100644 index 00000000000000..a5525cfa2c94b0 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java @@ -0,0 +1,39 @@ +package com.linkedin.metadata.boot.factories; + 
+import com.linkedin.gms.factory.entity.RetentionServiceFactory; +import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.boot.steps.IngestRetentionPoliciesStep; +import com.linkedin.metadata.entity.RetentionService; +import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.PropertySource; +import org.springframework.context.annotation.Scope; + + +@Configuration +@Import({RetentionServiceFactory.class}) +@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) +public class IngestRetentionPoliciesStepFactory { + + @Autowired + @Qualifier("retentionService") + private RetentionService _retentionService; + + @Value("${entityService.retention.enabled}") + private Boolean _enableRetention; + + @Value("${datahub.plugin.retention.path}") + private String _pluginRegistryPath; + + @Bean(name = "ingestRetentionPoliciesStep") + @Scope("singleton") + @Nonnull + protected IngestRetentionPoliciesStep createInstance() { + return new IngestRetentionPoliciesStep(_retentionService, _enableRetention, _pluginRegistryPath); + } +} \ No newline at end of file diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java new file mode 100644 index 00000000000000..088e955203ecdd --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java @@ -0,0 +1,151 @@ +package com.linkedin.metadata.boot.steps; + 
+import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.linkedin.metadata.boot.BootstrapStep; +import com.linkedin.metadata.dao.utils.RecordUtils; +import com.linkedin.metadata.entity.RetentionService; +import com.linkedin.metadata.key.DataHubRetentionKey; +import com.linkedin.retention.DataHubRetentionConfig; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.core.io.ClassPathResource; + + +@Slf4j +@RequiredArgsConstructor +public class IngestRetentionPoliciesStep implements BootstrapStep { + + private final RetentionService _retentionService; + private final boolean _enableRetention; + private final String pluginPath; + + private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory()); + + @Nonnull + @Override + public ExecutionMode getExecutionMode() { + return ExecutionMode.ASYNC; + } + + @Override + public String name() { + return "IngestRetentionPoliciesStep"; + } + + @Override + public void execute() throws IOException, URISyntaxException { + // 0. Execute preflight check to see whether we need to ingest policies + log.info("Ingesting default retention..."); + + // If retention is disabled, skip step + if (!_enableRetention) { + log.info("IngestRetentionPolicies disabled. Skipping."); + return; + } + + // 1. Read default retention config + final Map retentionPolicyMap = + parseFileOrDir(new ClassPathResource("./boot/retention.yaml").getFile()); + + // 2. Read plugin retention config files from input path and overlay + retentionPolicyMap.putAll(parseFileOrDir(new File(pluginPath))); + + // 4. 
Set the specified retention policies + log.info("Setting {} policies", retentionPolicyMap.size()); + boolean hasUpdate = false; + for (DataHubRetentionKey key : retentionPolicyMap.keySet()) { + if (_retentionService.setRetention(key.getEntityName(), key.getAspectName(), retentionPolicyMap.get(key))) { + hasUpdate = true; + } + } + + // 5. If there were updates on any of the retention policies, apply retention to all records + if (hasUpdate) { + log.info("Applying policies to all records"); + _retentionService.batchApplyRetention(null, null); + } + } + + // Parse input yaml file or yaml files in the input directory to generate a retention policy map + private Map parseFileOrDir(File retentionFileOrDir) throws IOException { + // If path does not exist return empty + if (!retentionFileOrDir.exists()) { + return Collections.emptyMap(); + } + + // If directory, parse the yaml files under the directory + if (retentionFileOrDir.isDirectory()) { + Map result = new HashMap<>(); + + for (File retentionFile : retentionFileOrDir.listFiles()) { + if (!retentionFile.isFile()) { + log.info("Element {} in plugin directory {} is not a file. Skipping", retentionFile.getPath(), + retentionFileOrDir.getPath()); + continue; + } + result.putAll(parseFileOrDir(retentionFile)); + } + return result; + } + // If file, parse the yaml file and return result; + if (!retentionFileOrDir.getPath().endsWith(".yaml") && retentionFileOrDir.getPath().endsWith(".yml")) { + log.info("File {} is not a YAML file. Skipping", retentionFileOrDir.getPath()); + return Collections.emptyMap(); + } + return parseYamlRetentionConfig(retentionFileOrDir); + } + + /** + * Parse yaml retention config + * + * The structure of yaml must be a list of retention policies where each element specifies the entity, aspect + * to apply the policy to and the policy definition. The policy definition is converted into the + * {@link com.linkedin.retention.DataHubRetentionConfig} class. 
+ */ + private Map parseYamlRetentionConfig(File retentionConfigFile) + throws IOException { + final JsonNode retentionPolicies = YAML_MAPPER.readTree(retentionConfigFile); + if (!retentionPolicies.isArray()) { + throw new IllegalArgumentException("Retention config file must contain an array of retention policies"); + } + + Map retentionPolicyMap = new HashMap<>(); + + for (JsonNode retentionPolicy : retentionPolicies) { + DataHubRetentionKey key = new DataHubRetentionKey(); + if (retentionPolicy.has("entity")) { + key.setEntityName(retentionPolicy.get("entity").asText()); + } else { + throw new IllegalArgumentException( + "Each element in the retention config must contain field entity. Set to * for setting defaults"); + } + + if (retentionPolicy.has("aspect")) { + key.setAspectName(retentionPolicy.get("aspect").asText()); + } else { + throw new IllegalArgumentException( + "Each element in the retention config must contain field aspect. Set to * for setting defaults"); + } + + DataHubRetentionConfig retentionInfo; + if (retentionPolicy.has("config")) { + retentionInfo = + RecordUtils.toRecordTemplate(DataHubRetentionConfig.class, retentionPolicy.get("config").toString()); + } else { + throw new IllegalArgumentException("Each element in the retention config must contain field config"); + } + + retentionPolicyMap.put(key, retentionInfo); + } + return retentionPolicyMap; + } +} diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml index 71c80f1a846762..62faf6ad2bc1c1 100644 --- a/metadata-service/factories/src/main/resources/application.yml +++ b/metadata-service/factories/src/main/resources/application.yml @@ -32,6 +32,16 @@ datahub: sslContext: protocol: ${DATAHUB_GMS_SSL_PROTOCOL:${GMS_SSL_PROTOCOL:#{null}}} + plugin: + entityRegistry: + path: ${ENTITY_REGISTRY_PLUGIN_PATH:/etc/datahub/plugins/models} + retention: + path: ${RETENTION_PLUGIN_PATH:/etc/datahub/plugins/retention} + 
+entityService: + retention: + enabled: ${ENTITY_SERVICE_ENABLE_RETENTION:false} + graphService: type: ${GRAPH_SERVICE_IMPL:elasticsearch} @@ -42,9 +52,6 @@ configEntityRegistry: # TODO: Change to read from resources on classpath. path: ${ENTITY_REGISTRY_CONFIG_PATH:../../metadata-models/src/main/resources/entity-registry.yml} -pluginEntityRegistry: - path: ${ENTITY_REGISTRY_PLUGIN_PATH:$HOME/.datahub/plugins/models} - authorizationManager: enabled: ${AUTH_POLICIES_ENABLED:true} cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120} diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index b89becccc8d317..5cdb05c3f89f4b 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -708,7 +708,7 @@ "Searchable" : { "addToFilters" : true, "fieldName" : "tags", - "fieldType" : "URN_PARTIAL", + "fieldType" : "URN", "filterNameOverride" : "Tag", "hasValuesFieldName" : "hasTags" } @@ -785,7 +785,7 @@ "Searchable" : { "addToFilters" : true, "fieldName" : "glossaryTerms", - "fieldType" : "URN_PARTIAL", + "fieldType" : "URN", "filterNameOverride" : "Glossary Term" } } ] @@ -2027,20 +2027,21 @@ "items" : { "type" : "record", "name" : "SchemaField", - "doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema", + "doc" : "SchemaField to describe metadata related to dataset schema.", "fields" : [ { "name" : "fieldPath", "type" : "com.linkedin.dataset.SchemaFieldPath", - "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.", + "doc" : "Flattened name of the field. 
Field is computed from jsonPath field.", "Searchable" : { "fieldName" : "fieldPaths", - "fieldType" : "TEXT_PARTIAL" + "fieldType" : "TEXT" } }, { "name" : "jsonPath", "type" : "string", "doc" : "Flattened name of a field in JSON Path notation.", - "optional" : true + "optional" : true, + "Deprecated" : true }, { "name" : "nullable", "type" : "boolean", @@ -2178,7 +2179,7 @@ "/tags/*/tag" : { "boostScore" : 0.5, "fieldName" : "fieldTags", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2190,7 +2191,7 @@ "/terms/*/urn" : { "boostScore" : 0.5, "fieldName" : "fieldGlossaryTerms", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2348,7 +2349,7 @@ "/tags/*/tag" : { "boostScore" : 0.5, "fieldName" : "editedFieldTags", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2360,7 +2361,7 @@ "/terms/*/urn" : { "boostScore" : 0.5, "fieldName" : "editedFieldGlossaryTerms", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } } ] diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index de46ad6ed6f097..e9852ba46ede08 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -776,7 +776,7 @@ "Searchable" : { "addToFilters" : true, "fieldName" : "tags", - "fieldType" : "URN_PARTIAL", + "fieldType" : "URN", "filterNameOverride" : "Tag", "hasValuesFieldName" : "hasTags" } @@ -853,7 +853,7 @@ "Searchable" : { "addToFilters" : true, "fieldName" : "glossaryTerms", - "fieldType" : "URN_PARTIAL", + "fieldType" : "URN", "filterNameOverride" : "Glossary Term" } } ] @@ -2505,20 +2505,21 @@ "items" : { "type" : "record", "name" : "SchemaField", - "doc" : "SchemaField to describe metadata related to dataset schema. 
Schema normalization rules: http://go/tms-schema", + "doc" : "SchemaField to describe metadata related to dataset schema.", "fields" : [ { "name" : "fieldPath", "type" : "com.linkedin.dataset.SchemaFieldPath", - "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.", + "doc" : "Flattened name of the field. Field is computed from jsonPath field.", "Searchable" : { "fieldName" : "fieldPaths", - "fieldType" : "TEXT_PARTIAL" + "fieldType" : "TEXT" } }, { "name" : "jsonPath", "type" : "string", "doc" : "Flattened name of a field in JSON Path notation.", - "optional" : true + "optional" : true, + "Deprecated" : true }, { "name" : "nullable", "type" : "boolean", @@ -2656,7 +2657,7 @@ "/tags/*/tag" : { "boostScore" : 0.5, "fieldName" : "fieldTags", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2668,7 +2669,7 @@ "/terms/*/urn" : { "boostScore" : 0.5, "fieldName" : "fieldGlossaryTerms", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2826,7 +2827,7 @@ "/tags/*/tag" : { "boostScore" : 0.5, "fieldName" : "editedFieldTags", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2838,7 +2839,7 @@ "/terms/*/urn" : { "boostScore" : 0.5, "fieldName" : "editedFieldGlossaryTerms", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } } ] @@ -4463,10 +4464,93 @@ "keyAspect" : "schemaFieldKey", "name" : "schemaField" } + }, { + "type" : "record", + "name" : "DataHubRetentionSnapshot", + "doc" : "A metadata snapshot for DataHub Access Policy data.", + "fields" : [ { + "name" : "urn", + "type" : "com.linkedin.common.Urn", + "doc" : "URN for the entity the metadata snapshot is associated with." 
+ }, { + "name" : "aspects", + "type" : { + "type" : "array", + "items" : { + "type" : "typeref", + "name" : "DataHubRetentionAspect", + "namespace" : "com.linkedin.metadata.aspect", + "doc" : "A union of all supported metadata aspects for a DataHub access policy.", + "ref" : [ { + "type" : "record", + "name" : "DataHubRetentionKey", + "namespace" : "com.linkedin.metadata.key", + "doc" : "Key for a DataHub Retention", + "fields" : [ { + "name" : "entityName", + "type" : "string", + "doc" : "Entity name to apply retention to. * (or empty) for applying defaults." + }, { + "name" : "aspectName", + "type" : "string", + "doc" : "Aspect name to apply retention to. * (or empty) for applying defaults." + } ], + "Aspect" : { + "name" : "dataHubRetentionKey" + } + }, { + "type" : "record", + "name" : "DataHubRetentionConfig", + "namespace" : "com.linkedin.retention", + "fields" : [ { + "name" : "retention", + "type" : { + "type" : "record", + "name" : "Retention", + "doc" : "Base class that encapsulates different retention policies.\nOnly one of the fields should be set", + "fields" : [ { + "name" : "version", + "type" : { + "type" : "record", + "name" : "VersionBasedRetention", + "doc" : "Keep max N latest records", + "fields" : [ { + "name" : "maxVersions", + "type" : "int" + } ] + }, + "optional" : true + }, { + "name" : "time", + "type" : { + "type" : "record", + "name" : "TimeBasedRetention", + "doc" : "Keep records that are less than X seconds old", + "fields" : [ { + "name" : "maxAgeInSeconds", + "type" : "int" + } ] + }, + "optional" : true + } ] + } + } ], + "Aspect" : { + "name" : "dataHubRetentionConfig" + } + } ] + } + }, + "doc" : "The list of metadata aspects associated with the DataHub access policy." 
+ } ], + "Entity" : { + "keyAspect" : "dataHubRetentionKey", + "name" : "dataHubRetention" + } } ] } } ] - }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", { + }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", 
"com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", { "type" : "record", "name" : "BrowseResult", "namespace" : "com.linkedin.metadata.browse", @@ -4550,7 +4634,7 @@ "type" : "int", "doc" : "The total number of elements (entities + groups) directly under queried path" } ] - }, "com.linkedin.metadata.browse.BrowseResultEntity", "com.linkedin.metadata.browse.BrowseResultGroup", "com.linkedin.metadata.browse.BrowseResultMetadata", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", 
"com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", { + }, "com.linkedin.metadata.browse.BrowseResultEntity", "com.linkedin.metadata.browse.BrowseResultGroup", "com.linkedin.metadata.browse.BrowseResultMetadata", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", { "type" : "record", "name" : "AutoCompleteResult", "namespace" : "com.linkedin.metadata.query", @@ -4889,7 +4973,7 @@ "type" : "int", "doc" : "The total number of entities directly under searched path" } ] - }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", 
"com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", 
"com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", 
"com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : "SystemMetadata", "namespace" : "com.linkedin.mxe", @@ -4925,7 +5009,7 @@ "doc" : "Additional properties", "optional" : true } ] - }, "com.linkedin.policy.DataHubActorFilter", "com.linkedin.policy.DataHubPolicyInfo", "com.linkedin.policy.DataHubResourceFilter", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], + }, "com.linkedin.policy.DataHubActorFilter", "com.linkedin.policy.DataHubPolicyInfo", "com.linkedin.policy.DataHubResourceFilter", "com.linkedin.retention.DataHubRetentionConfig", "com.linkedin.retention.Retention", "com.linkedin.retention.TimeBasedRetention", 
"com.linkedin.retention.VersionBasedRetention", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], "schema" : { "name" : "entities", "namespace" : "com.linkedin.entity", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 79480a199a7c6d..4dbbeaec6bf5dc 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -556,7 +556,7 @@ "Searchable" : { "addToFilters" : true, "fieldName" : "tags", - "fieldType" : "URN_PARTIAL", + "fieldType" : "URN", "filterNameOverride" : "Tag", "hasValuesFieldName" : "hasTags" } @@ -633,7 +633,7 @@ "Searchable" : { 
"addToFilters" : true, "fieldName" : "glossaryTerms", - "fieldType" : "URN_PARTIAL", + "fieldType" : "URN", "filterNameOverride" : "Glossary Term" } } ] @@ -1862,20 +1862,21 @@ "items" : { "type" : "record", "name" : "SchemaField", - "doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema", + "doc" : "SchemaField to describe metadata related to dataset schema.", "fields" : [ { "name" : "fieldPath", "type" : "com.linkedin.dataset.SchemaFieldPath", - "doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.", + "doc" : "Flattened name of the field. Field is computed from jsonPath field.", "Searchable" : { "fieldName" : "fieldPaths", - "fieldType" : "TEXT_PARTIAL" + "fieldType" : "TEXT" } }, { "name" : "jsonPath", "type" : "string", "doc" : "Flattened name of a field in JSON Path notation.", - "optional" : true + "optional" : true, + "Deprecated" : true }, { "name" : "nullable", "type" : "boolean", @@ -2013,7 +2014,7 @@ "/tags/*/tag" : { "boostScore" : 0.5, "fieldName" : "fieldTags", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2025,7 +2026,7 @@ "/terms/*/urn" : { "boostScore" : 0.5, "fieldName" : "fieldGlossaryTerms", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2183,7 +2184,7 @@ "/tags/*/tag" : { "boostScore" : 0.5, "fieldName" : "editedFieldTags", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } }, { @@ -2195,7 +2196,7 @@ "/terms/*/urn" : { "boostScore" : 0.5, "fieldName" : "editedFieldGlossaryTerms", - "fieldType" : "URN_PARTIAL" + "fieldType" : "URN" } } } ] diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java index e294fe4ca3692f..2fa428aa37046e 100644 --- 
a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java @@ -355,7 +355,7 @@ public String ingestProposal(@Nonnull MetadataChangeProposal metadataChangePropo final List additionalChanges = AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService); - Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp); + Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp).getUrn(); additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp)); return urn.toString(); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index e61a2b818855d5..eb6c0428b5b8c1 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -132,7 +132,7 @@ public Task ingestProposal( return RestliUtil.toTask(() -> { log.debug("Proposal: {}", metadataChangeProposal); try { - Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp); + Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp).getUrn(); additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp)); return urn.toString(); } catch (ValidationException e) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 10a5df70c458bb..ccbadb931482ed 100644 --- 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -58,9 +58,22 @@ import lombok.extern.slf4j.Slf4j; import org.apache.maven.artifact.versioning.ComparableVersion; -import static com.linkedin.metadata.entity.ValidationUtils.*; -import static com.linkedin.metadata.restli.RestliConstants.*; -import static com.linkedin.metadata.utils.PegasusUtils.*; +import static com.linkedin.metadata.entity.ValidationUtils.validateOrThrow; +import static com.linkedin.metadata.restli.RestliConstants.ACTION_AUTOCOMPLETE; +import static com.linkedin.metadata.restli.RestliConstants.ACTION_BROWSE; +import static com.linkedin.metadata.restli.RestliConstants.ACTION_GET_BROWSE_PATHS; +import static com.linkedin.metadata.restli.RestliConstants.ACTION_INGEST; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_ASPECTS; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_FIELD; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_FILTER; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_INPUT; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_LIMIT; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_PATH; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_QUERY; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_SORT; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_START; +import static com.linkedin.metadata.restli.RestliConstants.PARAM_URN; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; /** diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java index 5717d24c9b6f67..86da4b37dda156 100644 --- 
a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java @@ -24,6 +24,7 @@ public class Config extends HttpServlet { Map config = new HashMap() {{ put("noCode", "true"); + put("retention", "true"); }}; ObjectMapper objectMapper = new ObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -51,8 +52,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws IO PrintWriter out = resp.getWriter(); try { - Map config = new HashMap<>(); - config.put("noCode", "true"); + Map config = new HashMap<>(this.config); Map> pluginTree = getPluginModels(req.getServletContext()); config.put("models", pluginTree); diff --git a/metadata-service/war/src/main/resources/boot/retention.yaml b/metadata-service/war/src/main/resources/boot/retention.yaml new file mode 100644 index 00000000000000..e564a9124aa09e --- /dev/null +++ b/metadata-service/war/src/main/resources/boot/retention.yaml @@ -0,0 +1,14 @@ +- entity: "*" + aspect: "*" + config: + retention: + version: + maxVersions: 20 +#- entity: dataset +# aspect: datasetProperties +# config: +# retention: +# version: +# maxVersions: 10 +# time: +# maxAgeInSeconds: 2592000 # 30 days \ No newline at end of file diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java b/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java index f8f4ffe7891b22..07b8e9dfb23b61 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -29,11 +29,11 @@ public class Constants { public static final String CORP_USER_STATUS_ASPECT_NAME = "corpUserStatus"; public static final String CORP_USER_KEY_ASPECT_NAME = "corpUserKey"; - /** * User Status */ public static final String CORP_USER_STATUS_ACTIVE = "ACTIVE"; - private Constants() { } + private Constants() { + } } diff --git 
a/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java index b6ddcb82cd707b..14b2bfd7056232 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/EntityKeyUtils.java @@ -139,7 +139,7 @@ public static Urn convertEntityKeyToUrn(@Nonnull final RecordTemplate keyAspect, final List urnParts = new ArrayList<>(); for (RecordDataSchema.Field field : keyAspect.schema().getFields()) { Object value = keyAspect.data().get(field.getName()); - String valueString = value.toString(); + String valueString = value == null ? "" : value.toString(); urnParts.add(valueString); // TODO: Determine whether all fields, including urns, should be URL encoded. } return Urn.createFromTuple(entityName, urnParts); diff --git a/perf-test/locustfiles/ingest.py b/perf-test/locustfiles/ingest.py index 0f7535a68476c7..0aa15a3b78b198 100644 --- a/perf-test/locustfiles/ingest.py +++ b/perf-test/locustfiles/ingest.py @@ -10,11 +10,15 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProperties from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot -from locust import HttpUser, between, task +from locust import HttpUser, constant, task class IngestUser(HttpUser): - wait_time = between(1, 5) + wait_time = constant(1) + + @task + def config(self): + self.client.get("/config") @task def ingest(self): diff --git a/settings.gradle b/settings.gradle index 36d3b3d8c499b4..f654dd1275ebbe 100644 --- a/settings.gradle +++ b/settings.gradle @@ -39,3 +39,4 @@ include 'metadata-perf' include 'docs-website' include 'metadata-models-custom' include 'entity-registry:custom-test-model' +include 'spark-lineage' diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index 4169c1709eae56..d6c69f8b6ca134 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ 
-23,4 +23,6 @@ datahub docker quickstart \ --quickstart-compose-file ../docker/docker-compose.dev.yml \ --dump-logs-on-failure +(cd tests/cypress ; yarn install) + pytest -vv --continue-on-collection-errors --junit-xml=junit.smoke.xml diff --git a/metadata-ingestion/src/datahub/integrations/__init__.py b/smoke-test/tests/cypress/__init__.py similarity index 100% rename from metadata-ingestion/src/datahub/integrations/__init__.py rename to smoke-test/tests/cypress/__init__.py diff --git a/smoke-test/tests/cypress/cypress.json b/smoke-test/tests/cypress/cypress.json new file mode 100644 index 00000000000000..33003d2939f4dd --- /dev/null +++ b/smoke-test/tests/cypress/cypress.json @@ -0,0 +1,3 @@ +{ + "baseUrl": "http://localhost:9002" +} diff --git a/smoke-test/tests/cypress/cypress/fixtures/example.json b/smoke-test/tests/cypress/cypress/fixtures/example.json new file mode 100644 index 00000000000000..02e4254378e978 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/fixtures/example.json @@ -0,0 +1,5 @@ +{ + "name": "Using fixtures to represent data", + "email": "hello@cypress.io", + "body": "Fixtures are a great way to mock data for responses to routes" +} diff --git a/smoke-test/tests/cypress/cypress/integration/login/login.js b/smoke-test/tests/cypress/cypress/integration/login/login.js new file mode 100644 index 00000000000000..d5799c52c9ab58 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/integration/login/login.js @@ -0,0 +1,9 @@ +describe('login', () => { + it('logs in', () => { + cy.visit('/'); + cy.get('input[placeholder=Username]').type('datahub'); + cy.get('input[placeholder=Password]').type('datahub'); + cy.contains('Log in').click(); + cy.contains('Welcome back, datahub'); + }); +}) diff --git a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js new file mode 100644 index 00000000000000..4a5b4eb8924c4a --- /dev/null +++ 
b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js @@ -0,0 +1,40 @@ +describe('mutations', () => { + it('can create and add a tag to dataset and visit new tag page', () => { + cy.deleteUrn('urn:li:tag:CypressTestAddTag') + cy.login(); + cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)'); + cy.contains('cypress_logging_events'); + + cy.contains('Add Tag').click(); + + cy.focused().type('CypressTestAddTag'); + + cy.contains('Create CypressTestAddTag').click(); + + cy.get('textarea').type('CypressTestAddTag Test Description'); + + cy.contains(/Create$/).click(); + + // go to tag page + cy.get('a[href="/tag/urn:li:tag:CypressTestAddTag"]').click(); + + // title of tag page + cy.contains('CypressTestAddTag'); + + // description of tag page + cy.contains('CypressTestAddTag Test Description'); + + // used by panel - click to search + cy.contains('1 Datasets').click(); + + // verify dataset shows up in search now + cy.contains('of 1 result').click(); + cy.contains('cypress_logging_events').click(); + cy.get('a[href="/tag/urn:li:tag:CypressTestAddTag"]').within(() => cy.get('span[aria-label=close]').click()); + cy.contains('Yes').click(); + + cy.get('a[href="/tag/urn:li:tag:CypressTestAddTag"]').should('not.exist'); + + cy.deleteUrn('urn:li:tag:CypressTestAddTag') + }); +}) diff --git a/smoke-test/tests/cypress/cypress/integration/search/search.js b/smoke-test/tests/cypress/cypress/integration/search/search.js new file mode 100644 index 00000000000000..a4d0a6238da246 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/integration/search/search.js @@ -0,0 +1,44 @@ +describe('search', () => { + it('can hit all entities search, see some results (testing this any more is tricky because it is cached for now)', () => { + cy.login(); + cy.visit('/'); + cy.get('input[data-testid=search-input]').type('*{enter}'); + cy.contains('of 0 results').should('not.exist'); + cy.contains(/of [0-9]+ results/); + }); + + it('can hit all 
entities search with an impossible query and find 0 results', () => { + cy.login(); + cy.visit('/'); + // random string that is unlikely to accidentally have a match + cy.get('input[data-testid=search-input]').type('zzzzzzzzzzzzzqqqqqqqqqqqqqzzzzzzqzqzqzqzq{enter}'); + cy.contains('of 0 results'); + }); + + it('can search, find a result, and visit the dataset page', () => { + cy.login(); + cy.visit('http://localhost:9002/search?filter_entity=DATASET&filter_tags=urn%3Ali%3Atag%3ACypress&page=1&query=users_created') + cy.contains('of 1 result'); + + cy.contains('Cypress') + + cy.contains('fct_cypress_users_created').click(); + + // platform + cy.contains('Hive'); + + // entity type + cy.contains('Dataset'); + + // entity name + cy.contains('fct_cypress_users_created'); + + // column name + cy.contains('user_id'); + // column description + cy.contains('Id of the user'); + + // table description + cy.contains('table containing all the users created on a single day'); + }); +}) \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/plugins/index.js b/smoke-test/tests/cypress/cypress/plugins/index.js new file mode 100644 index 00000000000000..59b2bab6e4e605 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/plugins/index.js @@ -0,0 +1,22 @@ +/// +// *********************************************************** +// This example plugins/index.js can be used to load plugins +// +// You can change the location of this file or turn off loading +// the plugins file with the 'pluginsFile' configuration option. +// +// You can read more here: +// https://on.cypress.io/plugins-guide +// *********************************************************** + +// This function is called when a project is opened or re-opened (e.g. 
due to +// the project's config changing) + +/** + * @type {Cypress.PluginConfig} + */ +// eslint-disable-next-line no-unused-vars +module.exports = (on, config) => { + // `on` is used to hook into various events Cypress emits + // `config` is the resolved Cypress config +} diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js new file mode 100644 index 00000000000000..74b071682c3f0d --- /dev/null +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -0,0 +1,39 @@ +// *********************************************** +// This example commands.js shows you how to +// create various custom commands and overwrite +// existing commands. +// +// For more comprehensive examples of custom +// commands please read more here: +// https://on.cypress.io/custom-commands +// *********************************************** +// +// +// -- This is a parent command -- +Cypress.Commands.add('login', () => { + cy.request('POST', '/logIn', { + username: 'datahub', + password: 'datahub', + }) +}) + +Cypress.Commands.add('deleteUrn', (urn) => { + cy.request({ method: 'POST', url: 'http://localhost:8080/entities?action=delete', body: { + urn + }, headers: { + "X-RestLi-Protocol-Version": "2.0.0", + "Content-Type": "application/json", + }}) +}) +// +// +// -- This is a child command -- +// Cypress.Commands.add('drag', { prevSubject: 'element'}, (subject, options) => { ... }) +// +// +// -- This is a dual command -- +// Cypress.Commands.add('dismiss', { prevSubject: 'optional'}, (subject, options) => { ... }) +// +// +// -- This will overwrite an existing command -- +// Cypress.Commands.overwrite('visit', (originalFn, url, options) => { ... 
}) diff --git a/smoke-test/tests/cypress/cypress/support/index.js b/smoke-test/tests/cypress/cypress/support/index.js new file mode 100644 index 00000000000000..d68db96df2697e --- /dev/null +++ b/smoke-test/tests/cypress/cypress/support/index.js @@ -0,0 +1,20 @@ +// *********************************************************** +// This example support/index.js is processed and +// loaded automatically before your test files. +// +// This is a great place to put global configuration and +// behavior that modifies Cypress. +// +// You can change the location of this file or turn off +// automatically serving support files with the +// 'supportFile' configuration option. +// +// You can read more here: +// https://on.cypress.io/configuration +// *********************************************************** + +// Import commands.js using ES2015 syntax: +import './commands' + +// Alternatively you can use CommonJS syntax: +// require('./commands') diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json new file mode 100644 index 00000000000000..22362837df6635 --- /dev/null +++ b/smoke-test/tests/cypress/data.json @@ -0,0 +1,1560 @@ +[ + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": ["/prod/kafka/SampleKafkaDataset"] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "description": null, + "uri": null, + "tags": [], + "customProperties": { + "prop1": "fakeprop", + "prop2": "pikachu" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": 
"urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": "https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleKafkaSchema", + "platform": "urn:li:dataPlatform:kafka", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"SampleKafkaSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Kafka dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=boolean].field_foo_2", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Foo field description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "varchar(100)", + "globalTags": { + "tags": [{ "tag": "urn:li:tag:NeedsDocumentation" }] + }, + "recursive": false + }, + { + "fieldPath": "[version=2.0].[type=boolean].field_bar", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Bar field description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + }, + { + "fieldPath": "[version=2.0].[key=True].[type=int].id", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Id specifying which partition the message 
should go to" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": ["/prod/hdfs/SampleCypressHdfsDataset"] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": "https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.EditableSchemaMetadata": { + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "editableSchemaFieldInfo": [ + { + 
"fieldPath": "shipment_info", + "globalTags": { "tags": [{ "tag": "urn:li:tag:Legacy" }] } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleHdfsSchema", + "platform": "urn:li:dataPlatform:hdfs", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHdfsSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample HDFS dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "shipment_info", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "shipment_info.date", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info date description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "Date", + "recursive": false + }, + { + "fieldPath": "shipment_info.target", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info target description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false + }, + { + "fieldPath": "shipment_info.destination", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info destination description" + }, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "shipment_info.geo_info", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info geo_info description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "shipment_info.geo_info.lat", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info geo_info lat" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false + }, + { + "fieldPath": "shipment_info.geo_info.lng", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Shipment info geo_info lng" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + 
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": "https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleHiveSchema", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "field_foo", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Foo field description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "field_bar", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Bar field description" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": 
null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "description": "table where each row represents a single log event", + "uri": null, + "tags": [], + "customProperties": { + "encoding": "utf-8" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": "https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleHiveSchema", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": 
"{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "event_name", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Name of your logging event" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "event_data", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Data of your event" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + }, + { + "fieldPath": "timestamp", + "jsonPath": null, + "nullable": false, + "description": { + "string": "TS the event was ingested" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + }, + { + "fieldPath": "browser", + "jsonPath": null, + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created_no_tag,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "description": "table containing all the users created on a single day", + "uri": null, + "tags": [], + "customProperties": { + "encoding": "utf-8" + } + } + }, + { + 
"com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": "https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleHiveSchema", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "user_id", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Id of the user created" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": 
false + }, + { + "fieldPath": "user_name", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Name of the user who signed up" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": [{ + "name": "user id", + "foreignFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD),user_id)" + ], + "sourceFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),user_id)" + ], + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)" + }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "description": "table containing all the users created on a single day", + "uri": null, + "tags": [], + "customProperties": { + "encoding": "utf-8" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": 
"https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleHiveSchema", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "user_id", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Id of the user created" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "user_name", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Name of the user who signed up" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": [{ + "name": "user id", + "foreignFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD),user_id)" + ], + "sourceFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD),user_id)" + ], + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)" + }] + } + }, + { + 
"com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "description": "table containing all the users deleted on a single day", + "uri": null, + "tags": [], + "customProperties": { + "encoding": "utf-8" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + }, + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.InstitutionalMemory": { + "elements": [ + { + "url": "https://www.linkedin.com", + "description": "Sample doc", + "createStamp": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "SampleHiveSchema", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + 
"impersonator": null + }, + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"SampleHiveSchema\",\"namespace\":\"com.linkedin.dataset\",\"doc\":\"Sample Hive dataset\",\"fields\":[{\"name\":\"field_foo\",\"type\":[\"string\"]},{\"name\":\"field_bar\",\"type\":[\"boolean\"]}]}" + } + }, + "fields": [ + { + "fieldPath": "user_name", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Name of the user who was deleted" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "timestamp", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Timestamp user was deleted at" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false + }, + { + "fieldPath": "user_id", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Id of the user deleted" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "browser_id", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Cookie attached to identify the browser" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + }, + { + "fieldPath": "session_id", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Cookie attached to identify the session" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + 
}, + { + "fieldPath": "deletion_reason", + "jsonPath": null, + "nullable": false, + "description": { + "string": "Why the user chose to deactivate" + }, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "varchar(100)", + "recursive": false + } + ], + "primaryKeys": ["user_name"], + "foreignKeysSpecs": null, + "foreignKeys": [{ + "name": "user session", + "foreignFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD),user_id)" + ], + "sourceFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD),user_id)" + ], + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)" + }] + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:datahub", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "name": "User Creations", + "description": "Constructs the fct_users_created from logging_events", + "type": "SQL", + "flowUrn": "urn:li:dataFlow:(airflow,cypress_dag_abc,PROD)" + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)" + ] + } + }, + { + 
"com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_456)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:datahub", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "name": "User Deletions", + "description": "Constructs the fct_users_deleted from logging_events", + "type": "SQL", + "flowUrn": "urn:li:dataFlow:(airflow,cypress_dag_abc,PROD)" + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(airflow,cypress_dag_abc,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:datahub", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "name": "Users", + "description": 
"Constructs the fct_users_deleted and fct_users_created tables", + "project": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(looker,cypress_baz1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "title": "Baz Chart 1", + "description": "Baz Chart 1", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:datahub", + "impersonator": null + }, + "deleted": null + }, + "chartUrl": null, + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)" + ], + "type": null, + "access": null, + "lastRefreshed": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(looker,cypress_baz2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "title": "Baz Chart 2", + "description": "Baz Chart 2", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:datahub", + "impersonator": null + }, + "deleted": null + }, + "chartUrl": null, + "inputs": { + "array": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)" + } + ] + }, + "type": null, + "access": null, + "lastRefreshed": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + 
"auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(looker,cypress_baz)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpGroup:bfoo", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "title": "Baz Dashboard", + "description": "Baz Dashboard", + "customProperties": { + "prop1": "fakeprop", + "prop2": "pikachu" + }, + "charts": [ + "urn:li:chart:(looker,cypress_baz1)", + "urn:li:chart:(looker,cypress_baz2)" + ], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:datahub", + "impersonator": null + }, + "deleted": null + }, + "dashboardUrl": null, + "access": null, + "lastRefreshed": null + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [{ "tag": "urn:li:tag:Cypress" }] + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:CypressNode.CypressTerm", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "definition": "a product provided to consumers and businesses by a bank or similar depository institution such as a checking account, savings account, certificate of deposit, debit or pre-paid card, or credit card", + "parentNode": "urn:li:glossaryNode:CypressNode", + "sourceRef": "FIBO", + "termSource": "EXTERNAL", + "sourceUrl": "https://spec.edmcouncil.org/fibo/ontology/FBC/FunctionalEntities/FinancialServicesEntities/BankingProduct", + "customProperties": { + "FQDN": "SavingAccount" + } + } + }, + { + 
"com.linkedin.pegasus2avro.common.Ownership": { + "owners": [{ + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER" + }], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe" + } + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:CypressNode", + "aspects": [{ + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "definition": "Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparty identities" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [{ + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER" + }], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe" + } + } + } + ] + } + }, + "proposedDelta": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Cypress", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Cypress", + "description": "Indicates the entity is for cypress integration test purposes" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe", + "impersonator": null + } + } + } + ] + } + }, + "proposedDelta": null + } +] \ No newline at end of file diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py new file mode 100644 index 00000000000000..2eb716fbf75b12 --- /dev/null +++ b/smoke-test/tests/cypress/integration_test.py @@ -0,0 +1,28 @@ +import pytest +import subprocess + +from tests.utils import 
ingest_file_via_rest +from tests.utils import delete_urns_from_file + + +@pytest.fixture(scope="module", autouse=True) +def ingest_cleanup_data(): + print("ingesting test data") + ingest_file_via_rest("tests/cypress/data.json") + yield + print("removing test data") + delete_urns_from_file("tests/cypress/data.json") + + +def test_run_cypress(frontend_session, wait_for_healthchecks): + command = f"npx cypress run" + print('starting?') + proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd="tests/cypress") + stdout = proc.stdout.read() + stderr = proc.stderr.read() + return_code = proc.wait() + print(stdout.decode("utf-8")) + print('stderr output:') + print(stderr.decode("utf-8")) + print('return code', return_code) + assert(return_code == 0) diff --git a/smoke-test/tests/cypress/package.json b/smoke-test/tests/cypress/package.json new file mode 100644 index 00000000000000..499b4854f0aefe --- /dev/null +++ b/smoke-test/tests/cypress/package.json @@ -0,0 +1,9 @@ +{ + "name": "smoke-test", + "version": "1.0.0", + "main": "index.js", + "license": "MIT", + "devDependencies": { + "cypress": "^9.1.0" + } +} diff --git a/smoke-test/tests/cypress/yarn.lock b/smoke-test/tests/cypress/yarn.lock new file mode 100644 index 00000000000000..e4a05529966139 --- /dev/null +++ b/smoke-test/tests/cypress/yarn.lock @@ -0,0 +1,1112 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+# yarn lockfile v1 + + +"@cypress/request@^2.88.7": + version "2.88.10" + resolved "https://registry.yarnpkg.com/@cypress/request/-/request-2.88.10.tgz#b66d76b07f860d3a4b8d7a0604d020c662752cce" + integrity sha512-Zp7F+R93N0yZyG34GutyTNr+okam7s/Fzc1+i3kcqOP8vk6OuajuE9qZJ6Rs+10/1JFtXFYMdyarnU1rZuJesg== + dependencies: + aws-sign2 "~0.7.0" + aws4 "^1.8.0" + caseless "~0.12.0" + combined-stream "~1.0.6" + extend "~3.0.2" + forever-agent "~0.6.1" + form-data "~2.3.2" + http-signature "~1.3.6" + is-typedarray "~1.0.0" + isstream "~0.1.2" + json-stringify-safe "~5.0.1" + mime-types "~2.1.19" + performance-now "^2.1.0" + qs "~6.5.2" + safe-buffer "^5.1.2" + tough-cookie "~2.5.0" + tunnel-agent "^0.6.0" + uuid "^8.3.2" + +"@cypress/xvfb@^1.2.4": + version "1.2.4" + resolved "https://registry.yarnpkg.com/@cypress/xvfb/-/xvfb-1.2.4.tgz#2daf42e8275b39f4aa53c14214e557bd14e7748a" + integrity sha512-skbBzPggOVYCbnGgV+0dmBdW/s77ZkAOXIC1knS8NagwDjBrNC1LuXtQJeiN6l+m7lzmHtaoUw/ctJKdqkG57Q== + dependencies: + debug "^3.1.0" + lodash.once "^4.1.1" + +"@types/node@*": + version "16.11.11" + resolved "https://registry.yarnpkg.com/@types/node/-/node-16.11.11.tgz#6ea7342dfb379ea1210835bada87b3c512120234" + integrity sha512-KB0sixD67CeecHC33MYn+eYARkqTheIRNuu97y2XMjR7Wu3XibO1vaY6VBV6O/a89SPI81cEUIYT87UqUWlZNw== + +"@types/node@^14.14.31": + version "14.17.34" + resolved "https://registry.yarnpkg.com/@types/node/-/node-14.17.34.tgz#fe4b38b3f07617c0fa31ae923fca9249641038f0" + integrity sha512-USUftMYpmuMzeWobskoPfzDi+vkpe0dvcOBRNOscFrGxVp4jomnRxWuVohgqBow2xyIPC0S3gjxV/5079jhmDg== + +"@types/sinonjs__fake-timers@^6.0.2": + version "6.0.4" + resolved "https://registry.yarnpkg.com/@types/sinonjs__fake-timers/-/sinonjs__fake-timers-6.0.4.tgz#0ecc1b9259b76598ef01942f547904ce61a6a77d" + integrity sha512-IFQTJARgMUBF+xVd2b+hIgXWrZEjND3vJtRCvIelcFB5SIXfjV4bOHbHJ0eXKh+0COrBRc8MqteKAz/j88rE0A== + +"@types/sizzle@^2.3.2": + version "2.3.3" + resolved 
"https://registry.yarnpkg.com/@types/sizzle/-/sizzle-2.3.3.tgz#ff5e2f1902969d305225a047c8a0fd5c915cebef" + integrity sha512-JYM8x9EGF163bEyhdJBpR2QX1R5naCJHC8ucJylJ3w9/CVBaskdQ8WqBf8MmQrd1kRvp/a4TS8HJ+bxzR7ZJYQ== + +"@types/yauzl@^2.9.1": + version "2.9.2" + resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.9.2.tgz#c48e5d56aff1444409e39fa164b0b4d4552a7b7a" + integrity sha512-8uALY5LTvSuHgloDVUvWP3pIauILm+8/0pDMokuDYIoNsOkSwd5AiHBTSEJjKTDcZr5z8UpgOWZkxBF4iJftoA== + dependencies: + "@types/node" "*" + +aggregate-error@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a" + integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA== + dependencies: + clean-stack "^2.0.0" + indent-string "^4.0.0" + +ansi-colors@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.1.tgz#cbb9ae256bf750af1eab344f229aa27fe94ba348" + integrity sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA== + +ansi-escapes@^4.3.0: + version "4.3.2" + resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.2.tgz#6b2291d1db7d98b6521d5f1efa42d0f3a9feb65e" + integrity sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ== + dependencies: + type-fest "^0.21.3" + +ansi-regex@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" + integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ== + +ansi-styles@^4.0.0, ansi-styles@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937" + integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg== + 
dependencies: + color-convert "^2.0.1" + +arch@^2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/arch/-/arch-2.2.0.tgz#1bc47818f305764f23ab3306b0bfc086c5a29d11" + integrity sha512-Of/R0wqp83cgHozfIYLbBMnej79U/SVGOOyuB3VVFv1NRM/PSFMK12x9KVtiYzJqmnU5WR2qp0Z5rHb7sWGnFQ== + +asn1@~0.2.3: + version "0.2.6" + resolved "https://registry.yarnpkg.com/asn1/-/asn1-0.2.6.tgz#0d3a7bb6e64e02a90c0303b31f292868ea09a08d" + integrity sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ== + dependencies: + safer-buffer "~2.1.0" + +assert-plus@1.0.0, assert-plus@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525" + integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU= + +astral-regex@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31" + integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ== + +async@^3.2.0: + version "3.2.2" + resolved "https://registry.yarnpkg.com/async/-/async-3.2.2.tgz#2eb7671034bb2194d45d30e31e24ec7e7f9670cd" + integrity sha512-H0E+qZaDEfx/FY4t7iLRv1W2fFI6+pyCeTw1uN20AQPiwqwM6ojPxHxdLv4z8hi2DtnW9BOckSspLucW7pIE5g== + +asynckit@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" + integrity sha1-x57Zf380y48robyXkLzDZkdLS3k= + +at-least-node@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/at-least-node/-/at-least-node-1.0.0.tgz#602cd4b46e844ad4effc92a8011a3c46e0238dc2" + integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg== + +aws-sign2@~0.7.0: + version "0.7.0" + resolved "https://registry.yarnpkg.com/aws-sign2/-/aws-sign2-0.7.0.tgz#b46e890934a9591f2d2f6f86d7e6a9f1b3fe76a8" + integrity sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg= + 
+aws4@^1.8.0: + version "1.11.0" + resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59" + integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA== + +balanced-match@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" + integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== + +bcrypt-pbkdf@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e" + integrity sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4= + dependencies: + tweetnacl "^0.14.3" + +blob-util@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/blob-util/-/blob-util-2.0.2.tgz#3b4e3c281111bb7f11128518006cdc60b403a1eb" + integrity sha512-T7JQa+zsXXEa6/8ZhHcQEW1UFfVM49Ts65uBkFL6fz2QmrElqmbajIDJvuA0tEhRe5eIjpV9ZF+0RfZR9voJFQ== + +bluebird@3.7.2: + version "3.7.2" + resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f" + integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== + +brace-expansion@^1.1.7: + version "1.1.11" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" + integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== + dependencies: + balanced-match "^1.0.0" + concat-map "0.0.1" + +buffer-crc32@~0.2.3: + version "0.2.13" + resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242" + integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI= + +cachedir@^2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/cachedir/-/cachedir-2.3.0.tgz#0c75892a052198f0b21c7c1804d8331edfcae0e8" + 
integrity sha512-A+Fezp4zxnit6FanDmv9EqXNAi3vt9DWp51/71UEhXukb7QUuvtv9344h91dyAxuTLoSYJFU299qzR3tzwPAhw== + +caseless@~0.12.0: + version "0.12.0" + resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc" + integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw= + +chalk@^4.1.0: + version "4.1.2" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" + integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +check-more-types@^2.24.0: + version "2.24.0" + resolved "https://registry.yarnpkg.com/check-more-types/-/check-more-types-2.24.0.tgz#1420ffb10fd444dcfc79b43891bbfffd32a84600" + integrity sha1-FCD/sQ/URNz8ebQ4kbv//TKoRgA= + +ci-info@^3.2.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.3.0.tgz#b4ed1fb6818dea4803a55c623041f9165d2066b2" + integrity sha512-riT/3vI5YpVH6/qomlDnJow6TBee2PBKSEpx3O32EGPYbWGIRsIlGRms3Sm74wYE1JMo8RnO04Hb12+v1J5ICw== + +clean-stack@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b" + integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A== + +cli-cursor@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-3.1.0.tgz#264305a7ae490d1d03bf0c9ba7c925d1753af307" + integrity sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw== + dependencies: + restore-cursor "^3.1.0" + +cli-table3@~0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/cli-table3/-/cli-table3-0.6.0.tgz#b7b1bc65ca8e7b5cef9124e13dc2b21e2ce4faee" + integrity sha512-gnB85c3MGC7Nm9I/FkiasNBOKjOiO1RNuXXarQms37q4QMpWdlbBgD/VnOStA2faG1dpXMv31RFApjX1/QdgWQ== + dependencies: + object-assign "^4.1.0" + string-width 
"^4.2.0" + optionalDependencies: + colors "^1.1.2" + +cli-truncate@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/cli-truncate/-/cli-truncate-2.1.0.tgz#c39e28bf05edcde5be3b98992a22deed5a2b93c7" + integrity sha512-n8fOixwDD6b/ObinzTrp1ZKFzbgvKZvuz/TvejnLn1aQfC6r52XEx85FmuC+3HI+JM7coBRXUvNqEU2PHVrHpg== + dependencies: + slice-ansi "^3.0.0" + string-width "^4.2.0" + +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + +colorette@^2.0.16: + version "2.0.16" + resolved "https://registry.yarnpkg.com/colorette/-/colorette-2.0.16.tgz#713b9af84fdb000139f04546bd4a93f62a5085da" + integrity sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g== + +colors@^1.1.2: + version "1.4.0" + resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78" + integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA== + +combined-stream@^1.0.6, combined-stream@~1.0.6: + version "1.0.8" + resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" + integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== + dependencies: + delayed-stream "~1.0.0" + +commander@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-5.1.0.tgz#46abbd1652f8e059bddaef99bbdcb2ad9cf179ae" + integrity 
sha512-P0CysNDQ7rtVw4QIQtm+MRxV66vKFSvlsQvGYXZWR3qFU0jlMKHZZZgw8e+8DSah4UDKMqnknRDQz+xuQXQ/Zg== + +common-tags@^1.8.0: + version "1.8.2" + resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.2.tgz#94ebb3c076d26032745fd54face7f688ef5ac9c6" + integrity sha512-gk/Z852D2Wtb//0I+kRFNKKE9dIIVirjoqPoA1wJU+XePVXZfGeBpk45+A1rKO4Q43prqWBNY/MiIeRLbPWUaA== + +concat-map@0.0.1: + version "0.0.1" + resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" + integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s= + +core-util-is@1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7" + integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac= + +cross-spawn@^7.0.0: + version "7.0.3" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" + integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== + dependencies: + path-key "^3.1.0" + shebang-command "^2.0.0" + which "^2.0.1" + +cypress@^9.1.0: + version "9.1.0" + resolved "https://registry.yarnpkg.com/cypress/-/cypress-9.1.0.tgz#5d23c1b363b7d4853009c74a422a083a8ad2601c" + integrity sha512-fyXcCN51vixkPrz/vO/Qy6WL3hKYJzCQFeWofOpGOFewVVXrGfmfSOGFntXpzWBXsIwPn3wzW0HOFw51jZajNQ== + dependencies: + "@cypress/request" "^2.88.7" + "@cypress/xvfb" "^1.2.4" + "@types/node" "^14.14.31" + "@types/sinonjs__fake-timers" "^6.0.2" + "@types/sizzle" "^2.3.2" + arch "^2.2.0" + blob-util "^2.0.2" + bluebird "3.7.2" + cachedir "^2.3.0" + chalk "^4.1.0" + check-more-types "^2.24.0" + cli-cursor "^3.1.0" + cli-table3 "~0.6.0" + commander "^5.1.0" + common-tags "^1.8.0" + dayjs "^1.10.4" + debug "^4.3.2" + enquirer "^2.3.6" + eventemitter2 "^6.4.3" + execa "4.1.0" + executable "^4.1.1" + extract-zip "2.0.1" + figures "^3.2.0" + fs-extra "^9.1.0" + getos "^3.2.1" + is-ci "^3.0.0" + is-installed-globally 
"~0.4.0" + lazy-ass "^1.6.0" + listr2 "^3.8.3" + lodash "^4.17.21" + log-symbols "^4.0.0" + minimist "^1.2.5" + ospath "^1.2.2" + pretty-bytes "^5.6.0" + proxy-from-env "1.0.0" + request-progress "^3.0.0" + supports-color "^8.1.1" + tmp "~0.2.1" + untildify "^4.0.0" + url "^0.11.0" + yauzl "^2.10.0" + +dashdash@^1.12.0: + version "1.14.1" + resolved "https://registry.yarnpkg.com/dashdash/-/dashdash-1.14.1.tgz#853cfa0f7cbe2fed5de20326b8dd581035f6e2f0" + integrity sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA= + dependencies: + assert-plus "^1.0.0" + +dayjs@^1.10.4: + version "1.10.7" + resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.10.7.tgz#2cf5f91add28116748440866a0a1d26f3a6ce468" + integrity sha512-P6twpd70BcPK34K26uJ1KT3wlhpuOAPoMwJzpsIWUxHZ7wpmbdZL/hQqBDfz7hGurYSa5PhzdhDHtt319hL3ig== + +debug@^3.1.0: + version "3.2.7" + resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" + integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ== + dependencies: + ms "^2.1.1" + +debug@^4.1.1, debug@^4.3.2: + version "4.3.3" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.3.tgz#04266e0b70a98d4462e6e288e38259213332b664" + integrity sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q== + dependencies: + ms "2.1.2" + +delayed-stream@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" + integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk= + +ecc-jsbn@~0.1.1: + version "0.1.2" + resolved "https://registry.yarnpkg.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz#3a83a904e54353287874c564b7549386849a98c9" + integrity sha1-OoOpBOVDUyh4dMVkt1SThoSamMk= + dependencies: + jsbn "~0.1.0" + safer-buffer "^2.1.0" + +emoji-regex@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" + 
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== + +end-of-stream@^1.1.0: + version "1.4.4" + resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" + integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q== + dependencies: + once "^1.4.0" + +enquirer@^2.3.6: + version "2.3.6" + resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d" + integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg== + dependencies: + ansi-colors "^4.1.1" + +escape-string-regexp@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" + integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ= + +eventemitter2@^6.4.3: + version "6.4.5" + resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-6.4.5.tgz#97380f758ae24ac15df8353e0cc27f8b95644655" + integrity sha512-bXE7Dyc1i6oQElDG0jMRZJrRAn9QR2xyyFGmBdZleNmyQX0FqGYmhZIrIrpPfm/w//LTo4tVQGOGQcGCb5q9uw== + +execa@4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a" + integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA== + dependencies: + cross-spawn "^7.0.0" + get-stream "^5.0.0" + human-signals "^1.1.1" + is-stream "^2.0.0" + merge-stream "^2.0.0" + npm-run-path "^4.0.0" + onetime "^5.1.0" + signal-exit "^3.0.2" + strip-final-newline "^2.0.0" + +executable@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/executable/-/executable-4.1.1.tgz#41532bff361d3e57af4d763b70582db18f5d133c" + integrity sha512-8iA79xD3uAch729dUG8xaaBBFGaEa0wdD2VkYLFHwlqosEj/jT66AzcreRDSgV7ehnNLBW2WR5jIXwGKjVdTLg== + dependencies: + pify "^2.2.0" + 
+extend@~3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa" + integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g== + +extract-zip@2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a" + integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg== + dependencies: + debug "^4.1.1" + get-stream "^5.1.0" + yauzl "^2.10.0" + optionalDependencies: + "@types/yauzl" "^2.9.1" + +extsprintf@1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05" + integrity sha1-lpGEQOMEGnpBT4xS48V06zw+HgU= + +extsprintf@^1.2.0: + version "1.4.1" + resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.4.1.tgz#8d172c064867f235c0c84a596806d279bf4bcc07" + integrity sha512-Wrk35e8ydCKDj/ArClo1VrPVmN8zph5V4AtHwIuHhvMXsKf73UT3BOD+azBIW+3wOJ4FhEH7zyaJCFvChjYvMA== + +fd-slicer@~1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e" + integrity sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4= + dependencies: + pend "~1.2.0" + +figures@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/figures/-/figures-3.2.0.tgz#625c18bd293c604dc4a8ddb2febf0c88341746af" + integrity sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg== + dependencies: + escape-string-regexp "^1.0.5" + +forever-agent@~0.6.1: + version "0.6.1" + resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91" + integrity sha1-+8cfDEGt6zf5bFd60e1C2P2sypE= + +form-data@~2.3.2: + version "2.3.3" + resolved 
"https://registry.yarnpkg.com/form-data/-/form-data-2.3.3.tgz#dcce52c05f644f298c6a7ab936bd724ceffbf3a6" + integrity sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ== + dependencies: + asynckit "^0.4.0" + combined-stream "^1.0.6" + mime-types "^2.1.12" + +fs-extra@^9.1.0: + version "9.1.0" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d" + integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ== + dependencies: + at-least-node "^1.0.0" + graceful-fs "^4.2.0" + jsonfile "^6.0.1" + universalify "^2.0.0" + +fs.realpath@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" + integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8= + +get-stream@^5.0.0, get-stream@^5.1.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3" + integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA== + dependencies: + pump "^3.0.0" + +getos@^3.2.1: + version "3.2.1" + resolved "https://registry.yarnpkg.com/getos/-/getos-3.2.1.tgz#0134d1f4e00eb46144c5a9c0ac4dc087cbb27dc5" + integrity sha512-U56CfOK17OKgTVqozZjUKNdkfEv6jk5WISBJ8SHoagjE6L69zOwl3Z+O8myjY9MEW3i2HPWQBt/LTbCgcC973Q== + dependencies: + async "^3.2.0" + +getpass@^0.1.1: + version "0.1.7" + resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa" + integrity sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo= + dependencies: + assert-plus "^1.0.0" + +glob@^7.1.3: + version "7.2.0" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.0.tgz#d15535af7732e02e948f4c41628bd910293f6023" + integrity sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q== + dependencies: + fs.realpath "^1.0.0" + inflight 
"^1.0.4" + inherits "2" + minimatch "^3.0.4" + once "^1.3.0" + path-is-absolute "^1.0.0" + +global-dirs@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/global-dirs/-/global-dirs-3.0.0.tgz#70a76fe84ea315ab37b1f5576cbde7d48ef72686" + integrity sha512-v8ho2DS5RiCjftj1nD9NmnfaOzTdud7RRnVd9kFNOjqZbISlx5DQ+OrTkywgd0dIt7oFCvKetZSHoHcP3sDdiA== + dependencies: + ini "2.0.0" + +graceful-fs@^4.1.6, graceful-fs@^4.2.0: + version "4.2.8" + resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.8.tgz#e412b8d33f5e006593cbd3cee6df9f2cebbe802a" + integrity sha512-qkIilPUYcNhJpd33n0GBXTB1MMPp14TxEsEs0pTrsSVucApsYzW5V+Q8Qxhik6KU3evy+qkAAowTByymK0avdg== + +has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + +http-signature@~1.3.6: + version "1.3.6" + resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.3.6.tgz#cb6fbfdf86d1c974f343be94e87f7fc128662cf9" + integrity sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw== + dependencies: + assert-plus "^1.0.0" + jsprim "^2.0.2" + sshpk "^1.14.1" + +human-signals@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3" + integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw== + +indent-string@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251" + integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg== + +inflight@^1.0.4: + version "1.0.6" + resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" + integrity 
sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk= + dependencies: + once "^1.3.0" + wrappy "1" + +inherits@2: + version "2.0.4" + resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" + integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== + +ini@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ini/-/ini-2.0.0.tgz#e5fd556ecdd5726be978fa1001862eacb0a94bc5" + integrity sha512-7PnF4oN3CvZF23ADhA5wRaYEQpJ8qygSkbtTXWBeXWXmEVRXK+1ITciHWwHhsjv1TmW0MgacIv6hEi5pX5NQdA== + +is-ci@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-3.0.1.tgz#db6ecbed1bd659c43dac0f45661e7674103d1867" + integrity sha512-ZYvCgrefwqoQ6yTyYUbQu64HsITZ3NfKX1lzaEYdkTDcfKzzCI/wthRRYKkdjHKFVgNiXKAKm65Zo1pk2as/QQ== + dependencies: + ci-info "^3.2.0" + +is-fullwidth-code-point@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" + integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== + +is-installed-globally@~0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/is-installed-globally/-/is-installed-globally-0.4.0.tgz#9a0fd407949c30f86eb6959ef1b7994ed0b7b520" + integrity sha512-iwGqO3J21aaSkC7jWnHP/difazwS7SFeIqxv6wEtLU8Y5KlzFTjyqcSIT0d8s4+dDhKytsk9PJZ2BkS5eZwQRQ== + dependencies: + global-dirs "^3.0.0" + is-path-inside "^3.0.2" + +is-path-inside@^3.0.2: + version "3.0.3" + resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283" + integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== + +is-stream@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.1.tgz#fac1e3d53b97ad5a9d0ae9cef2389f5810a5c077" + integrity 
sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg== + +is-typedarray@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/is-typedarray/-/is-typedarray-1.0.0.tgz#e479c80858df0c1b11ddda6940f96011fcda4a9a" + integrity sha1-5HnICFjfDBsR3dppQPlgEfzaSpo= + +is-unicode-supported@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz#3f26c76a809593b52bfa2ecb5710ed2779b522a7" + integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw== + +isexe@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" + integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA= + +isstream@~0.1.2: + version "0.1.2" + resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a" + integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo= + +jsbn@~0.1.0: + version "0.1.1" + resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513" + integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM= + +json-schema@0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.4.0.tgz#f7de4cf6efab838ebaeb3236474cbba5a1930ab5" + integrity sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA== + +json-stringify-safe@~5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb" + integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus= + +jsonfile@^6.0.1: + version "6.1.0" + resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae" + integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ== + dependencies: + universalify "^2.0.0" + optionalDependencies: + 
graceful-fs "^4.1.6" + +jsprim@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/jsprim/-/jsprim-2.0.2.tgz#77ca23dbcd4135cd364800d22ff82c2185803d4d" + integrity sha512-gqXddjPqQ6G40VdnI6T6yObEC+pDNvyP95wdQhkWkg7crHH3km5qP1FsOXEkzEQwnz6gz5qGTn1c2Y52wP3OyQ== + dependencies: + assert-plus "1.0.0" + extsprintf "1.3.0" + json-schema "0.4.0" + verror "1.10.0" + +lazy-ass@^1.6.0: + version "1.6.0" + resolved "https://registry.yarnpkg.com/lazy-ass/-/lazy-ass-1.6.0.tgz#7999655e8646c17f089fdd187d150d3324d54513" + integrity sha1-eZllXoZGwX8In90YfRUNMyTVRRM= + +listr2@^3.8.3: + version "3.13.5" + resolved "https://registry.yarnpkg.com/listr2/-/listr2-3.13.5.tgz#105a813f2eb2329c4aae27373a281d610ee4985f" + integrity sha512-3n8heFQDSk+NcwBn3CgxEibZGaRzx+pC64n3YjpMD1qguV4nWus3Al+Oo3KooqFKTQEJ1v7MmnbnyyNspgx3NA== + dependencies: + cli-truncate "^2.1.0" + colorette "^2.0.16" + log-update "^4.0.0" + p-map "^4.0.0" + rfdc "^1.3.0" + rxjs "^7.4.0" + through "^2.3.8" + wrap-ansi "^7.0.0" + +lodash.once@^4.1.1: + version "4.1.1" + resolved "https://registry.yarnpkg.com/lodash.once/-/lodash.once-4.1.1.tgz#0dd3971213c7c56df880977d504c88fb471a97ac" + integrity sha1-DdOXEhPHxW34gJd9UEyI+0cal6w= + +lodash@^4.17.21: + version "4.17.21" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" + integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== + +log-symbols@^4.0.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/log-symbols/-/log-symbols-4.1.0.tgz#3fbdbb95b4683ac9fc785111e792e558d4abd503" + integrity sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg== + dependencies: + chalk "^4.1.0" + is-unicode-supported "^0.1.0" + +log-update@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/log-update/-/log-update-4.0.0.tgz#589ecd352471f2a1c0c570287543a64dfd20e0a1" + integrity 
sha512-9fkkDevMefjg0mmzWFBW8YkFP91OrizzkW3diF7CpG+S2EYdy4+TVfGwz1zeF8x7hCx1ovSPTOE9Ngib74qqUg== + dependencies: + ansi-escapes "^4.3.0" + cli-cursor "^3.1.0" + slice-ansi "^4.0.0" + wrap-ansi "^6.2.0" + +merge-stream@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60" + integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w== + +mime-db@1.51.0: + version "1.51.0" + resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.51.0.tgz#d9ff62451859b18342d960850dc3cfb77e63fb0c" + integrity sha512-5y8A56jg7XVQx2mbv1lu49NR4dokRnhZYTtL+KGfaa27uq4pSTXkwQkFJl4pkRMyNFz/EtYDSkiiEHx3F7UN6g== + +mime-types@^2.1.12, mime-types@~2.1.19: + version "2.1.34" + resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.34.tgz#5a712f9ec1503511a945803640fafe09d3793c24" + integrity sha512-6cP692WwGIs9XXdOO4++N+7qjqv0rqxxVvJ3VHPh/Sc9mVZcQP+ZGhkKiTvWMQRr2tbHkJP/Yn7Y0npb3ZBs4A== + dependencies: + mime-db "1.51.0" + +mimic-fn@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b" + integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg== + +minimatch@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083" + integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA== + dependencies: + brace-expansion "^1.1.7" + +minimist@^1.2.5: + version "1.2.5" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602" + integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw== + +ms@2.1.2: + version "2.1.2" + resolved 
"https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" + integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== + +ms@^2.1.1: + version "2.1.3" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" + integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== + +npm-run-path@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea" + integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw== + dependencies: + path-key "^3.0.0" + +object-assign@^4.1.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" + integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM= + +once@^1.3.0, once@^1.3.1, once@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" + integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E= + dependencies: + wrappy "1" + +onetime@^5.1.0: + version "5.1.2" + resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e" + integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg== + dependencies: + mimic-fn "^2.1.0" + +ospath@^1.2.2: + version "1.2.2" + resolved "https://registry.yarnpkg.com/ospath/-/ospath-1.2.2.tgz#1276639774a3f8ef2572f7fe4280e0ea4550c07b" + integrity sha1-EnZjl3Sj+O8lcvf+QoDg6kVQwHs= + +p-map@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/p-map/-/p-map-4.0.0.tgz#bb2f95a5eda2ec168ec9274e06a747c3e2904d2b" + integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ== + dependencies: + aggregate-error "^3.0.0" + 
+path-is-absolute@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" + integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18= + +path-key@^3.0.0, path-key@^3.1.0: + version "3.1.1" + resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" + integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== + +pend@~1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" + integrity sha1-elfrVQpng/kRUzH89GY9XI4AelA= + +performance-now@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b" + integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns= + +pify@^2.2.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/pify/-/pify-2.3.0.tgz#ed141a6ac043a849ea588498e7dca8b15330e90c" + integrity sha1-7RQaasBDqEnqWISY59yosVMw6Qw= + +pretty-bytes@^5.6.0: + version "5.6.0" + resolved "https://registry.yarnpkg.com/pretty-bytes/-/pretty-bytes-5.6.0.tgz#356256f643804773c82f64723fe78c92c62beaeb" + integrity sha512-FFw039TmrBqFK8ma/7OL3sDz/VytdtJr044/QUJtH0wK9lb9jLq9tJyIxUwtQJHwar2BqtiA4iCWSwo9JLkzFg== + +proxy-from-env@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.0.0.tgz#33c50398f70ea7eb96d21f7b817630a55791c7ee" + integrity sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4= + +psl@^1.1.28: + version "1.8.0" + resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24" + integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ== + +pump@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64" + integrity 
sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww== + dependencies: + end-of-stream "^1.1.0" + once "^1.3.1" + +punycode@1.3.2: + version "1.3.2" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d" + integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0= + +punycode@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec" + integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A== + +qs@~6.5.2: + version "6.5.2" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.5.2.tgz#cb3ae806e8740444584ef154ce8ee98d403f3e36" + integrity sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA== + +querystring@0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620" + integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA= + +request-progress@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/request-progress/-/request-progress-3.0.0.tgz#4ca754081c7fec63f505e4faa825aa06cd669dbe" + integrity sha1-TKdUCBx/7GP1BeT6qCWqBs1mnb4= + dependencies: + throttleit "^1.0.0" + +restore-cursor@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/restore-cursor/-/restore-cursor-3.1.0.tgz#39f67c54b3a7a58cea5236d95cf0034239631f7e" + integrity sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA== + dependencies: + onetime "^5.1.0" + signal-exit "^3.0.2" + +rfdc@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/rfdc/-/rfdc-1.3.0.tgz#d0b7c441ab2720d05dc4cf26e01c89631d9da08b" + integrity sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA== + +rimraf@^3.0.0: + version "3.0.2" + resolved 
"https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" + integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== + dependencies: + glob "^7.1.3" + +rxjs@^7.4.0: + version "7.4.0" + resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-7.4.0.tgz#a12a44d7eebf016f5ff2441b87f28c9a51cebc68" + integrity sha512-7SQDi7xeTMCJpqViXh8gL/lebcwlp3d831F05+9B44A4B0WfsEwUQHR64gsH1kvJ+Ep/J9K2+n1hVl1CsGN23w== + dependencies: + tslib "~2.1.0" + +safe-buffer@^5.0.1, safe-buffer@^5.1.2: + version "5.2.1" + resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" + integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== + +safer-buffer@^2.0.2, safer-buffer@^2.1.0, safer-buffer@~2.1.0: + version "2.1.2" + resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" + integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== + +shebang-command@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea" + integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA== + dependencies: + shebang-regex "^3.0.0" + +shebang-regex@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172" + integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== + +signal-exit@^3.0.2: + version "3.0.6" + resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.6.tgz#24e630c4b0f03fea446a2bd299e62b4a6ca8d0af" + integrity sha512-sDl4qMFpijcGw22U5w63KmD3cZJfBuFlVNbVMKje2keoKML7X2UzWbc4XrmEbDwg0NXJc3yv4/ox7b+JWb57kQ== + 
+slice-ansi@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-3.0.0.tgz#31ddc10930a1b7e0b67b08c96c2f49b77a789787" + integrity sha512-pSyv7bSTC7ig9Dcgbw9AuRNUb5k5V6oDudjZoMBSr13qpLBG7tB+zgCkARjq7xIUgdz5P1Qe8u+rSGdouOOIyQ== + dependencies: + ansi-styles "^4.0.0" + astral-regex "^2.0.0" + is-fullwidth-code-point "^3.0.0" + +slice-ansi@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-4.0.0.tgz#500e8dd0fd55b05815086255b3195adf2a45fe6b" + integrity sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ== + dependencies: + ansi-styles "^4.0.0" + astral-regex "^2.0.0" + is-fullwidth-code-point "^3.0.0" + +sshpk@^1.14.1: + version "1.16.1" + resolved "https://registry.yarnpkg.com/sshpk/-/sshpk-1.16.1.tgz#fb661c0bef29b39db40769ee39fa70093d6f6877" + integrity sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg== + dependencies: + asn1 "~0.2.3" + assert-plus "^1.0.0" + bcrypt-pbkdf "^1.0.0" + dashdash "^1.12.0" + ecc-jsbn "~0.1.1" + getpass "^0.1.1" + jsbn "~0.1.0" + safer-buffer "^2.0.2" + tweetnacl "~0.14.0" + +string-width@^4.1.0, string-width@^4.2.0: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-final-newline@^2.0.0: + version "2.0.0" + resolved 
"https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad" + integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA== + +supports-color@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da" + integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw== + dependencies: + has-flag "^4.0.0" + +supports-color@^8.1.1: + version "8.1.1" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-8.1.1.tgz#cd6fc17e28500cff56c1b86c0a7fd4a54a73005c" + integrity sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q== + dependencies: + has-flag "^4.0.0" + +throttleit@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/throttleit/-/throttleit-1.0.0.tgz#9e785836daf46743145a5984b6268d828528ac6c" + integrity sha1-nnhYNtr0Z0MUWlmEtiaNgoUorGw= + +through@^2.3.8: + version "2.3.8" + resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5" + integrity sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU= + +tmp@~0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.2.1.tgz#8457fc3037dcf4719c251367a1af6500ee1ccf14" + integrity sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ== + dependencies: + rimraf "^3.0.0" + +tough-cookie@~2.5.0: + version "2.5.0" + resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2" + integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g== + dependencies: + psl "^1.1.28" + punycode "^2.1.1" + +tslib@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a" + 
integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A== + +tunnel-agent@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd" + integrity sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0= + dependencies: + safe-buffer "^5.0.1" + +tweetnacl@^0.14.3, tweetnacl@~0.14.0: + version "0.14.5" + resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64" + integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q= + +type-fest@^0.21.3: + version "0.21.3" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.21.3.tgz#d260a24b0198436e133fa26a524a6d65fa3b2e37" + integrity sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w== + +universalify@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/universalify/-/universalify-2.0.0.tgz#75a4984efedc4b08975c5aeb73f530d02df25717" + integrity sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ== + +untildify@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/untildify/-/untildify-4.0.0.tgz#2bc947b953652487e4600949fb091e3ae8cd919b" + integrity sha512-KK8xQ1mkzZeg9inewmFVDNkg3l5LUhoq9kN6iWYB/CC9YMG8HA+c1Q8HwDe6dEX7kErrEVNVBO3fWsVq5iDgtw== + +url@^0.11.0: + version "0.11.0" + resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1" + integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE= + dependencies: + punycode "1.3.2" + querystring "0.2.0" + +uuid@^8.3.2: + version "8.3.2" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2" + integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== + +verror@1.10.0: + version "1.10.0" + resolved 
"https://registry.yarnpkg.com/verror/-/verror-1.10.0.tgz#3a105ca17053af55d6e270c1f8288682e18da400" + integrity sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA= + dependencies: + assert-plus "^1.0.0" + core-util-is "1.0.2" + extsprintf "^1.2.0" + +which@^2.0.1: + version "2.0.2" + resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1" + integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA== + dependencies: + isexe "^2.0.0" + +wrap-ansi@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53" + integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrappy@1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" + integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8= + +yauzl@^2.10.0: + version "2.10.0" + resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" + integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk= + dependencies: + buffer-crc32 "~0.2.3" + fd-slicer "~1.1.0" diff --git a/spark-lineage/README.md b/spark-lineage/README.md new file mode 100644 index 00000000000000..89ef07d45df564 --- /dev/null +++ b/spark-lineage/README.md @@ -0,0 +1,88 @@ +# Spark lineage emitter +The Spark lineage emitter is a java library that provides a Spark listener implementation "DatahubLineageEmitter". 
The DatahubLineageEmitter listens to events such as application start/end, and SQLExecution start/end to create pipelines (i.e. DataFlow) and tasks (i.e. DataJob) in Datahub along with lineage. + +## Configuring Spark emitter +Listener configuration can be done using a config file or while creating a Spark session. + +### Config file for spark-submit +When running jobs using spark-submit, the listener is to be configured in the config file. + +``` +spark.master spark://spark-master:7077 + +#Configuring datahub spark listener jar +spark.jars.packages io.acryl:spark-lineage:0.0.1 +spark.extraListeners com.linkedin.datahub.lineage.spark.interceptor.DatahubLineageEmitter +spark.datahub.lineage.mcpEmitter.gmsUrl http://localhost:8080 +``` + +### Configuring with SparkSession Builder for notebooks +When running interactive jobs from a notebook, the listener can be configured while building the Spark Session. + +```python +spark = SparkSession.builder \ + .master("spark://spark-master:7077") \ + .appName("test-application") \ + .config("spark.jars.packages","io.acryl:spark-lineage:0.0.1") \ + .config("spark.extraListeners","com.linkedin.datahub.lineage.interceptor.spark.DatahubLineageEmitter") \ + .config("spark.datahub.lineage.mcpEmitter.gmsUrl", "http://localhost:8080") \ + .enableHiveSupport() \ + .getOrCreate() +``` + +## Model mapping +A pipeline is created per Spark application. +A task is created per unique Spark query execution within an app. + +### Custom properties & relating to Spark UI +The following custom properties in pipelines and tasks relate to the Spark UI: +- appName and appId in a pipeline can be used to determine the Spark application +- description and SQLQueryId in a task can be used to determine the Query Execution within the application on the SQL tab of Spark UI + +Other custom properties of pipelines and tasks capture the start and end times of execution etc. +The query plan is captured in the *queryPlan* property of a task.
+ +## Release notes for v0.0.1 +In this version, basic dataset-level lineage is captured using the model mapping as mentioned earlier. + +### Spark versions supported +The primary version tested is Spark/Scala version 2.4.8/2.11. +We anticipate this to work well with other Spark 2.4.x versions and Scala 2.11. + +Support for other Spark versions is planned in the very near future. + +### Environments tested with +This initial release has been tested with the following environments: +- spark-submit of Python/Java applications to local and remote servers +- notebooks + +Note that testing for other environments such as Databricks and standalone applications is planned in the near future. + +### Spark commands supported +Below is a list of Spark commands that are parsed currently: +- InsertIntoHadoopFsRelationCommand +- SaveIntoDataSourceCommand (jdbc) +- CreateHiveTableAsSelectCommand +- InsertIntoHiveTable + +Effectively, these support data sources/sinks corresponding to Hive, HDFS and JDBC. + +### Spark commands not yet supported +- View related commands +- Cache commands and implications on lineage +- RDD jobs + +### Important notes on usage + +- It is advisable to ensure appName is used appropriately to ensure you can trace lineage from a pipeline back to your source code. + +- If multiple apps with the same appName run concurrently, dataset-lineage will be captured correctly but the custom-properties e.g. app-id, SQLQueryId would be unreliable. We expect this to be quite rare. + +- If spark execution fails, then an empty pipeline would still get created, but it may not have any tasks. + +- For HDFS sources, the folder (name) is regarded as the dataset (name) to align with typical storage of parquet/csv formats. + +## Known limitations +- Only postgres is supported for JDBC sources in this initial release. Support for other driver URL formats will be added in future. +- Behavior with cached datasets is not fully specified/defined in context of lineage.
+- There is a possibility that very short-lived jobs that run within a few milliseconds may not be captured by the listener. This should not cause an issue for realistic Spark applications. diff --git a/spark-lineage/bin/.gitignore b/spark-lineage/bin/.gitignore new file mode 100644 index 00000000000000..7eed456bec8db3 --- /dev/null +++ b/spark-lineage/bin/.gitignore @@ -0,0 +1,2 @@ +/main/ +/test/ diff --git a/spark-lineage/build.gradle b/spark-lineage/build.gradle new file mode 100644 index 00000000000000..660969de23ab10 --- /dev/null +++ b/spark-lineage/build.gradle @@ -0,0 +1,138 @@ +apply plugin: 'java' +apply plugin: 'com.github.johnrengelman.shadow' +apply plugin: 'maven' +apply plugin: 'signing' + +dependencies { + + //Needed for tie breaking of guava version need for spark and wiremock + compile(externalDependency.hadoopMapreduceClient) { + force = true + } + + compile(externalDependency.hadoopCommon) { + force = true + } // required for org.apache.hadoop.util.StopWatch + + compile(externalDependency.commonsIo) { + force = true + } // required for org.apache.commons.io.Charsets that is used internally + + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok + + implementation(project(':metadata-models')) { + exclude group: "org.antlr" + exclude group: "com.google.guava" // causes issues with Guava Stopwatch constructor + } + + implementation(externalDependency.sparkSql){ + exclude group: "org.apache.hadoop" + } + implementation(externalDependency.sparkHive){ + exclude group: "org.apache.hadoop" + } + + testImplementation(externalDependency.postgresql) + + testImplementation externalDependency.mockito + + testImplementation(externalDependency.wiremock){ + exclude group: "com.fasterxml.jackson.core" + } // older version to allow older guava + + testImplementation(externalDependency.testContainersPostgresql) // older version to allow older jackson +} + + + +shadowJar { + zip64=true + classifier='' + dependencies { + 
exclude(dependency("org.apache.hadoop::")) + exclude(dependency("org.apache.spark::")) + exclude(dependency(externalDependency.commonsIo)) + } +} + + + +test { + useJUnit() +} + +assemble { + dependsOn shadowJar +} + +task sourceJar(type: Jar) { + classifier 'sources' + from sourceSets.main.allJava +} + +task javadocJar(type: Jar, dependsOn: javadoc) { + classifier 'javadoc' + from javadoc.destinationDir +} + +artifacts { + archives shadowJar +} + +// uploadArchives { +// repositories { +// mavenDeployer { +// def ossrhUsername = System.getenv('RELEASE_USERNAME') +// def ossrhPassword = System.getenv('RELEASE_PASSWORD') +// beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } + +// repository(url: "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/") { +// authentication(userName: ossrhUsername, password: ossrhPassword) +// } + +// snapshotRepository(url: "https://s01.oss.sonatype.org/content/repositories/snapshots/") { +// authentication(userName: ossrhUsername, password: ossrhPassword) +// } + +// pom.project { +// //No need to specify name here. 
Name is always picked up from project name +// //name 'spark-lineage' +// packaging 'jar' +// // optionally artifactId can be defined here +// description 'Library to push data lineage from spark to datahub' +// url 'https://datahubproject.io' + +// scm { +// connection 'scm:git:git://github.com/linkedin/datahub.git' +// developerConnection 'scm:git:ssh://github.com:linkedin/datahub.git' +// url 'https://github.com/linkedin/datahub.git' +// } + +// licenses { +// license { +// name 'The Apache License, Version 2.0' +// url 'http://www.apache.org/licenses/LICENSE-2.0.txt' +// } +// } + +// developers { +// developer { +// id 'datahub' +// name 'datahub' +// +// } +// } +// } +// } +// } +// } + + +// signing { +// def signingKey = findProperty("signingKey") +// def signingPassword = findProperty("signingPassword") +// useInMemoryPgpKeys(signingKey, signingPassword) +// sign configurations.archives +// } + diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/MCPEmitter.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/MCPEmitter.java new file mode 100644 index 00000000000000..017d75c6078375 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/MCPEmitter.java @@ -0,0 +1,67 @@ +package com.linkedin.datahub.lineage.consumer.impl; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.spark.SparkConf; +import org.apache.spark.SparkEnv; + +import com.linkedin.datahub.lineage.spark.model.LineageConsumer; +import com.linkedin.datahub.lineage.spark.model.LineageEvent; +import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class MCPEmitter implements LineageConsumer { + + private static final String GMS_URL_KEY = "spark.datahub.lineage.mcpEmitter.gmsUrl"; + private static final String SENTINEL = "moot"; + + 
private ConcurrentHashMap singleton = new ConcurrentHashMap<>(); + + private void emit(List mcps) { + RESTEmitter emitter = emitter(); + if (emitter != null) { + mcps.forEach(mcp -> { + log.debug("Emitting \n" + mcp); + try { + emitter.emit(mcp); + } catch (IOException e) { + // log error, but don't impact thread + StringWriter s = new StringWriter(); + PrintWriter p = new PrintWriter(s); + e.printStackTrace(p); + log.error(s.toString()); + p.close(); + } + }); + } + } + + // TODO ideally the impl here should not be tied to Spark; the LineageConsumer + // API needs tweaking to include configs + private RESTEmitter emitter() { + singleton.computeIfAbsent(SENTINEL, x -> { + SparkConf conf = SparkEnv.get().conf(); + if (conf.contains(GMS_URL_KEY)) { + String gmsUrl = conf.get(GMS_URL_KEY); + log.debug("REST emitter configured with GMS url " + gmsUrl); + return RESTEmitter.create(gmsUrl); + } + + log.error("GMS URL not configured."); + return null; + }); + + return singleton.get(SENTINEL); + } + + @Override + public void accept(LineageEvent evt) { + emit(evt.toMcps()); + } +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/RESTEmitter.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/RESTEmitter.java new file mode 100644 index 00000000000000..6fd3ffee83e3b8 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/consumer/impl/RESTEmitter.java @@ -0,0 +1,85 @@ +package com.linkedin.datahub.lineage.consumer.impl; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.HashMap; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.data.template.JacksonDataTemplateCodec; +import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.Getter; +import 
lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@RequiredArgsConstructor +public class RESTEmitter { + + private static final JacksonDataTemplateCodec DATA_TEMPLATE_CODEC = new JacksonDataTemplateCodec(); + + @Getter + private final String gmsUrl; + + public void emit(MetadataChangeProposal mcp) throws IOException { + String payloadJson = DATA_TEMPLATE_CODEC.mapToString(mcp.data()); + ObjectMapper om = new ObjectMapper(); + TypeReference> typeRef = new TypeReference>() { + }; + HashMap o = om.readValue(payloadJson, typeRef); + while (o.values().remove(null)) { + + } + + payloadJson = om.writeValueAsString(o); + payloadJson = "{" + " \"proposal\" :" + payloadJson + "}"; + log.debug("Emitting payload: " + payloadJson + "\n to URL " + this.gmsUrl + "/aspects?action=ingestProposal"); + RESTEmitter.makeRequest(this.gmsUrl + "/aspects?action=ingestProposal", "POST", payloadJson); + } + + public static boolean makeRequest(String urlStr, String method, String payloadJson) throws IOException { + URL url = new URL(urlStr); + HttpURLConnection con = (HttpURLConnection) url.openConnection(); + con.setRequestMethod(method); + con.setRequestProperty("Content-Type", "application/json"); + con.setRequestProperty("X-RestLi-Protocol-Version", "2.0.0"); +// con.setRequestProperty("Accept", "application/json"); + con.setDoOutput(true); + if (payloadJson != null) { + try (OutputStream os = con.getOutputStream()) { + byte[] input = payloadJson.getBytes("utf-8"); + os.write(input, 0, input.length); + } + } + try (BufferedReader br = new BufferedReader(new InputStreamReader(con.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + log.debug("URL: " + urlStr + " Response: " + response.toString()); + } + return true; + + } + + public boolean testConnection() { + try { + RESTEmitter.makeRequest(this.gmsUrl + 
"/config", "GET", null); + return true; + + } catch (IOException e) { + e.printStackTrace(); + return false; + } + } + + public static RESTEmitter create(String gmsUrl) { + return new RESTEmitter(gmsUrl); + } +} \ No newline at end of file diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatahubLineageEmitter.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatahubLineageEmitter.java new file mode 100644 index 00000000000000..8c70f4f8acff65 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatahubLineageEmitter.java @@ -0,0 +1,290 @@ +package com.linkedin.datahub.lineage.spark.interceptor; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; +import org.apache.spark.SparkEnv; +import org.apache.spark.scheduler.SparkListener; +import org.apache.spark.scheduler.SparkListenerApplicationEnd; +import org.apache.spark.scheduler.SparkListenerApplicationStart; +import org.apache.spark.scheduler.SparkListenerEvent; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.plans.QueryPlan; +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; +import org.apache.spark.sql.execution.QueryExecution; +import org.apache.spark.sql.execution.SQLExecution; +import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd; +import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart; + +import com.google.common.base.Splitter; +import 
com.linkedin.datahub.lineage.spark.model.AppEndEvent; +import com.linkedin.datahub.lineage.spark.model.AppStartEvent; +import com.linkedin.datahub.lineage.spark.model.DatasetLineage; +import com.linkedin.datahub.lineage.spark.model.LineageConsumer; +import com.linkedin.datahub.lineage.spark.model.SQLQueryExecEndEvent; +import com.linkedin.datahub.lineage.spark.model.SQLQueryExecStartEvent; +import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset; + +import lombok.extern.slf4j.Slf4j; +import scala.collection.JavaConversions; +import scala.runtime.AbstractFunction1; +import scala.runtime.AbstractPartialFunction; + +@Slf4j +public class DatahubLineageEmitter extends SparkListener { + + private static final int THREAD_CNT = 10; + public static final String CONSUMER_TYPE_KEY = "spark.datahub.lineage.consumerTypes"; + + private final Map appDetails = new ConcurrentHashMap<>(); + private final Map> appSqlDetails = new ConcurrentHashMap<>(); + private final Map appPoolDetails = new ConcurrentHashMap<>(); + +// private static LineageConsumer loggingConsumer() { +// log.warn("Lineage consumer not specified. 
Defaulting to LoggingConsumer."); +// return LineageUtils.LOGGING_CONSUMER; +// } + + private class SqlStartTask implements Runnable { + + private SparkListenerSQLExecutionStart sqlStart; + private SparkContext ctx; + private LogicalPlan plan; + + public SqlStartTask(SparkListenerSQLExecutionStart sqlStart, LogicalPlan plan, SparkContext ctx) { + this.sqlStart = sqlStart; + this.plan = plan; + this.ctx = ctx; + } + + @Override + public void run() { + appSqlDetails.get(ctx.appName()).put(sqlStart.executionId(), + new SQLQueryExecStartEvent(ctx.conf().get("spark.master"), ctx.appName(), ctx.applicationId(), + sqlStart.time(), sqlStart.executionId(), null)); + log.debug("PLAN for execution id: " + ctx.appName() + ":" + sqlStart.executionId() + "\n"); + log.debug(plan.toString()); + + DatasetExtractor extractor = new DatasetExtractor(); + Optional outputDS = extractor.asDataset(plan, ctx, true); + if (!outputDS.isPresent()) { + log.debug("Skipping execution as no output dataset present for execution id: " + ctx.appName() + ":" + + sqlStart.executionId()); + return; + } + + DatasetLineage lineage = new DatasetLineage(sqlStart.description(), plan.toString(), outputDS.get()); + Collection> allInners = new ArrayList<>(); + + plan.collect(new AbstractPartialFunction() { + + @Override + public Void apply(LogicalPlan plan) { + log.debug("CHILD " + plan.getClass() + "\n" + plan + "\n-------------\n"); + Optional inputDS = extractor.asDataset(plan, ctx, false); + inputDS.ifPresent(x -> lineage.addSource(x)); + allInners.addAll(JavaConversions.asJavaCollection(plan.innerChildren())); + return null; + } + + @Override + public boolean isDefinedAt(LogicalPlan x) { + return true; + } + }); + + for (QueryPlan qp : allInners) { + if (!(qp instanceof LogicalPlan)) { + continue; + } + LogicalPlan nestedPlan = (LogicalPlan) qp; + + nestedPlan.collect(new AbstractPartialFunction() { + + @Override + public Void apply(LogicalPlan plan) { + log.debug("INNER CHILD " + plan.getClass() + "\n" + 
plan + "\n-------------\n"); + Optional inputDS = extractor.asDataset(plan, ctx, false); + inputDS.ifPresent( + x -> log.debug("source added for " + ctx.appName() + "/" + sqlStart.executionId() + ": " + x)); + inputDS.ifPresent(x -> lineage.addSource(x)); + return null; + } + + @Override + public boolean isDefinedAt(LogicalPlan x) { + return true; + } + }); + } + + SQLQueryExecStartEvent evt = new SQLQueryExecStartEvent(ctx.conf().get("spark.master"), ctx.appName(), + ctx.applicationId(), + sqlStart.time(), sqlStart.executionId(), lineage); + + appSqlDetails.get(ctx.appName()).put(sqlStart.executionId(), evt); + + consumers().forEach(c -> c.accept(evt)); // TODO parallel stream here? + + log.debug("LINEAGE \n" + lineage + "\n"); + log.debug("Parsed execution id " + ctx.appName() + ":" + sqlStart.executionId()); + + return; + } + + } + + @Override + public void onApplicationStart(SparkListenerApplicationStart applicationStart) { + try { + log.debug("App started: " + applicationStart); + LineageUtils.findSparkCtx().foreach(new AbstractFunction1() { + + @Override + public Void apply(SparkContext sc) { + String appId = applicationStart.appId().isDefined() ? applicationStart.appId().get() : ""; + AppStartEvent evt = new AppStartEvent(LineageUtils.getMaster(sc), applicationStart.appName(), appId, + applicationStart.time(), applicationStart.sparkUser()); + + consumers().forEach(x -> x.accept(evt)); + // TODO keyed by appName; only latest will be considered. Potential + // inconsistencies not mapped. 
+ appDetails.put(applicationStart.appName(), evt); + appSqlDetails.put(applicationStart.appName(), new ConcurrentHashMap<>()); + ExecutorService pool = Executors.newFixedThreadPool(THREAD_CNT); + appPoolDetails.put(applicationStart.appName(), pool); + return null; + } + }); + super.onApplicationStart(applicationStart); + } catch (Exception e) { + // log error, but don't impact thread + StringWriter s = new StringWriter(); + PrintWriter p = new PrintWriter(s); + e.printStackTrace(p); + log.error(s.toString()); + p.close(); + } + } + + @Override + public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { + try { + LineageUtils.findSparkCtx().foreach(new AbstractFunction1() { + + @Override + public Void apply(SparkContext sc) { + log.debug("Application end event received for appId :" + sc.appName()); + AppStartEvent start = appDetails.remove(sc.appName()); + appPoolDetails.remove(sc.appName()).shutdown(); + appSqlDetails.remove(sc.appName()); + if (start == null) { + log.error( + "Application end event received, but start event missing for appId " + sc.applicationId()); + } else { + AppEndEvent evt = new AppEndEvent(LineageUtils.getMaster(sc), sc.appName(), sc.applicationId(), + applicationEnd.time(), start); + + consumers().forEach(x -> x.accept(evt)); + } + return null; + } + }); + super.onApplicationEnd(applicationEnd); + } catch (Exception e) { + // log error, but don't impact thread + StringWriter s = new StringWriter(); + PrintWriter p = new PrintWriter(s); + e.printStackTrace(p); + log.error(s.toString()); + p.close(); + } + } + + @Override + public void onOtherEvent(SparkListenerEvent event) { + try { + if (event instanceof SparkListenerSQLExecutionStart) { + SparkListenerSQLExecutionStart sqlEvt = (SparkListenerSQLExecutionStart) event; + log.debug("SQL Exec start event with id " + sqlEvt.executionId()); + processExecution(sqlEvt); + } else if (event instanceof SparkListenerSQLExecutionEnd) { + SparkListenerSQLExecutionEnd sqlEvt = 
(SparkListenerSQLExecutionEnd) event; + log.debug("SQL Exec end event with id " + sqlEvt.executionId()); + processExecutionEnd(sqlEvt); + } + } catch (Exception e) { + // log error, but don't impact thread + StringWriter s = new StringWriter(); + PrintWriter p = new PrintWriter(s); + e.printStackTrace(p); + log.error(s.toString()); + p.close(); + } + } + + public void processExecutionEnd(SparkListenerSQLExecutionEnd sqlEnd) { + LineageUtils.findSparkCtx().foreach(new AbstractFunction1() { + + @Override + public Void apply(SparkContext sc) { + SQLQueryExecStartEvent start = appSqlDetails.get(sc.appName()).remove(sqlEnd.executionId()); + if (start == null) { + log.error("Execution end event received, but start event missing for appId/sql exec Id " + sc.applicationId() + + ":" + sqlEnd.executionId()); + } else if (start.getDatasetLineage() != null) { +// JobStatus status = jobEnd.jobResult().equals(org.apache.spark.scheduler.JobSucceeded$.MODULE$) +// ? JobStatus.COMPLETED +// : JobStatus.FAILED; + SQLQueryExecEndEvent evt = new SQLQueryExecEndEvent(LineageUtils.getMaster(sc), sc.appName(), + sc.applicationId(), + sqlEnd.time(), sqlEnd.executionId(), start); + + consumers().forEach(x -> x.accept(evt)); + } + return null; + } + }); + } + + // TODO sqlEvt.details() unused + private void processExecution(SparkListenerSQLExecutionStart sqlStart) { + QueryExecution queryExec = SQLExecution.getQueryExecution(sqlStart.executionId()); + if (queryExec == null) { + log.error("Skipping processing for sql exec Id" + sqlStart.executionId() + " as Query execution context could not be read from current spark state"); + return; + } + LogicalPlan plan = queryExec.optimizedPlan(); + SparkSession sess = queryExec.sparkSession(); + SparkContext ctx = sess.sparkContext(); + ExecutorService pool = appPoolDetails.get(ctx.appName()); + pool.execute(new SqlStartTask(sqlStart, plan, ctx)); + } + + private static List consumers() { + SparkConf conf = SparkEnv.get().conf(); + if 
(conf.contains(CONSUMER_TYPE_KEY)) { + String consumerTypes = conf.get(CONSUMER_TYPE_KEY); + + return StreamSupport.stream(Splitter.on(",").trimResults().split(consumerTypes).spliterator(), false) + .map(x -> LineageUtils.getConsumer(x)).filter(x -> x != null).collect(Collectors.toList()); + } else { + return Collections.singletonList(LineageUtils.getConsumer("mcpEmitter")); + } + + } + +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatasetExtractor.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatasetExtractor.java new file mode 100644 index 00000000000000..5b4578588fe57b --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/DatasetExtractor.java @@ -0,0 +1,156 @@ +package com.linkedin.datahub.lineage.spark.interceptor; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkContext; +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation; +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; +import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand; +import org.apache.spark.sql.execution.datasources.HadoopFsRelation; +import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand; +import org.apache.spark.sql.execution.datasources.LogicalRelation; +import org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand; +import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions; +import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation; +import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand; +import org.apache.spark.sql.hive.execution.InsertIntoHiveTable; +import 
org.apache.spark.sql.sources.BaseRelation; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.datahub.lineage.spark.model.dataset.CatalogTableDataset; +import com.linkedin.datahub.lineage.spark.model.dataset.HdfsPathDataset; +import com.linkedin.datahub.lineage.spark.model.dataset.JdbcDataset; +import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset; + +import scala.Option; +import scala.collection.JavaConversions; + +public class DatasetExtractor { + private static final Map, PlanToDataset> PLAN_TO_DATASET = new HashMap<>(); + private static final Map, RelationToDataset> REL_TO_DATASET = new HashMap<>(); + private static final Set> OUTPUT_CMD = ImmutableSet + .of(InsertIntoHadoopFsRelationCommand.class, SaveIntoDataSourceCommand.class, + CreateDataSourceTableAsSelectCommand.class, CreateHiveTableAsSelectCommand.class, + InsertIntoHiveTable.class); + // TODO InsertIntoHiveDirCommand, InsertIntoDataSourceDirCommand + + private static interface PlanToDataset { + Optional fromPlanNode(LogicalPlan plan, SparkContext ctx); + } + + private static interface RelationToDataset { + Optional fromRelation(BaseRelation rel, SparkContext ctx); + } + + static { + PLAN_TO_DATASET.put(InsertIntoHadoopFsRelationCommand.class, (p, ctx) -> { + InsertIntoHadoopFsRelationCommand cmd = (InsertIntoHadoopFsRelationCommand) p; + if (cmd.catalogTable().isDefined()) { + return Optional.of(new CatalogTableDataset(cmd.catalogTable().get())); + } + return Optional.of(new HdfsPathDataset(cmd.outputPath())); + }); + + PLAN_TO_DATASET.put(LogicalRelation.class, (p, ctx) -> { + BaseRelation baseRel = ((LogicalRelation) p).relation(); + if (!REL_TO_DATASET.containsKey(baseRel.getClass())) { + return Optional.empty(); + } + return REL_TO_DATASET.get(baseRel.getClass()).fromRelation(baseRel, ctx); + }); + + PLAN_TO_DATASET.put(SaveIntoDataSourceCommand.class, (p, ctx) -> { + /* + * BaseRelation relation; if (((SaveIntoDataSourceCommand) p).dataSource() + * instanceof 
RelationProvider) { RelationProvider relProvider = + * (RelationProvider) ((SaveIntoDataSourceCommand) p).dataSource(); relation = + * relProvider.createRelation(ctx, ((SaveIntoDataSourceCommand) p).options()); } + * else { SchemaRelationProvider relProvider = (SchemaRelationProvider) + * ((SaveIntoDataSourceCommand) p).dataSource(); relation = + * p.createRelation(ctx, ((SaveIntoDataSourceCommand) p).options(), p.schema()); + * } + */ + SaveIntoDataSourceCommand cmd = (SaveIntoDataSourceCommand) p; + + Map options = JavaConversions.mapAsJavaMap(cmd.options()); + String url = options.get("url"); // e.g. jdbc:postgresql://localhost:5432/sparktestdb + if (!url.contains("jdbc")) { + return Optional.empty(); + } + + String tbl = options.get("dbtable"); + return Optional.of(new JdbcDataset(url, tbl)); + }); + + PLAN_TO_DATASET.put(CreateDataSourceTableAsSelectCommand.class, (p, ctx) -> { + CreateDataSourceTableAsSelectCommand cmd = (CreateDataSourceTableAsSelectCommand) p; + // TODO what of cmd.mode() + return Optional.of(new CatalogTableDataset(cmd.table())); + }); + PLAN_TO_DATASET.put(CreateHiveTableAsSelectCommand.class, (p, ctx) -> { + CreateHiveTableAsSelectCommand cmd = (CreateHiveTableAsSelectCommand) p; + return Optional.of(new CatalogTableDataset(cmd.tableDesc())); + }); + PLAN_TO_DATASET.put(InsertIntoHiveTable.class, (p, ctx) -> { + InsertIntoHiveTable cmd = (InsertIntoHiveTable) p; + return Optional.of(new CatalogTableDataset(cmd.table())); + }); + + PLAN_TO_DATASET.put(HiveTableRelation.class, (p, ctx) -> { + HiveTableRelation cmd = (HiveTableRelation) p; + return Optional.of(new CatalogTableDataset(cmd.tableMeta())); + }); + + REL_TO_DATASET.put(HadoopFsRelation.class, (r, ctx) -> { + List res = JavaConversions.asJavaCollection(((HadoopFsRelation) r).location().rootPaths()).stream() + .map(p -> getDirectoryPath(p, ctx.hadoopConfiguration())) + .distinct() + .collect(Collectors.toList()); + + // TODO mapping to URN TBD + return Optional.of(new 
HdfsPathDataset(res.get(0))); + }); + REL_TO_DATASET.put(JDBCRelation.class, (r, ctx) -> { + JDBCRelation rel = (JDBCRelation) r; + Option tbl = rel.jdbcOptions().parameters().get(JDBCOptions.JDBC_TABLE_NAME()); + if (tbl.isEmpty()) { + return Optional.empty(); + } + + return Optional.of(new JdbcDataset(rel.jdbcOptions().url(), tbl.get())); + }); + } + + Optional asDataset(LogicalPlan logicalPlan, SparkContext ctx, boolean outputNode) { + if (!outputNode && OUTPUT_CMD.contains(logicalPlan.getClass())) { + return Optional.empty(); + } + + if (!PLAN_TO_DATASET.containsKey(logicalPlan.getClass())) { + return Optional.empty(); + } + + return PLAN_TO_DATASET.get(logicalPlan.getClass()).fromPlanNode(logicalPlan, ctx); + } + + private static Path getDirectoryPath(Path p, Configuration hadoopConf) { + try { + if (p.getFileSystem(hadoopConf).getFileStatus(p).isFile()) { + return p.getParent(); + } else { + return p; + } + } catch (IOException e) { + // log.warn("Unable to get file system for path ", e); + return p; + } + } +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/LineageUtils.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/LineageUtils.java new file mode 100644 index 00000000000000..aab25fc85e2ef5 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/interceptor/LineageUtils.java @@ -0,0 +1,125 @@ +package com.linkedin.datahub.lineage.spark.interceptor; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import javax.annotation.Nonnull; + +import org.apache.spark.SparkContext; +import org.apache.spark.SparkContext$; +import org.apache.spark.sql.SparkSession; + +import com.linkedin.common.urn.DataFlowUrn; +import com.linkedin.data.ByteString; +import com.linkedin.data.template.JacksonDataTemplateCodec; +import com.linkedin.data.template.RecordTemplate; +import 
com.linkedin.datahub.lineage.consumer.impl.MCPEmitter; +import com.linkedin.datahub.lineage.spark.model.LineageConsumer; +import com.linkedin.mxe.GenericAspect; + +import lombok.extern.slf4j.Slf4j; +import scala.Option; +import scala.runtime.AbstractFunction0; +import scala.runtime.AbstractFunction1; + +@Slf4j +public class LineageUtils { + private static final JacksonDataTemplateCodec DATA_TEMPLATE_CODEC = new JacksonDataTemplateCodec(); + + private static Map consumers = new ConcurrentHashMap<>(); + + public static final LineageConsumer LOGGING_CONSUMER = (x -> log.info(x.toString())); + + // hook for replacing paths during testing. Not the cleanest way, TODO improve. + /* This is for generating urn from a hash of the plan */ + // private static Function PATH_REPLACER = (x -> x); + + static { + // system defined consumers + registerConsumer("mcpEmitter", new MCPEmitter()); + } + + private LineageUtils() { + + } + + // overwrites existing consumer entry of same type + public static void registerConsumer(String consumerType, LineageConsumer consumer) { + consumers.put(consumerType, consumer); + } + + public static LineageConsumer getConsumer(String consumerType) { + return consumers.get(consumerType); + } + + public static DataFlowUrn flowUrn(String master, String appName) { + return new DataFlowUrn("spark", appName, master.replaceAll(":", "_").replaceAll("/", "_").replaceAll("[_]+", "_")); + } + + // Taken from GenericAspectUtils + public static GenericAspect serializeAspect(@Nonnull RecordTemplate aspect) { + GenericAspect genericAspect = new GenericAspect(); + + try { + String aspectStr = DATA_TEMPLATE_CODEC.mapToString(aspect.data()); + genericAspect.setValue( + ByteString.unsafeWrap(aspectStr.getBytes(StandardCharsets.UTF_8))); + genericAspect.setContentType("application/json"); + return genericAspect; + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + + public static Option findSparkCtx() { + return SparkSession.getActiveSession() + 
.map(new AbstractFunction1() { + + @Override + public SparkContext apply(SparkSession sess) { + return sess.sparkContext(); + } + }) + .orElse(new AbstractFunction0>() { + + @Override + public Option apply() { + return SparkContext$.MODULE$.getActive(); + } + }); + } + + public static String getMaster(SparkContext ctx) { + return ctx.conf().get("spark.master"); + } + + /* This is for generating urn from a hash of the plan */ + +/* + public static String scrubPlan(String plan) { + String s = plan.replaceAll("#[0-9]*", ""); + s = s.replaceAll("JdbcRelationProvider@[0-9a-zA-Z]*,", "JdbcRelationProvider,"); + s = s.replaceAll("InMemoryFileIndex@[0-9a-zA-Z]*,", "InMemoryFileIndex,"); + s = s.replaceAll("Created Time:[^\n]+\n", ""); + s = s.replaceAll("Last Access:[^\n]+\n", ""); + s = s.replaceAll("Owner:[^\n]+\n", ""); + s = s.replaceAll("Statistics:[^\n]+\n", ""); + s = s.replaceAll("Table Properties:[^\n]+\n", ""); + // System.out.println("CLEAN: " + s); + return s; + } + + public static void setPathReplacer(Function replacer) { + PATH_REPLACER = replacer; + } + + public static String hash(String s) { + s = PATH_REPLACER.apply(s); + log.debug("PATH REPLACED " + s); + return Hashing.md5().hashString(s, Charset.forName("US-ASCII")).toString(); + } + */ +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/AppEndEvent.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/AppEndEvent.java new file mode 100644 index 00000000000000..4e06d2fd923e51 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/AppEndEvent.java @@ -0,0 +1,46 @@ +package com.linkedin.datahub.lineage.spark.model; + +import java.util.Arrays; +import java.util.List; + +import com.linkedin.common.urn.DataFlowUrn; +import com.linkedin.data.template.StringMap; +import com.linkedin.datahub.lineage.spark.interceptor.LineageUtils; +import com.linkedin.datajob.DataFlowInfo; +import com.linkedin.events.metadata.ChangeType; 
+import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.Getter; +import lombok.ToString; + +@ToString +@Getter +public class AppEndEvent extends LineageEvent { + + private final AppStartEvent start; + + public AppEndEvent(String master, String appName, String appId, long time, AppStartEvent start) { + super(master, appName, appId, time); + this.start = start; + } + + @Override + public List toMcps() { + DataFlowUrn flowUrn = LineageUtils.flowUrn(getMaster(), getAppName()); + + StringMap customProps = start.customProps(); + customProps.put("completedAt", timeStr()); + + DataFlowInfo flowInfo = new DataFlowInfo() + .setName(getAppName()) + .setCustomProperties(customProps); + + MetadataChangeProposal mcpFlowInfo = new MetadataChangeProposal(); + mcpFlowInfo.setAspectName("dataFlowInfo"); + mcpFlowInfo.setAspect(LineageUtils.serializeAspect(flowInfo)); + mcpFlowInfo.setEntityUrn(flowUrn); + mcpFlowInfo.setEntityType("dataFlow"); + mcpFlowInfo.setChangeType(ChangeType.UPSERT); + return Arrays.asList(mcpFlowInfo); + } +} \ No newline at end of file diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/AppStartEvent.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/AppStartEvent.java new file mode 100644 index 00000000000000..97c5ecec99f086 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/AppStartEvent.java @@ -0,0 +1,52 @@ +package com.linkedin.datahub.lineage.spark.model; + +import java.util.Arrays; +import java.util.List; + +import com.linkedin.common.urn.DataFlowUrn; +import com.linkedin.data.template.StringMap; +import com.linkedin.datahub.lineage.spark.interceptor.LineageUtils; +import com.linkedin.datajob.DataFlowInfo; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.Getter; +import lombok.ToString; + +@ToString +@Getter +public class AppStartEvent extends LineageEvent { + + private final String 
sparkUser; + + public AppStartEvent(String master, String appName, String appId, long time, String sparkUser) { + super(master, appName, appId, time); + this.sparkUser = sparkUser; + } + + @Override + public List toMcps() { + DataFlowUrn flowUrn = LineageUtils.flowUrn(getMaster(), getAppName()); + + DataFlowInfo flowInfo = new DataFlowInfo() + .setName(getAppName()) + .setCustomProperties(customProps()); + + MetadataChangeProposal mcpFlowInfo = new MetadataChangeProposal(); + mcpFlowInfo.setAspectName("dataFlowInfo"); + mcpFlowInfo.setAspect(LineageUtils.serializeAspect(flowInfo)); + mcpFlowInfo.setEntityUrn(flowUrn); + mcpFlowInfo.setEntityType("dataFlow"); + mcpFlowInfo.setChangeType(ChangeType.UPSERT); + return Arrays.asList(mcpFlowInfo); + } + + StringMap customProps() { + StringMap customProps = new StringMap(); + customProps.put("startedAt", timeStr()); + customProps.put("appId", getAppId()); + customProps.put("appName", getAppName()); + customProps.put("sparkUser", sparkUser); + return customProps; + } +} \ No newline at end of file diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/DatasetLineage.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/DatasetLineage.java new file mode 100644 index 00000000000000..82446cee80adb0 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/DatasetLineage.java @@ -0,0 +1,35 @@ +package com.linkedin.datahub.lineage.spark.model; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +@RequiredArgsConstructor +@ToString +public class DatasetLineage { + + private final Set sources = new HashSet<>(); + + @Getter + private final String callSiteShort; + + @Getter + private final String plan; + + @Getter + private final SparkDataset sink; + + public 
void addSource(SparkDataset source) { + sources.add(source); + } + + public Set getSources() { + return Collections.unmodifiableSet(sources); + } +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/LineageConsumer.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/LineageConsumer.java new file mode 100644 index 00000000000000..1c9341116b2b1e --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/LineageConsumer.java @@ -0,0 +1,6 @@ +package com.linkedin.datahub.lineage.spark.model; + +import io.netty.util.internal.shaded.org.jctools.queues.MessagePassingQueue.Consumer; + +public interface LineageConsumer extends Consumer { +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/LineageEvent.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/LineageEvent.java new file mode 100644 index 00000000000000..dcfa514329fa4d --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/LineageEvent.java @@ -0,0 +1,22 @@ +package com.linkedin.datahub.lineage.spark.model; + +import java.util.Date; +import java.util.List; + +import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.Data; + +@Data +public abstract class LineageEvent { + private final String master; + private final String appName; + private final String appId; + private final long time; + + public abstract List toMcps(); + + protected String timeStr() { + return new Date(getTime()).toInstant().toString(); + } +} \ No newline at end of file diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/SQLQueryExecEndEvent.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/SQLQueryExecEndEvent.java new file mode 100644 index 00000000000000..f5fa389e605d1c --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/SQLQueryExecEndEvent.java @@ -0,0 +1,47 @@ +package 
com.linkedin.datahub.lineage.spark.model; + +import java.util.Arrays; +import java.util.List; + +import com.linkedin.common.urn.DataJobUrn; +import com.linkedin.data.template.StringMap; +import com.linkedin.datahub.lineage.spark.interceptor.LineageUtils; +import com.linkedin.datajob.DataJobInfo; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.Getter; +import lombok.ToString; + +@ToString +@Getter +public class SQLQueryExecEndEvent extends LineageEvent { + + private final long sqlQueryExecId; + private final SQLQueryExecStartEvent start; + + public SQLQueryExecEndEvent(String master, String appName, String appId, long time, long sqlQueryExecId, SQLQueryExecStartEvent start) { + super(master, appName, appId, time); + this.sqlQueryExecId = sqlQueryExecId; + this.start = start; + } + + @Override + public List toMcps() { + DataJobUrn jobUrn = start.jobUrn(); + StringMap customProps = start.customProps(); + customProps.put("completedAt", timeStr()); + + DataJobInfo jobInfo = start.jobInfo() + .setCustomProperties(customProps); + + MetadataChangeProposal mcpJobInfo = new MetadataChangeProposal(); + mcpJobInfo.setAspectName("dataJobInfo"); + mcpJobInfo.setAspect(LineageUtils.serializeAspect(jobInfo)); + mcpJobInfo.setEntityUrn(jobUrn); + mcpJobInfo.setEntityType("dataJob"); + mcpJobInfo.setChangeType(ChangeType.UPSERT); + + return Arrays.asList(mcpJobInfo); + } +} \ No newline at end of file diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/SQLQueryExecStartEvent.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/SQLQueryExecStartEvent.java new file mode 100644 index 00000000000000..16909afcb69f7d --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/SQLQueryExecStartEvent.java @@ -0,0 +1,119 @@ +package com.linkedin.datahub.lineage.spark.model; + +import java.util.Arrays; +import java.util.Comparator; +import 
java.util.List; +import java.util.Set; +import java.util.TreeSet; + +import com.linkedin.common.DatasetUrnArray; +import com.linkedin.common.urn.DataFlowUrn; +import com.linkedin.common.urn.DataJobUrn; +import com.linkedin.data.template.StringMap; +import com.linkedin.datahub.lineage.spark.interceptor.LineageUtils; +import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset; +import com.linkedin.datajob.DataJobInfo; +import com.linkedin.datajob.DataJobInputOutput; +import com.linkedin.datajob.JobStatus; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.mxe.MetadataChangeProposal; + +import lombok.Getter; +import lombok.ToString; + +@ToString +@Getter +public class SQLQueryExecStartEvent extends LineageEvent { + private final long sqlQueryExecId; + private final DatasetLineage datasetLineage; + + public SQLQueryExecStartEvent(String master, String appName, String appId, long time, long sqlQueryExecId, + DatasetLineage datasetLineage) { + super(master, appName, appId, time); + this.sqlQueryExecId = sqlQueryExecId; + this.datasetLineage = datasetLineage; + } + + @Override + public List toMcps() { + DataJobUrn jobUrn = jobUrn(); + MetadataChangeProposal mcpJobIO = new MetadataChangeProposal(); + mcpJobIO.setAspectName("dataJobInputOutput"); + mcpJobIO.setAspect(LineageUtils.serializeAspect(jobIO())); + mcpJobIO.setEntityUrn(jobUrn); + mcpJobIO.setEntityType("dataJob"); + mcpJobIO.setChangeType(ChangeType.UPSERT); + + DataJobInfo jobInfo = jobInfo(); + jobInfo.setCustomProperties(customProps()); + jobInfo.setStatus(JobStatus.IN_PROGRESS); + + MetadataChangeProposal mcpJobInfo = new MetadataChangeProposal(); + mcpJobInfo.setAspectName("dataJobInfo"); + mcpJobInfo.setAspect(LineageUtils.serializeAspect(jobInfo)); + mcpJobInfo.setEntityUrn(jobUrn); + mcpJobInfo.setEntityType("dataJob"); + mcpJobInfo.setChangeType(ChangeType.UPSERT); + + return Arrays.asList(mcpJobIO, mcpJobInfo); + } + + DataJobInfo jobInfo() { + return new DataJobInfo() + 
.setName(datasetLineage.getCallSiteShort()) + .setType(DataJobInfo.Type.create("sparkJob")); + } + + DataJobUrn jobUrn() { + /* This is for generating urn from a hash of the plan */ + /* + * Set sourceUrns = datasetLineage.getSources() .parallelStream() .map(x + * -> x.urn().toString()) .collect(Collectors.toSet()); sourceUrns = new + * TreeSet<>(sourceUrns); //sort for consistency + * + * String sinkUrn = datasetLineage.getSink().urn().toString(); String plan = + * LineageUtils.scrubPlan(datasetLineage.getPlan()); String id = + * Joiner.on(",").join(sinkUrn, sourceUrns, plan); + * + * return new DataJobUrn(flowUrn(), "planHash_" + LineageUtils.hash(id)); + */ + return new DataJobUrn(flowUrn(), "QueryExecId_" + sqlQueryExecId); + } + + DataFlowUrn flowUrn() { + return LineageUtils.flowUrn(getMaster(), getAppName()); + } + + StringMap customProps() { + StringMap customProps = new StringMap(); + customProps.put("startedAt", timeStr()); + customProps.put("description", datasetLineage.getCallSiteShort()); + customProps.put("SQLQueryId", Long.toString(sqlQueryExecId)); + customProps.put("appId", getAppId()); + customProps.put("appName", getAppName()); + customProps.put("queryPlan", datasetLineage.getPlan()); + return customProps; + } + + private DataJobInputOutput jobIO() { + DatasetUrnArray out = new DatasetUrnArray(); + out.add(datasetLineage.getSink().urn()); + + DatasetUrnArray in = new DatasetUrnArray(); + + Set sources = new TreeSet<>(new Comparator() { + @Override + public int compare(SparkDataset x, SparkDataset y) { + return x.urn().toString().compareTo(y.urn().toString()); + } + + }); + sources.addAll(datasetLineage.getSources()); // maintain ordering + for (SparkDataset source : sources) { + in.add(source.urn()); + } + + DataJobInputOutput io = new DataJobInputOutput().setInputDatasets(in).setOutputDatasets(out); + return io; + } +} \ No newline at end of file diff --git 
a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/CatalogTableDataset.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/CatalogTableDataset.java new file mode 100644 index 00000000000000..1903d541352055 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/CatalogTableDataset.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.lineage.spark.model.dataset; + +import org.apache.spark.sql.catalyst.catalog.CatalogTable; + +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; + +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@EqualsAndHashCode +@ToString +public class CatalogTableDataset implements SparkDataset { + private final DatasetUrn urn; + + public CatalogTableDataset(CatalogTable table) { + this(table.qualifiedName()); + } + + public CatalogTableDataset(String dsName) { + this.urn = new DatasetUrn(new DataPlatformUrn("hive"), dsName, FabricType.PROD); + } + + @Override + public DatasetUrn urn() { + return this.urn; + } +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/HdfsPathDataset.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/HdfsPathDataset.java new file mode 100644 index 00000000000000..1dec3423ebea75 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/HdfsPathDataset.java @@ -0,0 +1,31 @@ +package com.linkedin.datahub.lineage.spark.model.dataset; + +import org.apache.hadoop.fs.Path; + +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; + +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@EqualsAndHashCode +@ToString +public class HdfsPathDataset implements SparkDataset { + private final DatasetUrn urn; + + public HdfsPathDataset(Path path) { + // 
TODO check static partitions? + this(path.toUri().toString()); + } + + public HdfsPathDataset(String pathUri) { + // TODO check static partitions? + this.urn = new DatasetUrn(new DataPlatformUrn("hdfs"), pathUri, FabricType.PROD); + } + + @Override + public DatasetUrn urn() { + return this.urn; + } +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/JdbcDataset.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/JdbcDataset.java new file mode 100644 index 00000000000000..6d33578743c152 --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/JdbcDataset.java @@ -0,0 +1,39 @@ +package com.linkedin.datahub.lineage.spark.model.dataset; + +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; + +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@EqualsAndHashCode +@ToString +public class JdbcDataset implements SparkDataset { + private final DatasetUrn urn; + + public JdbcDataset(String url, String tbl) { + this.urn = new DatasetUrn(new DataPlatformUrn(platformName(url)), dsName(url, tbl), FabricType.PROD); + } + + @Override + public DatasetUrn urn() { + return this.urn; + } + + private static String platformName(String url) { + if (url.contains("postgres")) { + return "postgres"; + } + return "unknownJdbc"; + } + + private static String dsName(String url, String tbl) { + url = url.replaceFirst("jdbc:", ""); + if (url.contains("postgres")) { + url = url.substring(url.lastIndexOf('/') + 1); + } + // TODO different DBs have different formats. TBD mapping to data source names + return url + "." 
+ tbl; + } +} diff --git a/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/SparkDataset.java b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/SparkDataset.java new file mode 100644 index 00000000000000..5da4dc2debfbcd --- /dev/null +++ b/spark-lineage/src/main/java/com/linkedin/datahub/lineage/spark/model/dataset/SparkDataset.java @@ -0,0 +1,7 @@ +package com.linkedin.datahub.lineage.spark.model.dataset; + +import com.linkedin.common.urn.DatasetUrn; + +public interface SparkDataset { + DatasetUrn urn(); +} diff --git a/spark-lineage/src/test/java/com/linkedin/datahub/lineage/TestSparkJobsLineage.java b/spark-lineage/src/test/java/com/linkedin/datahub/lineage/TestSparkJobsLineage.java new file mode 100644 index 00000000000000..ba470fcad191e4 --- /dev/null +++ b/spark-lineage/src/test/java/com/linkedin/datahub/lineage/TestSparkJobsLineage.java @@ -0,0 +1,522 @@ +package com.linkedin.datahub.lineage; + +import static com.github.tomakehurst.wiremock.client.WireMock.ok; +import static com.github.tomakehurst.wiremock.client.WireMock.post; +import static com.github.tomakehurst.wiremock.client.WireMock.postRequestedFor; +import static com.github.tomakehurst.wiremock.client.WireMock.urlEqualTo; +import static com.github.tomakehurst.wiremock.client.WireMock.verify; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.sql.Connection; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Properties; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import 
org.apache.spark.sql.SparkSession; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.rules.TestWatcher; +import org.junit.runner.Description; +import org.testcontainers.containers.PostgreSQLContainer; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.tomakehurst.wiremock.WireMockServer; +import com.github.tomakehurst.wiremock.client.MappingBuilder; +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.core.Admin; +import com.github.tomakehurst.wiremock.core.WireMockConfiguration; +import com.github.tomakehurst.wiremock.extension.Parameters; +import com.github.tomakehurst.wiremock.extension.PostServeAction; +import com.github.tomakehurst.wiremock.matching.MatchResult; +import com.github.tomakehurst.wiremock.matching.StringValuePattern; +import com.github.tomakehurst.wiremock.stubbing.ServeEvent; +import com.linkedin.datahub.lineage.spark.interceptor.LineageUtils; +import com.linkedin.datahub.lineage.spark.model.DatasetLineage; +import com.linkedin.datahub.lineage.spark.model.LineageConsumer; +import com.linkedin.datahub.lineage.spark.model.LineageEvent; +import com.linkedin.datahub.lineage.spark.model.SQLQueryExecStartEvent; +import com.linkedin.datahub.lineage.spark.model.dataset.CatalogTableDataset; +import com.linkedin.datahub.lineage.spark.model.dataset.HdfsPathDataset; +import com.linkedin.datahub.lineage.spark.model.dataset.JdbcDataset; +import com.linkedin.datahub.lineage.spark.model.dataset.SparkDataset; + + + +public class TestSparkJobsLineage { + private static final boolean MOCK_GMS = Boolean.valueOf("true"); // if false, MCPs get written to real GMS server (see GMS_PORT) + private static final boolean VERIFY_EXPECTED = MOCK_GMS && Boolean.valueOf("true"); // if false, "expected" JSONs are overwritten. 
+ + private static final String APP_NAME = "sparkTestApp"; + + private static final String RESOURCE_DIR = "src/test/resources"; + private static final String DATA_DIR = RESOURCE_DIR + "/data"; + private static final String WAREHOUSE_LOC = DATA_DIR + "/hive/warehouse"; + private static final String TEST_DB = "sparktestdb"; + + private static final String MASTER = "local"; + + private static final int N = 3; // num of GMS requests per spark job + + private static final int GMS_PORT = MOCK_GMS ? 8089 : 8080; + + private static final String EXPECTED_JSON_ROOT = "src/test/resources/expected/"; + + private static SparkSession spark; + private static Properties jdbcConnnProperties; + private static DatasetLineageAccumulator acc; + + @SuppressWarnings("rawtypes") + private static final class McpContentPattern extends StringValuePattern { + + public McpContentPattern(String expectedValue) { + super(expectedValue); + } + + // dataflow case, we do not match against expected string + public McpContentPattern() { + super(""); + } + + @Override + public MatchResult match(String actual) { + if (actual.contains("dataJobInputOutput")) { + return expectedValue.contains(relPaths(actual)) ? MatchResult.exactMatch() : MatchResult.noMatch(); + } + try { + HashMap body = new ObjectMapper().readValue(actual, HashMap.class); + HashMap proposal = (HashMap) body.get("proposal"); + String aspectName = (String) proposal.get("aspectName"); + if (aspectName.equals("dataFlowInfo")) { + return checkFlowInfo(proposal); + } + if (actual.contains("dataJobInfo")) { + return checkJobInfo(proposal); + } + return MatchResult.noMatch(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private MatchResult checkJobInfo(HashMap proposal) { + // TODO check custom props etc. + return MatchResult.exactMatch(); + } + + private MatchResult checkFlowInfo(HashMap proposal) { + // TODO check custom props etc. 
+ return MatchResult.exactMatch(); + } + + } + + private static String relPaths(String s) { + return s.replaceAll("file:[0-9|a-z|A-Z|\\-|\\/|_|\\.]*" + RESOURCE_DIR, "file:/" + RESOURCE_DIR); + } + + private static final class RequestFileWriter extends PostServeAction { + + @Override + public String getName() { + return "writeReqJson"; + } + + @SuppressWarnings({ "rawtypes", "unused" }) + public void doAction(ServeEvent serveEvent, Admin admin, Parameters parameters) { + String currentTestRes = parameters.getString("filename"); + if (currentTestRes == null || VERIFY_EXPECTED) { + return; + } + + Path p = Paths.get(EXPECTED_JSON_ROOT, currentTestRes); + System.out.println("Writing json to file " + p); + String json = serveEvent.getRequest().getBodyAsString(); + try { + HashMap body = new ObjectMapper().readValue(json, HashMap.class); + HashMap proposal = (HashMap) body.get("proposal"); + String aspectName = (String) proposal.get("aspectName"); + + // this effectively checks URNs and lineages; other jobInfo/flowInfo are checked + // in McpContentPattern + if (aspectName.equals("dataJobInputOutput")) { + json = relPaths(json); + Files.write(p, Collections.singletonList(json), StandardOpenOption.CREATE, StandardOpenOption.APPEND); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + private static class DatasetLineageAccumulator implements LineageConsumer { + + private final List lineages = new ArrayList<>(); + + public void flushJobs() { + lineages.clear(); + } + + public List getLineages() { + return Collections.unmodifiableList(lineages); + } + + @Override + public void accept(LineageEvent e) { + if (e instanceof SQLQueryExecStartEvent) { + lineages.add(((SQLQueryExecStartEvent) e).getDatasetLineage()); + } + } + } + + @Rule + public TestRule watcher = new TestWatcher() { + protected void starting(Description description) { + if (!MOCK_GMS) { + return; + } + String currentTestRes = description.getMethodName() + ".json"; + + MappingBuilder 
mapping = baseMapping(); + + try { + if (VERIFY_EXPECTED) { + // setup the request body that we expect + List expected = Files.readAllLines(Paths.get(EXPECTED_JSON_ROOT, currentTestRes)); + mapping.withRequestBody(new McpContentPattern(expected.toString())); + } else { + // overwrite "expected" json file with the MCP request bodies + mapping.withPostServeAction("writeReqJson", Parameters.one("filename", currentTestRes)); + Files.deleteIfExists(Paths.get(EXPECTED_JSON_ROOT, currentTestRes)); + } + } catch (IOException e) { + e.printStackTrace(); + } + wireMock.stubFor(mapping); + } + + @Override + protected void finished(Description description) { + if (!VERIFY_EXPECTED) { + return; + } + assertTrue(WireMock.findUnmatchedRequests().isEmpty()); + wireMock.resetRequests(); + wireMock.resetMappings(); + super.finished(description); + } + }; + + public static WireMockServer wireMock = new WireMockServer(WireMockConfiguration + .options() + .port(8089) + .extensions(new RequestFileWriter())); + + @ClassRule + public static PostgreSQLContainer db = new PostgreSQLContainer<>("postgres:9.6.12") + .withDatabaseName("sparktestdb"); + + private static MappingBuilder baseMapping() { + return post("/aspects?action=ingestProposal") + .willReturn(ok() + .withBody("SUCCESS")); + } + + @BeforeClass + public static void setup() { + + acc = new DatasetLineageAccumulator(); + LineageUtils.registerConsumer("accumulator", acc); + + wireMock.start(); + WireMock.configureFor("localhost", 8089); + MappingBuilder mapping = baseMapping(); + if (VERIFY_EXPECTED) { + mapping.withRequestBody(new McpContentPattern()); + } + wireMock.stubFor(mapping); + + spark = SparkSession + .builder() + .appName(APP_NAME) + .config("spark.master", MASTER) + .config("spark.extraListeners", + "com.linkedin.datahub.lineage.spark.interceptor.DatahubLineageEmitter") + .config("spark.datahub.lineage.consumerTypes", "accumulator, mcpEmitter") + .config("spark.datahub.lineage.mcpEmitter.gmsUrl", "http://localhost:" + 
GMS_PORT) + .config("spark.sql.warehouse.dir", new File(WAREHOUSE_LOC).getAbsolutePath()) + .enableHiveSupport() + .getOrCreate(); + + spark.sql("drop database if exists " + TEST_DB + " cascade"); + spark.sql("create database " + TEST_DB); + jdbcConnnProperties = new Properties(); + jdbcConnnProperties.put("user", db.getUsername()); + jdbcConnnProperties.put("password", db.getPassword()); + + if (VERIFY_EXPECTED) { + verify(1, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + assertTrue(WireMock.findUnmatchedRequests().isEmpty()); + } + wireMock.resetRequests(); + wireMock.resetMappings(); + } + + @AfterClass + public static void tearDown() throws Exception { + wireMock.resetRequests(); + wireMock.resetMappings(); + MappingBuilder mapping = baseMapping(); + if (VERIFY_EXPECTED) { + mapping.withRequestBody(new McpContentPattern()); + } + wireMock.stubFor(mapping); + spark.stop(); + if (VERIFY_EXPECTED) { + verify(1, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + assertTrue(WireMock.findUnmatchedRequests().isEmpty()); + } + wireMock.stop(); + } + + @Before + public void before() { + acc.flushJobs(); + } + + @Test + public void testHdfsInOut() throws Exception { + + Dataset df1 = spark.read().option("header", "true").csv(DATA_DIR + "/in1.csv"); + Dataset df2 = spark.read().option("header", "true").csv(DATA_DIR + "/in2.csv"); + df1.createOrReplaceTempView("v1"); + df2.createOrReplaceTempView("v2"); + + Dataset df = spark + .sql("select v1.c1 as a, v1.c2 as b, v2.c1 as c, v2.c2 as d from v1 join v2 on v1.id = v2.id"); + + // InsertIntoHadoopFsRelationCommand + df.write().mode(SaveMode.Overwrite).csv(DATA_DIR + "/out.csv"); + Thread.sleep(5000); + check(dsl(hdfsDs("out.csv"), hdfsDs("in1.csv"), hdfsDs("in2.csv")), acc.getLineages().get(0)); + if (VERIFY_EXPECTED) { + verify(1 * N, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + } + } + + @Test + public void testHdfsInJdbcOut() throws Exception { + Dataset df1 = 
spark.read() + .option("header", "true").csv(DATA_DIR + "/in1.csv") + .withColumnRenamed("c1", "a").withColumnRenamed("c2", "b"); + + Dataset df2 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in2.csv") + .withColumnRenamed("c1", "c").withColumnRenamed("c2", "d"); + + Dataset df = df1.join(df2, "id").drop("id"); + + // SaveIntoDataSourceCommand + // HadoopFsRelation input + df.write().mode(SaveMode.Overwrite).jdbc( + db.getJdbcUrl(), + "foo1", jdbcConnnProperties); + Thread.sleep(5000); + check(dsl(pgDs("foo1"), hdfsDs("in1.csv"), hdfsDs("in2.csv")), acc.getLineages().get(0)); + if (VERIFY_EXPECTED) { + verify(1 * N, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + } + } + + @Test + public void testHdfsJdbcInJdbcOut() throws Exception { + Connection c = db.createConnection(""); + c.createStatement().execute("create table foo2 (a varchar(5), b int);"); + c.createStatement().execute("insert into foo2 values('a', 4);"); + c.close(); + + Dataset df1 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in1.csv") + .withColumnRenamed("c1", "a").withColumnRenamed("c2", "b2"); + + Dataset df2 = spark.read() + .jdbc(db.getJdbcUrl(), "foo2", jdbcConnnProperties); + + Dataset df = df1.join(df2, "a"); + + // SaveIntoDataSourceCommand + // JDBCRelation input + df.write().mode(SaveMode.Overwrite).jdbc( + db.getJdbcUrl(), + "foo3", jdbcConnnProperties); + Thread.sleep(5000); + check(dsl(pgDs("foo3"), hdfsDs("in1.csv"), pgDs("foo2")), acc.getLineages().get(0)); + if (VERIFY_EXPECTED) { + verify(1 * N, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + } + } + + @Test + public void testHdfsInHiveOut() throws Exception { + Dataset df1 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in1.csv") + .withColumnRenamed("c1", "a").withColumnRenamed("c2", "b"); + + Dataset df2 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in2.csv") + .withColumnRenamed("c1", "c").withColumnRenamed("c2", "d"); + + Dataset df = 
df1.join(df2, "id").drop("id"); + + df.write().mode(SaveMode.Overwrite).saveAsTable(tbl("foo4")); // CreateDataSourceTableAsSelectCommand + df.write().mode(SaveMode.Append).saveAsTable(tbl("foo4")); // CreateDataSourceTableAsSelectCommand + df.write().insertInto(tbl("foo4")); // InsertIntoHadoopFsRelationCommand + + Thread.sleep(5000); + // TODO same data accessed as Hive Table or Path URI ?? + + DatasetLineage exp = dsl(catTblDs("foo4"), hdfsDs("in1.csv"), hdfsDs("in2.csv")); + check(Collections.nCopies(3, exp), acc.getLineages()); + if (VERIFY_EXPECTED) { + verify(3 * N, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + } + } + + @Test + public void testHiveInHiveOut() throws Exception { + Dataset df1 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in1.csv") + .withColumnRenamed("c1", "a").withColumnRenamed("c2", "b"); + + Dataset df2 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in2.csv") + .withColumnRenamed("c1", "c").withColumnRenamed("c2", "d"); + + df1.createOrReplaceTempView("v1"); + df2.createOrReplaceTempView("v2"); + + // CreateHiveTableAsSelectCommand + spark.sql("create table " + tbl("foo5") + " as " + + "(select v1.a, v1.b, v2.c, v2.d from v1 join v2 on v1.id = v2.id)"); + + check(dsl(catTblDs("foo5"), hdfsDs("in1.csv"), hdfsDs("in2.csv")), acc.getLineages().get(0)); + + // CreateHiveTableAsSelectCommand + spark.sql("create table " + tbl("hivetab") + " as " + + "(select * from " + tbl("foo5") + ")"); + + check(dsl(catTblDs("hivetab"), catTblDs("foo5")), acc.getLineages().get(1)); + + // InsertIntoHiveTable + spark.sql("insert into " + tbl("hivetab") + " (select * from " + tbl("foo5") + ")"); + check(dsl(catTblDs("hivetab"), catTblDs("foo5")), acc.getLineages().get(2)); + + Dataset df = spark.sql("select * from " + tbl("foo5")); + + // InsertIntoHiveTable + df.write().insertInto(tbl("hivetab")); + Thread.sleep(5000); + check(dsl(catTblDs("hivetab"), catTblDs("foo5")), acc.getLineages().get(3)); + if 
(VERIFY_EXPECTED) { + verify(4 * N, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + } + } + + @Test + public void testHdfsJdbcInJdbcOutTwoLevel() throws Exception { + Connection c = db.createConnection(""); + c.createStatement().execute("create table foo6 (a varchar(5), b int);"); + c.createStatement().execute("insert into foo6 values('a', 4);"); + c.close(); + + Dataset df1 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in1.csv") + .withColumnRenamed("c1", "a").withColumnRenamed("c2", "b2"); + + Dataset df2 = spark.read() + .jdbc(db.getJdbcUrl(), "foo6", jdbcConnnProperties); + + Dataset df3 = spark.read() + .option("header", "true").csv(DATA_DIR + "/in2.csv") + .withColumnRenamed("c1", "a").withColumnRenamed("c2", "b3"); + + Dataset df = df1.join(df2, "a").drop("id").join(df3, "a"); + + // SaveIntoDataSourceCommand + // JDBCRelation input + df.write().mode(SaveMode.Overwrite).jdbc( + db.getJdbcUrl(), + "foo7", jdbcConnnProperties); + Thread.sleep(5000); + check(dsl(pgDs("foo7"), hdfsDs("in1.csv"), hdfsDs("in2.csv"), pgDs("foo6")), acc.getLineages().get(0)); + if (VERIFY_EXPECTED) { + verify(1 * N, postRequestedFor(urlEqualTo("/aspects?action=ingestProposal"))); + } + } + + private static void check(List expected, List actual) { + assertEquals(expected.size(), actual.size()); + for (int i = 0; i < expected.size(); i++) { + check(expected.get(i), actual.get(i)); + } + } + + private static void check(DatasetLineage expected, DatasetLineage actual) { + assertEquals(expected.getSink().toString(), actual.getSink().toString()); + assertEquals(dsToStrings(expected.getSources()), dsToStrings(actual.getSources())); + assertTrue(actual.getCallSiteShort().contains("TestSparkJobsLineage")); + } + + private static Set dsToStrings(Set datasets) { + return datasets.stream().map(x -> x.toString()).collect(Collectors.toSet()); + } + + private static DatasetLineage dsl(SparkDataset sink, SparkDataset... 
source) { + return dsl(null, sink, source); + } + + private static DatasetLineage dsl(String callSite, SparkDataset sink, SparkDataset... source) { + DatasetLineage lineage = new DatasetLineage(callSite, "unknownPlan", sink); + Arrays.asList(source).forEach(x -> lineage.addSource(x)); + return lineage; + } + + private static HdfsPathDataset hdfsDs(String fileName) { + return new HdfsPathDataset("file:" + abs(DATA_DIR + "/" + fileName)); + } + + private static JdbcDataset pgDs(String tbl) { + return new JdbcDataset(db.getJdbcUrl(), tbl); + } + + private static CatalogTableDataset catTblDs(String tbl) { + return new CatalogTableDataset(tbl(tbl)); + } + + private static String tbl(String tbl) { + return TEST_DB + "." + tbl; + } + + private static String abs(String relPath) { + return new File(relPath).getAbsolutePath(); + } +} \ No newline at end of file diff --git a/spark-lineage/src/test/resources/data/in1.csv/part1.csv b/spark-lineage/src/test/resources/data/in1.csv/part1.csv new file mode 100644 index 00000000000000..b65449abf61931 --- /dev/null +++ b/spark-lineage/src/test/resources/data/in1.csv/part1.csv @@ -0,0 +1,3 @@ +id,c1,c2 +1,a,4 +2,a,5 diff --git a/spark-lineage/src/test/resources/data/in2.csv/part1.csv b/spark-lineage/src/test/resources/data/in2.csv/part1.csv new file mode 100644 index 00000000000000..cd1853d694a0d2 --- /dev/null +++ b/spark-lineage/src/test/resources/data/in2.csv/part1.csv @@ -0,0 +1,4 @@ +id,c1,c2 +1,a,4 +2,b,5 +3,b,6 diff --git a/spark-lineage/src/test/resources/expected/testHdfsInHiveOut.json b/spark-lineage/src/test/resources/expected/testHdfsInHiveOut.json new file mode 100644 index 00000000000000..b327969620aeab --- /dev/null +++ b/spark-lineage/src/test/resources/expected/testHdfsInHiveOut.json @@ -0,0 +1,3 @@ +{ "proposal" 
:{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_9)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo4,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_10)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo4,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_11)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo4,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} diff --git a/spark-lineage/src/test/resources/expected/testHdfsInJdbcOut.json b/spark-lineage/src/test/resources/expected/testHdfsInJdbcOut.json new file mode 100644 index 00000000000000..e89db8b2b82395 --- /dev/null +++ b/spark-lineage/src/test/resources/expected/testHdfsInJdbcOut.json @@ -0,0 +1 @@ +{ "proposal" 
:{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_24)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:postgres,sparktestdb.foo1,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} diff --git a/spark-lineage/src/test/resources/expected/testHdfsInOut.json b/spark-lineage/src/test/resources/expected/testHdfsInOut.json new file mode 100644 index 00000000000000..3da8ec90028789 --- /dev/null +++ b/spark-lineage/src/test/resources/expected/testHdfsInOut.json @@ -0,0 +1 @@ +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_6)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/out.csv,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} diff --git a/spark-lineage/src/test/resources/expected/testHdfsJdbcInJdbcOut.json b/spark-lineage/src/test/resources/expected/testHdfsJdbcInJdbcOut.json new file mode 100644 index 00000000000000..06ea7714670700 --- /dev/null +++ b/spark-lineage/src/test/resources/expected/testHdfsJdbcInJdbcOut.json @@ -0,0 +1 @@ +{ "proposal" 
:{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_13)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:postgres,sparktestdb.foo2,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:postgres,sparktestdb.foo3,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} diff --git a/spark-lineage/src/test/resources/expected/testHdfsJdbcInJdbcOutTwoLevel.json b/spark-lineage/src/test/resources/expected/testHdfsJdbcInJdbcOutTwoLevel.json new file mode 100644 index 00000000000000..763b3319a0511d --- /dev/null +++ b/spark-lineage/src/test/resources/expected/testHdfsJdbcInJdbcOutTwoLevel.json @@ -0,0 +1 @@ +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_27)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:postgres,sparktestdb.foo6,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:postgres,sparktestdb.foo7,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} diff --git a/spark-lineage/src/test/resources/expected/testHiveInHiveOut.json b/spark-lineage/src/test/resources/expected/testHiveInHiveOut.json new file mode 100644 index 00000000000000..9cd7268a203813 --- /dev/null +++ b/spark-lineage/src/test/resources/expected/testHiveInHiveOut.json @@ -0,0 +1,4 @@ +{ "proposal" 
:{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_18)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in1.csv,PROD)\",\"urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/src/test/resources/data/in2.csv,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo5,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_19)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo5,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.hivetab,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_20)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo5,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.hivetab,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} +{ "proposal" :{"aspectName":"dataJobInputOutput","entityUrn":"urn:li:dataJob:(urn:li:dataFlow:(spark,sparkTestApp,local),QueryExecId_21)","entityType":"dataJob","aspect":{"value":"{\"inputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.foo5,PROD)\"],\"outputDatasets\":[\"urn:li:dataset:(urn:li:dataPlatform:hive,sparktestdb.hivetab,PROD)\"]}","contentType":"application/json"},"changeType":"UPSERT"}} diff --git a/spark-lineage/src/test/resources/org/apache/spark/log4j-defaults.properties b/spark-lineage/src/test/resources/org/apache/spark/log4j-defaults.properties new file mode 100644 index 
00000000000000..29007560d25313 --- /dev/null +++ b/spark-lineage/src/test/resources/org/apache/spark/log4j-defaults.properties @@ -0,0 +1,8 @@ +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.out +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +log4j.logger.com.linkedin.datahub.lineage=DEBUG \ No newline at end of file