From 6a169357283790e158472957f87f8c6cfbe67136 Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Fri, 15 Dec 2023 11:23:04 -0600
Subject: [PATCH 1/6] fix(operations): fix get index sizes integer wrap (#9450)

---
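[Editor's note, not part of the commit: the old code read the primary store
size with JsonNode.asInt() and divided by 1000. A minimal sketch of the
failure mode, using the value exercised by the new unit test below:

    long sizeBytes = 8_078_398_031L;            // ~8 GB, as reported by the indices stats API
    int wrapped = (int) sizeBytes;              // -511536561: the lossy narrowing asInt() performs
    double sizeInMb = sizeBytes / 1_000_000.0;  // 8078.398031, the value the fix now returns

Dividing by 1000 also produced kilobytes while the field was labeled MB; the
fix divides by 1,000,000 via asLong() and exposes the result through the new
sizeInMb field, keeping the deprecated sizeMb field for compatibility.]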
 .../ElasticSearchTimeseriesAspectService.java |  8 +-
 .../TimeseriesAspectServiceUnitTest.java      | 78 +++++++++++++++++++
 .../timeseries/TimeseriesIndexSizeResult.pdl  |  3 +
 ...nkedin.operations.operations.snapshot.json |  5 ++
 4 files changed, 90 insertions(+), 4 deletions(-)
 create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java

diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
index eec7680a56ecb..f9ab86d41335d 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java
@@ -206,10 +206,10 @@ public List<TimeseriesIndexSizeResult> getIndexSizes() {
             elemResult.setEntityName(indexEntityAndAspect.get().getFirst());
             elemResult.setAspectName(indexEntityAndAspect.get().getSecond());
           }
-          int sizeBytes =
-              entry.getValue().get("primaries").get("store").get("size_in_bytes").asInt();
-          float sizeMb = (float) sizeBytes / 1000;
-          elemResult.setSizeMb(sizeMb);
+          long sizeBytes =
+              entry.getValue().get("primaries").get("store").get("size_in_bytes").asLong();
+          double sizeMb = (double) sizeBytes / 1000000;
+          elemResult.setSizeInMb(sizeMb);
           res.add(elemResult);
         });
     return res;

diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java
new file mode 100644
index 0000000000000..a23267dcf6f55
--- /dev/null
+++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java
@@ -0,0 +1,78 @@
+package com.linkedin.metadata.timeseries.search;
+
+import static org.mockito.Mockito.*;
+
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.NumericNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor;
+import com.linkedin.metadata.timeseries.TimeseriesAspectService;
+import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService;
+import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders;
+import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
+import com.linkedin.timeseries.TimeseriesIndexSizeResult;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import org.apache.commons.io.IOUtils;
+import org.apache.http.HttpEntity;
+import org.opensearch.client.Request;
+import org.opensearch.client.Response;
+import org.opensearch.client.RestClient;
+import org.opensearch.client.RestHighLevelClient;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Test using mocks instead of integration for testing functionality not dependent on a real server
+ * response
+ */
+public class TimeseriesAspectServiceUnitTest {
+
+  private final RestHighLevelClient _searchClient = mock(RestHighLevelClient.class);
+  private final IndexConvention _indexConvention = mock(IndexConvention.class);
+  private final TimeseriesAspectIndexBuilders _timeseriesAspectIndexBuilders =
+      mock(TimeseriesAspectIndexBuilders.class);
+  private final EntityRegistry _entityRegistry = mock(EntityRegistry.class);
+  private final ESBulkProcessor _bulkProcessor = mock(ESBulkProcessor.class);
+  private final RestClient _restClient = mock(RestClient.class);
+  private final TimeseriesAspectService _timeseriesAspectService =
+      new ElasticSearchTimeseriesAspectService(
+          _searchClient,
+          _indexConvention,
+          _timeseriesAspectIndexBuilders,
+          _entityRegistry,
+          _bulkProcessor,
+          0);
+
+  private static final String INDEX_PATTERN = "indexPattern";
+
+  @Test
+  public void testGetIndicesIntegerWrap() throws IOException {
+    when(_indexConvention.getAllTimeseriesAspectIndicesPattern()).thenReturn(INDEX_PATTERN);
+    when(_searchClient.getLowLevelClient()).thenReturn(_restClient);
+    ObjectNode jsonNode = JsonNodeFactory.instance.objectNode();
+    ObjectNode indicesNode = JsonNodeFactory.instance.objectNode();
+    ObjectNode indexNode = JsonNodeFactory.instance.objectNode();
+    ObjectNode primariesNode = JsonNodeFactory.instance.objectNode();
+    ObjectNode storeNode = JsonNodeFactory.instance.objectNode();
+    NumericNode bytesNode = JsonNodeFactory.instance.numberNode(8078398031L);
+    storeNode.set("size_in_bytes", bytesNode);
+    primariesNode.set("store", storeNode);
+    indexNode.set("primaries", primariesNode);
+    indicesNode.set("someIndexName", indexNode);
+    jsonNode.set("indices", indicesNode);
+
+    Response response = mock(Response.class);
+    HttpEntity responseEntity = mock(HttpEntity.class);
+    when(response.getEntity()).thenReturn(responseEntity);
+    when(responseEntity.getContent())
+        .thenReturn(IOUtils.toInputStream(jsonNode.toString(), StandardCharsets.UTF_8));
+    when(_restClient.performRequest(any(Request.class))).thenReturn(response);
+
+    List<TimeseriesIndexSizeResult> results = _timeseriesAspectService.getIndexSizes();
+
+    Assert.assertEquals(results.get(0).getSizeInMb(), 8078.398031);
+  }
+}

diff --git a/metadata-models/src/main/pegasus/com/linkedin/timeseries/TimeseriesIndexSizeResult.pdl b/metadata-models/src/main/pegasus/com/linkedin/timeseries/TimeseriesIndexSizeResult.pdl
index b888ef7c0716b..35297314187bf 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/timeseries/TimeseriesIndexSizeResult.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/timeseries/TimeseriesIndexSizeResult.pdl
@@ -22,5 +22,8 @@ record TimeseriesIndexSizeResult{
   /**
    * Size
    */
+  @deprecated = "use sizeInMb instead"
   sizeMb: float = 0
+
+  sizeInMb: double = 0
 }

diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
index 339ce62de6298..eae0eed2dd50b 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
@@ -3668,6 +3668,11 @@
       "name" : "sizeMb",
       "type" : "float",
       "doc" : "Size",
+      "default" : 0.0,
+      "deprecated" : "use sizeInMb instead"
+    }, {
+      "name" : "sizeInMb",
+      "type" : "double",
       "default" : 0.0
     } ]
   }, {

From 824df5a6a3e9fed2f18f3e454c40b8d822011b5c Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 15 Dec 2023 13:28:33 -0600
Subject: [PATCH 2/6] feat(build): gradle 8, jdk17, neo4j 5 (#9458)

---
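[Editor's note, not part of the commit: the central build change is compiling
with a JDK 17 toolchain while still emitting Java 11 bytecode. The pattern this
patch adds to the root build.gradle boils down to:

    java {
      toolchain {
        languageVersion = JavaLanguageVersion.of(jdkVersion)  // ext.jdkVersion = 17
      }
    }
    compileJava {
      options.release = javaClassVersion                      // ext.javaClassVersion = 11
    }

so published artifacts stay consumable from Java 11 even though building
DataHub itself now requires JDK 17.]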
.github/workflows/airflow-plugin.yml | 5 + .github/workflows/build-and-test.yml | 4 +- .github/workflows/check-datahub-jars.yml | 4 +- .github/workflows/docker-unified.yml | 39 +- .github/workflows/documentation.yml | 4 +- .github/workflows/metadata-ingestion.yml | 5 + .github/workflows/metadata-io.yml | 4 +- .github/workflows/metadata-model.yml | 5 + .github/workflows/publish-datahub-jars.yml | 4 +- .github/workflows/spark-smoke-test.yml | 4 +- build.gradle | 137 +- buildSrc/build.gradle | 13 +- .../pegasus/gradle/PegasusPlugin.java | 2444 +++++++++++++++++ .../gradle/tasks/ChangedFileReportTask.java | 124 + datahub-frontend/build.gradle | 22 +- datahub-frontend/play.gradle | 19 +- datahub-graphql-core/build.gradle | 3 +- datahub-web-react/build.gradle | 10 +- docker/datahub-frontend/Dockerfile | 7 +- docker/datahub-frontend/start.sh | 2 + docker/datahub-gms/Dockerfile | 4 +- docker/datahub-ingestion/build.gradle | 6 +- docker/datahub-mae-consumer/Dockerfile | 4 +- docker/datahub-mce-consumer/Dockerfile | 4 +- docker/datahub-upgrade/Dockerfile | 4 +- docker/kafka-setup/Dockerfile | 2 +- docs-website/build.gradle | 18 +- docs-website/vercel-setup.sh | 2 +- docs/developers.md | 10 +- docs/how/updating-datahub.md | 4 + docs/troubleshooting/build.md | 4 +- entity-registry/build.gradle | 7 +- gradle/wrapper/gradle-wrapper.properties | 2 +- li-utils/build.gradle | 20 +- metadata-auth/auth-api/build.gradle | 9 +- metadata-events/mxe-utils-avro/build.gradle | 5 +- .../java/datahub-client/build.gradle | 16 +- .../datahub-protobuf-example/build.gradle | 4 - .../java/datahub-protobuf/build.gradle | 8 +- .../java/examples/build.gradle | 16 +- .../java/spark-lineage/build.gradle | 68 +- .../java/spark-lineage/scripts/check_jar.sh | 4 +- .../docker/SparkBase.Dockerfile | 2 +- .../python_test_run.sh | 13 +- .../spark-smoke-test/spark-docker.conf | 4 + .../test-spark-lineage/build.gradle | 11 - .../datahub/spark/TestCoalesceJobLineage.java | 5 +- .../datahub/spark/TestSparkJobsLineage.java | 3 + metadata-io/build.gradle | 5 +- .../graph/neo4j/Neo4jGraphService.java | 4 +- metadata-jobs/mae-consumer/build.gradle | 1 + metadata-jobs/mce-consumer/build.gradle | 3 +- metadata-jobs/pe-consumer/build.gradle | 3 +- metadata-models-custom/build.gradle | 2 +- metadata-models-validator/build.gradle | 4 +- metadata-models/build.gradle | 20 +- metadata-service/auth-config/build.gradle | 4 +- metadata-service/auth-filter/build.gradle | 4 +- metadata-service/auth-impl/build.gradle | 4 +- ...formInstanceFieldResolverProviderTest.java | 4 +- .../auth-servlet-impl/build.gradle | 4 +- metadata-service/factories/build.gradle | 4 +- .../graphql-servlet-impl/build.gradle | 4 +- metadata-service/openapi-servlet/build.gradle | 4 +- metadata-service/plugin/build.gradle | 6 +- .../src/test/sample-test-plugins/build.gradle | 4 +- metadata-service/restli-api/build.gradle | 6 +- metadata-service/restli-client/build.gradle | 6 +- .../restli-servlet-impl/build.gradle | 6 +- .../schema-registry-api/build.gradle | 7 +- .../schema-registry-servlet/build.gradle | 4 +- metadata-service/services/build.gradle | 6 +- metadata-service/servlet/build.gradle | 4 +- metadata-utils/build.gradle | 4 +- mock-entity-registry/build.gradle | 4 +- smoke-test/build.gradle | 7 +- test-models/build.gradle | 16 +- vercel.json | 2 +- 78 files changed, 3008 insertions(+), 266 deletions(-) create mode 100644 buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java create mode 100644 
buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index d0c0f52781b9a..cd1e159b7d53c 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -49,6 +49,11 @@ jobs: extra_pip_extras: plugin-v2 fail-fast: false steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 10c137a206531..dab64cf2dca5e 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -37,11 +37,11 @@ jobs: with: timezoneLinux: ${{ matrix.timezone }} - uses: hsheth2/sane-checkout-action@v1 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 8e507ea40fd96..46d97ffec8861 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -28,11 +28,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: hsheth2/sane-checkout-action@v1 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index fef23f9efa85f..169a86000adcc 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -79,6 +79,11 @@ jobs: runs-on: ubuntu-latest needs: setup steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -135,6 +140,11 @@ jobs: runs-on: ubuntu-latest needs: setup steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -191,6 +201,11 @@ jobs: runs-on: ubuntu-latest needs: setup steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -247,6 +262,11 @@ jobs: runs-on: ubuntu-latest needs: setup steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -303,6 +323,11 @@ jobs: runs-on: ubuntu-latest needs: setup steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -537,6 +562,11 @@ jobs: needs_artifact_download: ${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.publish != 'true' }} needs: [setup, datahub_ingestion_base_slim_build] steps: + - name: Set up JDK 17 + uses: 
actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - uses: dorny/paths-filter@v2 @@ -618,6 +648,11 @@ jobs: needs_artifact_download: ${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.publish != 'true' }} needs: [setup, datahub_ingestion_base_full_build] steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - uses: dorny/paths-filter@v2 @@ -720,11 +755,11 @@ jobs: run: df -h . && docker images - name: Check out the repo uses: actions/checkout@v3 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index c94282938120e..29953b8b70d91 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -27,11 +27,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index ec6bd4141cc6f..4e04fef3b3980 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -44,6 +44,11 @@ jobs: - python-version: "3.10" fail-fast: false steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 48f230ce14c8d..2188fcb07c77a 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -29,11 +29,11 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v3 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index eb098a327e4cb..d0112f1b14e7a 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -29,6 +29,11 @@ jobs: runs-on: ubuntu-latest needs: setup steps: + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml index ec7985ef3b3d0..24d1c5436b315 100644 --- a/.github/workflows/publish-datahub-jars.yml +++ b/.github/workflows/publish-datahub-jars.yml @@ -49,11 +49,11 @@ jobs: if: ${{ needs.check-secret.outputs.publish-enabled == 'true' }} steps: - uses: hsheth2/sane-checkout-action@v1 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index 
70b66d6452b26..60e183cce5179 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -30,11 +30,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: hsheth2/sane-checkout-action@v1 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: "zulu" - java-version: 11 + java-version: 17 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/build.gradle b/build.gradle index b16e3ca169c71..a7a85db0398e2 100644 --- a/build.gradle +++ b/build.gradle @@ -1,17 +1,20 @@ buildscript { + ext.jdkVersion = 17 + ext.javaClassVersion = 11 + ext.junitJupiterVersion = '5.6.1' // Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md - ext.pegasusVersion = '29.46.8' + ext.pegasusVersion = '29.48.4' ext.mavenVersion = '3.6.3' ext.springVersion = '5.3.29' ext.springBootVersion = '2.7.14' ext.openTelemetryVersion = '1.18.0' - ext.neo4jVersion = '4.4.9' - ext.neo4jTestVersion = '4.4.25' - ext.neo4jApocVersion = '4.4.0.20:all' + ext.neo4jVersion = '5.14.0' + ext.neo4jTestVersion = '5.14.0' + ext.neo4jApocVersion = '5.14.0' ext.testContainersVersion = '1.17.4' ext.elasticsearchVersion = '2.9.0' // ES 7.10, Opensearch 1.x, 2.x - ext.jacksonVersion = '2.15.2' + ext.jacksonVersion = '2.15.3' ext.jettyVersion = '9.4.46.v20220331' ext.playVersion = '2.8.18' ext.log4jVersion = '2.19.0' @@ -29,19 +32,19 @@ buildscript { buildscript.repositories.addAll(project.repositories) dependencies { classpath 'com.linkedin.pegasus:gradle-plugins:' + pegasusVersion - classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4' + classpath 'com.github.node-gradle:gradle-node-plugin:7.0.1' classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.2.0' classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0" classpath "com.palantir.gradle.gitversion:gradle-git-version:3.0.0" classpath "org.gradle.playframework:gradle-playframework:0.14" - classpath "gradle.plugin.org.hidetake:gradle-swagger-generator-plugin:2.19.1" + classpath "gradle.plugin.org.hidetake:gradle-swagger-generator-plugin:2.19.2" } } plugins { - id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2' - id 'com.github.johnrengelman.shadow' version '6.1.0' + id 'com.gorylenko.gradle-git-properties' version '2.4.1' + id 'com.github.johnrengelman.shadow' version '8.1.1' apply false id 'com.palantir.docker' version '0.35.0' apply false id "com.diffplug.spotless" version "6.23.3" // https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/ @@ -149,19 +152,20 @@ project.ext.externalDependency = [ 'log4jApi': "org.apache.logging.log4j:log4j-api:$log4jVersion", 'log4j12Api': "org.slf4j:log4j-over-slf4j:$slf4jVersion", 'log4j2Api': "org.apache.logging.log4j:log4j-to-slf4j:$log4jVersion", - 'lombok': 'org.projectlombok:lombok:1.18.16', + 'lombok': 'org.projectlombok:lombok:1.18.30', 'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0', 'mavenArtifact': "org.apache.maven:maven-artifact:$mavenVersion", 'mixpanel': 'com.mixpanel:mixpanel-java:1.4.4', - 'mockito': 'org.mockito:mockito-core:3.0.0', - 'mockitoInline': 'org.mockito:mockito-inline:3.0.0', + 'mockito': 'org.mockito:mockito-core:4.11.0', + 'mockitoInline': 'org.mockito:mockito-inline:4.11.0', 'mockServer': 'org.mock-server:mockserver-netty:5.11.2', 'mockServerClient': 'org.mock-server:mockserver-client-java:5.11.2', 'mysqlConnector': 'mysql:mysql-connector-java:8.0.20', 'neo4jHarness': 
'org.neo4j.test:neo4j-harness:' + neo4jTestVersion, 'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jVersion, 'neo4jTestJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jTestVersion, - 'neo4jApoc': 'org.neo4j.procedure:apoc:' + neo4jApocVersion, + 'neo4jApocCore': 'org.neo4j.procedure:apoc-core:' + neo4jApocVersion, + 'neo4jApocCommon': 'org.neo4j.procedure:apoc-common:' + neo4jApocVersion, 'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:' + openTelemetryVersion, 'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion, 'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15', @@ -190,8 +194,8 @@ project.ext.externalDependency = [ 'servletApi': 'javax.servlet:javax.servlet-api:3.1.0', 'shiroCore': 'org.apache.shiro:shiro-core:1.11.0', 'snakeYaml': 'org.yaml:snakeyaml:2.0', - 'sparkSql' : 'org.apache.spark:spark-sql_2.11:2.4.8', - 'sparkHive' : 'org.apache.spark:spark-hive_2.11:2.4.8', + 'sparkSql' : 'org.apache.spark:spark-sql_2.12:3.0.3', + 'sparkHive' : 'org.apache.spark:spark-hive_2.12:3.0.3', 'springBeans': "org.springframework:spring-beans:$springVersion", 'springContext': "org.springframework:spring-context:$springVersion", 'springCore': "org.springframework:spring-core:$springVersion", @@ -210,7 +214,6 @@ project.ext.externalDependency = [ 'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion", 'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.2.15', 'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.46', - 'testngJava8': 'org.testng:testng:7.5.1', 'testng': 'org.testng:testng:7.8.0', 'testContainers': 'org.testcontainers:testcontainers:' + testContainersVersion, 'testContainersJunit': 'org.testcontainers:junit-jupiter:' + testContainersVersion, @@ -226,13 +229,69 @@ project.ext.externalDependency = [ 'charle': 'com.charleskorn.kaml:kaml:0.53.0', 'common': 'commons-io:commons-io:2.7', 'jline':'jline:jline:1.4.1', - 'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0' + 'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0', + 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2' ] allprojects { apply plugin: 'idea' apply plugin: 'eclipse' // apply plugin: 'org.gradlex.java-ecosystem-capabilities' + + tasks.withType(Test).configureEach { + // https://docs.gradle.org/current/userguide/performance.html + maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 + + if (project.configurations.getByName("testImplementation").getDependencies() + .any{ it.getName().contains("testng") }) { + useTestNG() + } + } + + if (project.plugins.hasPlugin('java') + || project.plugins.hasPlugin('java-library') + || project.plugins.hasPlugin('application') + || project.plugins.hasPlugin('pegasus')) { + + java { + toolchain { + languageVersion = JavaLanguageVersion.of(jdkVersion) + } + } + + compileJava { + options.release = javaClassVersion + } + tasks.withType(JavaCompile).configureEach { + javaCompiler = javaToolchains.compilerFor { + languageVersion = JavaLanguageVersion.of(jdkVersion) + } + } + + tasks.withType(JavaExec).configureEach { + javaLauncher = javaToolchains.launcherFor { + languageVersion = JavaLanguageVersion.of(jdkVersion) + } + } + + // not duplicated, need to set this outside and inside afterEvaluate + afterEvaluate { + compileJava { + options.release = javaClassVersion + } + tasks.withType(JavaCompile).configureEach { + javaCompiler = javaToolchains.compilerFor { + languageVersion = 
JavaLanguageVersion.of(jdkVersion) + } + } + + tasks.withType(JavaExec).configureEach { + javaLauncher = javaToolchains.launcherFor { + languageVersion = JavaLanguageVersion.of(jdkVersion) + } + } + } + } } configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) { @@ -264,8 +323,9 @@ subprojects { failOnNoGitDirectory = false } - plugins.withType(JavaPlugin) { + plugins.withType(JavaPlugin).configureEach { dependencies { + implementation externalDependency.annotationApi constraints { implementation("com.google.googlejavaformat:google-java-format:$googleJavaFormatVersion") implementation('io.netty:netty-all:4.1.100.Final') @@ -276,18 +336,30 @@ subprojects { implementation("com.fasterxml.jackson.core:jackson-dataformat-cbor:$jacksonVersion") } } + spotless { java { googleJavaFormat() target project.fileTree(project.projectDir) { - include '**/*.java' - exclude 'build/**/*.java' - exclude '**/generated/**/*.*' - exclude '**/mainGeneratedDataTemplate/**/*.*' - exclude '**/mainGeneratedRest/**/*.*' + include 'src/**/*.java' + exclude 'src/**/resources/' + exclude 'src/**/generated/' + exclude 'src/**/mainGeneratedDataTemplate/' + exclude 'src/**/mainGeneratedRest/' + exclude 'src/renamed/avro/' + exclude 'src/test/sample-test-plugins/' } } } + + if (project.plugins.hasPlugin('pegasus')) { + dependencies { + dataTemplateCompile spec.product.pegasus.data + dataTemplateCompile externalDependency.annotationApi // support > jdk8 + restClientCompile spec.product.pegasus.restliClient + } + } + afterEvaluate { def spotlessJavaTask = tasks.findByName('spotlessJava') def processTask = tasks.findByName('processResources') @@ -305,28 +377,11 @@ subprojects { } } - tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(11) - } - } - tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(11) - } - // https://docs.gradle.org/current/userguide/performance.html - maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 - - if (project.configurations.getByName("testImplementation").getDependencies() - .any{ it.getName().contains("testng") }) { - useTestNG() - } - } - afterEvaluate { if (project.plugins.hasPlugin('pegasus')) { dependencies { dataTemplateCompile spec.product.pegasus.data + dataTemplateCompile externalDependency.annotationApi // support > jdk8 restClientCompile spec.product.pegasus.restliClient } } diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 1f9d30d520171..0c2d91e1f7ac1 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -1,9 +1,11 @@ -apply plugin: 'java' - buildscript { apply from: '../repositories.gradle' } +plugins { + id 'java' +} + dependencies { /** * Forked version of abandoned repository: https://github.com/fge/json-schema-avro @@ -21,6 +23,9 @@ dependencies { implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.5' implementation 'commons-io:commons-io:2.11.0' - compileOnly 'org.projectlombok:lombok:1.18.14' - annotationProcessor 'org.projectlombok:lombok:1.18.14' + compileOnly 'org.projectlombok:lombok:1.18.30' + annotationProcessor 'org.projectlombok:lombok:1.18.30' + + // pegasus dependency, overrides for tasks + implementation 'com.linkedin.pegasus:gradle-plugins:29.48.4' } \ No newline at end of file diff --git a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java 
b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java new file mode 100644 index 0000000000000..2460abcad6f9e --- /dev/null +++ b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java @@ -0,0 +1,2444 @@ +/* + * Copyright (c) 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.linkedin.pegasus.gradle; + +import com.linkedin.pegasus.gradle.PegasusOptions.IdlOptions; +import com.linkedin.pegasus.gradle.internal.CompatibilityLogChecker; +import com.linkedin.pegasus.gradle.tasks.ChangedFileReportTask; +import com.linkedin.pegasus.gradle.tasks.CheckIdlTask; +import com.linkedin.pegasus.gradle.tasks.CheckPegasusSnapshotTask; +import com.linkedin.pegasus.gradle.tasks.CheckRestModelTask; +import com.linkedin.pegasus.gradle.tasks.CheckSnapshotTask; +import com.linkedin.pegasus.gradle.tasks.GenerateAvroSchemaTask; +import com.linkedin.pegasus.gradle.tasks.GenerateDataTemplateTask; +import com.linkedin.pegasus.gradle.tasks.GeneratePegasusSnapshotTask; +import com.linkedin.pegasus.gradle.tasks.GenerateRestClientTask; +import com.linkedin.pegasus.gradle.tasks.GenerateRestModelTask; +import com.linkedin.pegasus.gradle.tasks.PublishRestModelTask; +import com.linkedin.pegasus.gradle.tasks.TranslateSchemasTask; +import com.linkedin.pegasus.gradle.tasks.ValidateExtensionSchemaTask; +import com.linkedin.pegasus.gradle.tasks.ValidateSchemaAnnotationTask; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Method; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.Function; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.gradle.api.Action; +import org.gradle.api.GradleException; +import org.gradle.api.Plugin; +import org.gradle.api.Project; +import org.gradle.api.Task; +import org.gradle.api.artifacts.Configuration; +import org.gradle.api.artifacts.ConfigurationContainer; +import org.gradle.api.file.FileCollection; +import org.gradle.api.plugins.JavaBasePlugin; +import org.gradle.api.plugins.JavaPlugin; +import org.gradle.api.plugins.JavaPluginConvention; +import org.gradle.api.plugins.JavaPluginExtension; +import org.gradle.api.publish.PublishingExtension; +import org.gradle.api.publish.ivy.IvyPublication; +import org.gradle.api.publish.ivy.plugins.IvyPublishPlugin; +import org.gradle.api.tasks.Copy; +import org.gradle.api.tasks.Delete; +import org.gradle.api.tasks.SourceSet; +import org.gradle.api.tasks.SourceSetContainer; +import org.gradle.api.tasks.Sync; +import org.gradle.api.tasks.TaskProvider; +import org.gradle.api.tasks.bundling.Jar; +import org.gradle.api.tasks.compile.JavaCompile; 
+import org.gradle.api.tasks.javadoc.Javadoc; +import org.gradle.language.base.plugins.LifecycleBasePlugin; +import org.gradle.language.jvm.tasks.ProcessResources; +import org.gradle.plugins.ide.eclipse.EclipsePlugin; +import org.gradle.plugins.ide.eclipse.model.EclipseModel; +import org.gradle.plugins.ide.idea.IdeaPlugin; +import org.gradle.plugins.ide.idea.model.IdeaModule; +import org.gradle.util.GradleVersion; + + +/** + * Pegasus code generation plugin. + * The supported project layout for this plugin is as follows: + * + *
+ *   --- api/
+ *   |   --- build.gradle
+ *   |   --- src/
+ *   |       --- <sourceSet>/
+ *   |       |   --- idl/
+ *   |       |   |   --- <published idl (.restspec.json) files>
+ *   |       |   --- java/
+ *   |       |   |   --- <packageName>/
+ *   |       |   |       --- <common java files>
+ *   |       |   --- pegasus/
+ *   |       |       --- <packageName>/
+ *   |       |           --- <data schema (.pdsc) files>
+ *   |       --- <sourceSet>GeneratedDataTemplate/
+ *   |       |   --- java/
+ *   |       |       --- <packageName>/
+ *   |       |           --- <data template source files generated from data schema (.pdsc) files>
+ *   |       --- <sourceSet>GeneratedAvroSchema/
+ *   |       |   --- avro/
+ *   |       |       --- <packageName>/
+ *   |       |           --- <avsc avro schema files (.avsc) generated from pegasus schema files>
+ *   |       --- <sourceSet>GeneratedRest/
+ *   |           --- java/
+ *   |               --- <packageName>/
+ *   |                   --- <rest client source (.java) files generated from published idl>
+ *   --- impl/
+ *   |   --- build.gradle
+ *   |   --- src/
+ *   |       --- <sourceSet>/
+ *   |       |   --- java/
+ *   |       |       --- <packageName>/
+ *   |       |           --- <resource class source (.java) files>
+ *   |       --- <sourceSet>GeneratedRest/
+ *   |           --- idl/
+ *   |               --- <generated idl (.restspec.json) files>
+ *   --- <other projects>/
+ * 
+ * </pre>
+ *
+ * <ul>
+ *   <li>
+ *     <i>api</i>: contains all the files which are commonly depended by the server and
+ *     client implementation. The common files include the data schema (.pdsc) files,
+ *     the idl (.restspec.json) files and potentially Java interface files used by both sides.
+ *   </li>
+ *   <li>
+ *     <i>impl</i>: contains the resource class for server implementation.
+ *   </li>
+ * </ul>
+ *
+ * <p>Performs the following functions:</p>
+ *
+ * <p><b>Generate data model and data template jars for each source set.</b></p>
+ *
+ * <p><i>Overview:</i></p>
+ *
+ * <p>
+ * In the api project, the plugin generates the data template source (.java) files from the
+ * data schema (.pdsc) files, and furthermore compiles the source files and packages them
+ * to jar files. Details of jar contents will be explained in following paragraphs.
+ * In general, data schema files should exist only in api projects.
+ * </p>
+ *
+ * <p>
+ * Configure the server and client implementation projects to depend on the
+ * api project's dataTemplate configuration to get access to the generated data templates
+ * from within these projects. This allows api classes to be built first so that implementation
+ * projects can consume them. We recommend this structure to avoid circular dependencies
+ * (directly or indirectly) among implementation projects.
+ * </p>
+ *
+ * <p><i>Detail:</i></p>
+ *
+ * <p>
+ * Generates data template source (.java) files from data schema (.pdsc) files,
+ * compiles the data template source (.java) files into class (.class) files,
+ * creates a data model jar file and a data template jar file.
+ * The data model jar file contains the source data schema (.pdsc) files.
+ * The data template jar file contains both the source data schema (.pdsc) files
+ * and the generated data template class (.class) files.
+ * </p>
+ *
+ * <p>
+ * In the data template generation phase, the plugin creates a new target source set
+ * for the generated files. The new target source set's name is the input source set name
+ * suffixed with "GeneratedDataTemplate", e.g. "mainGeneratedDataTemplate".
+ * The plugin invokes PegasusDataTemplateGenerator to generate data template source (.java) files
+ * for all data schema (.pdsc) files present in the input source set's pegasus
+ * directory, e.g. "src/main/pegasus". The generated data template source (.java) files
+ * will be in the new target source set's java source directory, e.g.
+ * "src/mainGeneratedDataTemplate/java". In addition to
+ * the data schema (.pdsc) files in the pegasus directory, the dataModel configuration
+ * specifies the resolver path for the PegasusDataTemplateGenerator. The resolver path
+ * provides the data schemas and previously generated data template classes that
+ * may be referenced by the input source set's data schemas. In most cases, the dataModel
+ * configuration should contain data template jars.
+ * </p>
+ *
+ * <p>
+ * The next phase is the data template compilation phase, in which the plugin compiles the generated
+ * data template source (.java) files into class files. The dataTemplateCompile configuration
+ * specifies the pegasus jars needed to compile these classes. The compileClasspath of the
+ * target source set is a composite of the dataModel configuration which includes the data template
+ * classes that were previously generated and included in the dependent data template jars,
+ * and the dataTemplateCompile configuration.
+ * This configuration should specify a dependency on the Pegasus data jar.
+ * </p>
+ *
+ * <p>
+ * The following phase is creating the data model jar and the data template jar.
+ * This plugin creates the data model jar that includes the contents of the
+ * input source set's pegasus directory, and sets the jar file's classification to
+ * "data-model". Hence, the resulting jar file's name should end with "-data-model.jar".
+ * It adds the data model jar as an artifact to the dataModel configuration.
+ * This jar file should only contain data schema (.pdsc) files.
+ * </p>
+ *
+ * <p>
+ * This plugin also creates the data template jar that includes the contents of the input
+ * source set's pegasus directory and the java class output directory of the
+ * target source set. It sets the jar file's classification to "data-template".
+ * Hence, the resulting jar file's name should end with "-data-template.jar".
+ * It adds the data template jar file as an artifact to the dataTemplate configuration.
+ * This jar file contains both data schema (.pdsc) files and generated data template
+ * class (.class) files.
+ * </p>
+ *
+ * <p>
+ * This plugin will ensure that data template source files are generated before
+ * compiling the input source set and before the idea and eclipse tasks. It
+ * also adds the generated classes to the compileClasspath of the input source set.
+ * </p>
+ *
+ * <p>
+ * The configurations that apply to generating the data model and data template jars
+ * are as follows:
+ * </p>
+ * <ul>
+ *   <li>
+ *     The dataTemplateCompile configuration specifies the classpath for compiling
+ *     the generated data template source (.java) files. In most cases,
+ *     it should be the Pegasus data jar.
+ *     (The default compile configuration is not used for compiling data templates because
+ *     it is not desirable to include non data template dependencies in the data template jar.)
+ *     The configuration should not directly include data template jars. Data template jars
+ *     should be included in the dataModel configuration.
+ *   </li>
+ *   <li>
+ *     The dataModel configuration provides the value of the "generator.resolver.path"
+ *     system property that is passed to PegasusDataTemplateGenerator. In most cases,
+ *     this configuration should contain only data template jars. The data template jars
+ *     contain both data schema (.pdsc) files and generated data template (.class) files.
+ *     PegasusDataTemplateGenerator will not generate data template (.java) files for
+ *     classes that can be found in the resolver path. This avoids redundant generation
+ *     of the same classes, and inclusion of these classes in multiple jars.
+ *     The dataModel configuration is also used to publish the data model jar which
+ *     contains only data schema (.pdsc) files.
+ *   </li>
+ *   <li>
+ *     The testDataModel configuration is similar to the dataModel configuration
+ *     except it is used when generating data templates from test source sets.
+ *     It extends from the dataModel configuration. It is also used to publish
+ *     the data model jar from test source sets.
+ *   </li>
+ *   <li>
+ *     The dataTemplate configuration is used to publish the data template
+ *     jar which contains both data schema (.pdsc) files and the data template class
+ *     (.class) files generated from these data schema (.pdsc) files.
+ *   </li>
+ *   <li>
+ *     The testDataTemplate configuration is similar to the dataTemplate configuration
+ *     except it is used when publishing the data template jar files generated from
+ *     test source sets.
+ *   </li>
+ * </ul>
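+ *
+ * <p>
+ * Editor's illustration (not part of the upstream documentation): in this repository the
+ * root build.gradle wires the compile configurations for every pegasus project roughly as
+ * </p>
+ * <pre>
+ *   dependencies {
+ *     dataTemplateCompile spec.product.pegasus.data
+ *     restClientCompile spec.product.pegasus.restliClient
+ *   }
+ * </pre>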

+ *
+ * <p>Performs the following functions:</p>
+ *
+ * <p><b>Generate avro schema jars for each source set.</b></p>
+ *
+ * <p><i>Overview:</i></p>
+ *
+ * <p>
+ * In the api project, the task 'generateAvroSchema' generates the avro schema (.avsc)
+ * files from pegasus schema (.pdsc) files. In general, data schema files should exist
+ * only in api projects.
+ * </p>
+ *
+ * <p>
+ * Configure the server and client implementation projects to depend on the
+ * api project's avroSchema configuration to get access to the generated avro schemas
+ * from within these projects.
+ * </p>
+ *
+ * <p>
+ * This plugin also creates the avro schema jar that includes the contents of the input
+ * source set's avro directory and the avsc schema files.
+ * The resulting jar file's name should end with "-avro-schema.jar".
+ * </p>
+ *
+ * <p><b>Generate rest model and rest client jars for each source set.</b></p>
+ *
+ * <p><i>Overview:</i></p>
+ *
+ * <p>
+ * In the api project, generates rest client source (.java) files from the idl,
+ * compiles the rest client source (.java) files to rest client class (.class) files
+ * and puts them in jar files. In general, the api project should be the only place that
+ * contains the publishable idl files. If the published idl changes an existing idl
+ * in the api project, the plugin will emit a message indicating this has occurred and
+ * suggest that the entire project be rebuilt if it is desirable for clients of the
+ * idl to pick up the newly published changes.
+ * </p>
+ *
+ * <p>
+ * In the impl project, generates the idl (.restspec.json) files from the input
+ * source set's resource class files, then compares them against the existing idl
+ * files in the api project for compatibility checking. If incompatible changes are
+ * found, the build fails (unless a certain flag is specified, see below). If the
+ * generated idl passes compatibility checks (see compatibility check levels below),
+ * publishes the generated idl (.restspec.json) to the api project.
+ * </p>
+ *
+ * <p><i>Detail:</i></p>
+ *
+ * <p><i>rest client generation phase</i>: in api project</p>
+ *
+ * <p>
+ * In this phase, the rest client source (.java) files are generated from the
+ * api project idl (.restspec.json) files using RestRequestBuilderGenerator.
+ * The generated rest client source files will be in the new target source set's
+ * java source directory, e.g. "src/mainGeneratedRest/java".
+ * </p>
+ *
+ * <p>
+ * RestRequestBuilderGenerator requires access to the data schemas referenced
+ * by the idl. The dataModel configuration specifies the resolver path needed
+ * by RestRequestBuilderGenerator to access the data schemas referenced by
+ * the idl that is not in the source set's pegasus directory.
+ * This plugin automatically includes the data schema (.pdsc) files in the
+ * source set's pegasus directory in the resolver path.
+ * In most cases, the dataModel configuration should contain data template jars.
+ * The data template jars contain both data schema (.pdsc) files and generated
+ * data template class (.class) files. By specifying data template jars instead
+ * of data model jars, redundant generation of data template classes is avoided
+ * as classes that can be found in the resolver path are not generated.
+ * </p>
+ *
+ * <p><i>rest client compilation phase</i>: in api project</p>
+ *
+ * <p>
+ * In this phase, the plugin compiles the generated rest client source (.java)
+ * files into class files. The restClientCompile configuration specifies the
+ * pegasus jars needed to compile these classes. The compile classpath is a
+ * composite of the dataModel configuration which includes the data template
+ * classes that were previously generated and included in the dependent data template
+ * jars, and the restClientCompile configuration.
+ * This configuration should specify a dependency on the Pegasus restli-client jar.
+ * </p>
+ *
+ * <p>
+ * The following stage is creating the rest model jar and the rest client jar.
+ * This plugin creates the rest model jar that includes the
+ * generated idl (.restspec.json) files, and sets the jar file's classification to
+ * "rest-model". Hence, the resulting jar file's name should end with "-rest-model.jar".
+ * It adds the rest model jar as an artifact to the restModel configuration.
+ * This jar file should only contain idl (.restspec.json) files.
+ * </p>
+ *
+ * <p>
+ * This plugin also creates the rest client jar that includes the generated
+ * idl (.restspec.json) files and the java class output directory of the
+ * target source set. It sets the jar file's classification to "rest-client".
+ * Hence, the resulting jar file's name should end with "-rest-client.jar".
+ * It adds the rest client jar file as an artifact to the restClient configuration.
+ * This jar file contains both idl (.restspec.json) files and generated rest client
+ * class (.class) files.
+ * </p>
+ *
+ * <p><i>idl generation phase</i>: in server implementation project</p>
+ *
+ * <p>
+ * Before entering this phase, the plugin will ensure that generating idl will
+ * occur after compiling the input source set. It will also ensure that IDEA
+ * and Eclipse tasks run after rest client source (.java) files are generated.
+ * </p>
+ *
+ * <p>
+ * In this phase, the plugin creates a new target source set for the generated files.
+ * The new target source set's name is the input source set name suffixed with
+ * "GeneratedRest", e.g. "mainGeneratedRest". The plugin invokes
+ * RestLiResourceModelExporter to generate idl (.restspec.json) files for each
+ * IdlItem in the input source set's pegasus IdlOptions. The generated idl files
+ * will be in the target source set's idl directory, e.g. "src/mainGeneratedRest/idl".
+ * For example, the following adds an IdlItem to the source set's pegasus IdlOptions.
+ * This line should appear in the impl project's build.gradle. If no IdlItem is added,
+ * this source set will be excluded from generating idl and checking idl compatibility,
+ * even if there are existing idl files.
+ * </p>
+ * <pre>
+ *   pegasus.main.idlOptions.addIdlItem(["com.linkedin.restli.examples.groups.server"])
+ * </pre>
+ *
+ * <p>
+ * After the idl generation phase, each included idl file is checked for compatibility against
+ * those in the api project. In case the current interface breaks compatibility,
+ * by default the build fails and reports all compatibility errors and warnings. Otherwise,
+ * the build tasks in the api project later will package the resource classes into jar files.
+ * The user can change the compatibility requirement between the current and published idl by
+ * setting the "rest.model.compatibility" project property, i.e.
+ * "gradle -Prest.model.compatibility= ..." The following levels are supported:
+ * </p>
+ * <ul>
+ *   <li><i>ignore</i>: idl compatibility check will occur but its result will be ignored.
+ *   The result will be aggregated and printed at the end of the build.</li>
+ *   <li><i>backwards</i>: build fails if there are backwards incompatible changes in idl.
+ *   Build continues if there are only compatible changes.</li>
+ *   <li><i>equivalent (default)</i>: build fails if there are any functional changes (compatible or
+ *   incompatible) in the current idl. Only docs and comments are allowed to be different.</li>
+ * </ul>
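+ *
+ * <p>
+ * Editor's illustration (not part of the upstream documentation): selecting a level
+ * from the command line looks like
+ * </p>
+ * <pre>
+ *   gradle build -Prest.model.compatibility=backwards
+ * </pre>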
+ *
+ * <p>
+ * The plugin needs to know where the api project is. It searches the api project in the
+ * following steps. If all searches fail, the build fails.
+ * </p>
+ * <ol>
+ *   <li>
+ *     Use the specified project from the impl project build.gradle file. The ext.apiProject
+ *     property explicitly assigns the api project. E.g.
+ *     <pre>
+ *       ext.apiProject = project(':groups:groups-server-api')
+ *     </pre>
+ *     If multiple such statements exist, the last will be used. A wrong project path causes a
+ *     Gradle evaluation error.
+ *   </li>
+ *   <li>
+ *     If no ext.apiProject property is defined, the plugin will try to guess the
+ *     api project name with the following conventions. The search stops at the first successful match.
+ *     <ol>
+ *       <li>
+ *         If the impl project name ends with the following suffixes, substitute the suffix with "-api".
+ *         <ol>
+ *           <li>-impl</li>
+ *           <li>-service</li>
+ *           <li>-server</li>
+ *           <li>-server-impl</li>
+ *         </ol>
+ *         This list can be overridden by inserting the following line to the project build.gradle:
+ *         <pre>
+ *           ext.apiProjectSubstitutionSuffixes = ['-new-suffix-1', '-new-suffix-2']
+ *         </pre>
+ *         Alternatively, this setting could be applied globally to all projects by putting it in
+ *         the subprojects section of the root build.gradle.
+ *       </li>
+ *       <li>
+ *         Append "-api" to the impl project name.
+ *       </li>
+ *     </ol>
+ *   </li>
+ * </ol>
+ *
+ * <p>
+ * The plugin invokes RestLiResourceModelCompatibilityChecker to check compatibility.
+ * </p>
+ *
+ * <p>
+ * The idl files in the api project are not generated by the plugin, but rather
+ * "published" from the impl project. The publishRestModel task is used to copy the
+ * idl files to the api project. This task is invoked automatically if the idls are
+ * verified to be "safe". "Safe" is determined by the "rest.model.compatibility"
+ * property. Because this task is skipped if the idls are functionally equivalent
+ * (not necessarily identical, e.g. differ in doc fields), if the default "equivalent"
+ * compatibility level is used, no file will be copied. If such automatic publishing
+ * is intended to be skipped, set the "rest.model.skipPublish" property to true.
+ * Note that all the properties are per-project and can be overridden in each project's
+ * build.gradle file.
+ * </p>
+ *
+ * <p>
+ * Please always keep in mind that if idl publishing has happened, a subsequent whole-project
+ * rebuild is necessary to pick up the changes. Otherwise, the Hudson job will fail and
+ * the source code commit will fail.
+ * </p>
+ *
+ * <p>
+ * The configurations that apply to generating the rest model and rest client jars
+ * are as follows:
+ * </p>
+ * <ul>
+ *   <li>
+ *     The restClientCompile configuration specifies the classpath for compiling
+ *     the generated rest client source (.java) files. In most cases,
+ *     it should be the Pegasus restli-client jar.
+ *     (The default compile configuration is not used for compiling rest clients because
+ *     it is not desirable to include non rest client dependencies, such as
+ *     the rest server implementation classes, in the data template jar.)
+ *     The configuration should not directly include data template jars. Data template jars
+ *     should be included in the dataModel configuration.
+ *   </li>
+ *   <li>
+ *     The dataModel configuration provides the value of the "generator.resolver.path"
+ *     system property that is passed to RestRequestBuilderGenerator.
+ *     This configuration should contain only data template jars. The data template jars
+ *     contain both data schema (.pdsc) files and generated data template (.class) files.
+ *     The RestRequestBuilderGenerator will only generate rest client classes.
+ *     The dataModel configuration is also included in the compile classpath for the
+ *     generated rest client source files. If the dataModel configuration does not
+ *     include the generated data template classes, the Java compiler may not be able to
+ *     find the data template classes referenced by the generated rest client.
+ *   </li>
+ *   <li>
+ *     The testDataModel configuration is similar to the dataModel configuration
+ *     except it is used when generating rest client source files from
+ *     test source sets.
+ *   </li>
+ *   <li>
+ *     The restModel configuration is used to publish the rest model jar
+ *     which contains generated idl (.restspec.json) files.
+ *   </li>
+ *   <li>
+ *     The testRestModel configuration is similar to the restModel configuration
+ *     except it is used to publish rest model jar files generated from
+ *     test source sets.
+ *   </li>
+ *   <li>
+ *     The restClient configuration is used to publish the rest client jar
+ *     which contains both generated idl (.restspec.json) files and
+ *     the rest client class (.class) files generated from these
+ *     idl (.restspec.json) files.
+ *   </li>
+ *   <li>
+ *     The testRestClient configuration is similar to the restClient configuration
+ *     except it is used to publish rest client jar files generated from
+ *     test source sets.
+ *   </li>
+ * </ul>
+ *
+ * <p>
+ * This plugin considers test source sets whose names begin with 'test' or 'integTest' to be
+ * test source sets.
+ * </p>
+ */
+public class PegasusPlugin implements Plugin<Project>
+{
+  public static boolean debug = false;
+
+  private static final GradleVersion MIN_REQUIRED_VERSION = GradleVersion.version("1.0"); // Next: 5.2.1
+  private static final GradleVersion MIN_SUGGESTED_VERSION = GradleVersion.version("5.2.1"); // Next: 5.3
+
+  //
+  // Constants for generating sourceSet names and corresponding directory names
+  // for generated code
+  //
+  private static final String DATA_TEMPLATE_GEN_TYPE = "DataTemplate";
+  private static final String REST_GEN_TYPE = "Rest";
+  private static final String AVRO_SCHEMA_GEN_TYPE = "AvroSchema";
+
+  public static final String DATA_TEMPLATE_FILE_SUFFIX = ".pdsc";
+  public static final String PDL_FILE_SUFFIX = ".pdl";
+  // gradle property to opt OUT schema annotation validation, by default this feature is enabled.
+  private static final String DISABLE_SCHEMA_ANNOTATION_VALIDATION = "schema.annotation.validation.disable";
+  // gradle property to opt in for destroying stale files from the build directory,
+  // by default it is disabled, because it triggers hot-reload (even if it results in a no-op)
+  private static final String DESTROY_STALE_FILES_ENABLE = "enableDestroyStaleFiles";
+  public static final Collection<String> DATA_TEMPLATE_FILE_SUFFIXES = new ArrayList<>();
+
+  public static final String IDL_FILE_SUFFIX = ".restspec.json";
+  public static final String SNAPSHOT_FILE_SUFFIX = ".snapshot.json";
+  public static final String SNAPSHOT_COMPAT_REQUIREMENT = "rest.model.compatibility";
+  public static final String IDL_COMPAT_REQUIREMENT = "rest.idl.compatibility";
+  // Pegasus schema compatibility level configuration, which is used to define the {@link CompatibilityLevel}.
+  public static final String PEGASUS_SCHEMA_SNAPSHOT_REQUIREMENT = "pegasusPlugin.pegasusSchema.compatibility";
+  // Pegasus extension schema compatibility level configuration, which is used to define the {@link CompatibilityLevel}
+  public static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_REQUIREMENT = "pegasusPlugin.extensionSchema.compatibility";
+  // CompatibilityOptions Mode configuration, which is used to define the {@link CompatibilityOptions#Mode} in the compatibility checker.
+  private static final String PEGASUS_COMPATIBILITY_MODE = "pegasusPlugin.pegasusSchemaCompatibilityCheckMode";
+
+  private static final Pattern TEST_DIR_REGEX = Pattern.compile("^(integ)?[Tt]est");
+  private static final String SNAPSHOT_NO_PUBLISH = "rest.model.noPublish";
+  private static final String SNAPSHOT_FORCE_PUBLISH = "rest.model.forcePublish";
+  private static final String PROCESS_EMPTY_IDL_DIR = "rest.idl.processEmptyIdlDir";
+  private static final String IDL_NO_PUBLISH = "rest.idl.noPublish";
+  private static final String IDL_FORCE_PUBLISH = "rest.idl.forcePublish";
+  private static final String SKIP_IDL_CHECK = "rest.idl.skipCheck";
+  // gradle property to skip running GenerateRestModel task.
+  // Note it affects GenerateRestModel task only, and does not skip tasks depends on GenerateRestModel.
+  private static final String SKIP_GENERATE_REST_MODEL = "rest.model.skipGenerateRestModel";
+  private static final String SUPPRESS_REST_CLIENT_RESTLI_2 = "rest.client.restli2.suppress";
+  private static final String SUPPRESS_REST_CLIENT_RESTLI_1 = "rest.client.restli1.suppress";
+
+  private static final String GENERATOR_CLASSLOADER_NAME = "pegasusGeneratorClassLoader";
+
+  private static final String CONVERT_TO_PDL_REVERSE = "convertToPdl.reverse";
+  private static final String CONVERT_TO_PDL_KEEP_ORIGINAL = "convertToPdl.keepOriginal";
+  private static final String CONVERT_TO_PDL_SKIP_VERIFICATION = "convertToPdl.skipVerification";
+  private static final String CONVERT_TO_PDL_PRESERVE_SOURCE_CMD = "convertToPdl.preserveSourceCmd";
+
+  // Below variables are used to collect data across all pegasus projects (sub-projects) and then print information
+  // to the user at the end after build is finished.
+  private static StringBuffer _restModelCompatMessage = new StringBuffer();
+  private static final Collection<String> _needCheckinFiles = new ArrayList<>();
+  private static final Collection<String> _needBuildFolders = new ArrayList<>();
+  private static final Collection<String> _possibleMissingFilesInEarlierCommit = new ArrayList<>();
+
+  private static final String RUN_ONCE = "runOnce";
+  private static final Object STATIC_PROJECT_EVALUATED_LOCK = new Object();
+
+  private static final List<String> UNUSED_CONFIGURATIONS = Arrays.asList(
+      "dataTemplateGenerator", "restTools", "avroSchemaGenerator");
+  // Directory in the dataTemplate jar that holds schemas translated from PDL to PDSC.
+  private static final String TRANSLATED_SCHEMAS_DIR = "legacyPegasusSchemas";
+  // Enable the use of argFiles for the tasks that support them
+  private static final String ENABLE_ARG_FILE = "pegasusPlugin.enableArgFile";
+  // Enable the generation of fluent APIs
+  private static final String ENABLE_FLUENT_API = "pegasusPlugin.enableFluentApi";
+
+  // This config impacts GenerateDataTemplateTask and GenerateRestClientTask;
+  // If not set, by default all paths generated in these two tasks will be lower-case.
+  // This default behavior is needed because Linux, MacOS, Windows treat case sensitive paths differently,
+  // and we want to be consistent, so we choose lower-case as default case for path generated
+  private static final String CODE_GEN_PATH_CASE_SENSITIVE = "pegasusPlugin.generateCaseSensitivePath";
+
+  private static final String PEGASUS_PLUGIN_CONFIGURATION = "pegasusPlugin";
+
+  // Enable the use of generic pegasus schema compatibility checker
+  private static final String ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK = "pegasusPlugin.enablePegasusSchemaCompatibilityCheck";
+
+  private static final String PEGASUS_SCHEMA_SNAPSHOT = "PegasusSchemaSnapshot";
+
+  private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT = "PegasusExtensionSchemaSnapshot";
+
+  private static final String PEGASUS_SCHEMA_SNAPSHOT_DIR = "pegasusSchemaSnapshot";
+
+  private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR = "pegasusExtensionSchemaSnapshot";
+
+  private static final String PEGASUS_SCHEMA_SNAPSHOT_DIR_OVERRIDE = "overridePegasusSchemaSnapshotDir";
+
+  private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR_OVERRIDE = "overridePegasusExtensionSchemaSnapshotDir";
+
+  private static final String SRC = "src";
+
+  private static final String SCHEMA_ANNOTATION_HANDLER_CONFIGURATION = "schemaAnnotationHandler";
+
+  private static final String COMPATIBILITY_OPTIONS_MODE_EXTENSION = "EXTENSION";
+
+
+  @SuppressWarnings("unchecked")
+  private Class<? extends Plugin<Project>> _thisPluginType = (Class<? extends Plugin<Project>>)
+      getClass().asSubclass(Plugin.class);
+
+  private Task _generateSourcesJarTask;
+  private Javadoc _generateJavadocTask;
+  private Task _generateJavadocJarTask;
+  private boolean _configureIvyPublications = true;
+
+  public void setPluginType(Class<? extends Plugin<Project>> pluginType)
+  {
+    _thisPluginType = pluginType;
+  }
+
+  public void setSourcesJarTask(Task sourcesJarTask)
+  {
+    _generateSourcesJarTask = sourcesJarTask;
+  }
+
+  public void setJavadocJarTask(Task javadocJarTask)
+  {
+    _generateJavadocJarTask = javadocJarTask;
+  }
+
+  public void setConfigureIvyPublications(boolean configureIvyPublications) {
+    _configureIvyPublications = configureIvyPublications;
+  }
+
+  @Override
+  public void apply(Project project)
+  {
+    checkGradleVersion(project);
+
+    project.getPlugins().apply(JavaPlugin.class);
+
+    // this HashMap will have a PegasusOptions per sourceSet
+    project.getExtensions().getExtraProperties().set("pegasus", new HashMap<>());
+    // this map will extract PegasusOptions.GenerationMode to project property
+    project.getExtensions().getExtraProperties().set("PegasusGenerationMode",
+        Arrays.stream(PegasusOptions.GenerationMode.values())
+            .collect(Collectors.toMap(PegasusOptions.GenerationMode::name, Function.identity())));
+
+    synchronized (STATIC_PROJECT_EVALUATED_LOCK)
+    {
+      // Check if this is the first time the block will run. Pegasus plugin can run multiple times in a build if
+      // multiple sub-projects applied the plugin.
+      if (!project.getRootProject().hasProperty(RUN_ONCE)
+          || !Boolean.parseBoolean(String.valueOf(project.getRootProject().property(RUN_ONCE))))
+      {
+        project.getGradle().projectsEvaluated(gradle ->
+            gradle.getRootProject().subprojects(subproject ->
+                UNUSED_CONFIGURATIONS.forEach(configurationName -> {
+                  Configuration conf = subproject.getConfigurations().findByName(configurationName);
+                  if (conf != null && !conf.getDependencies().isEmpty()) {
+                    subproject.getLogger().warn("*** Project {} declares dependency to unused configuration \"{}\". "
+                        + "This configuration is deprecated and you can safely remove the dependency. ***",
+                        subproject.getPath(), configurationName);
+                  }
+                })
+            )
+        );
+
+        // Re-initialize the static variables as they might have stale values from previous run. With Gradle 3.0 and
+        // gradle daemon enabled, the plugin class might not be loaded for every run.
+        DATA_TEMPLATE_FILE_SUFFIXES.clear();
+        DATA_TEMPLATE_FILE_SUFFIXES.add(DATA_TEMPLATE_FILE_SUFFIX);
+        DATA_TEMPLATE_FILE_SUFFIXES.add(PDL_FILE_SUFFIX);
+
+        _restModelCompatMessage = new StringBuffer();
+        _needCheckinFiles.clear();
+        _needBuildFolders.clear();
+        _possibleMissingFilesInEarlierCommit.clear();
+
+        project.getGradle().buildFinished(result ->
+        {
+          StringBuilder endOfBuildMessage = new StringBuilder();
+          if (_restModelCompatMessage.length() > 0)
+          {
+            endOfBuildMessage.append(_restModelCompatMessage);
+          }
+
+          if (!_needCheckinFiles.isEmpty())
+          {
+            endOfBuildMessage.append(createModifiedFilesMessage(_needCheckinFiles, _needBuildFolders));
+          }
+
+          if (!_possibleMissingFilesInEarlierCommit.isEmpty())
+          {
+            endOfBuildMessage.append(createPossibleMissingFilesMessage(_possibleMissingFilesInEarlierCommit));
+          }
+
+          if (endOfBuildMessage.length() > 0)
+          {
+            result.getGradle().getRootProject().getLogger().quiet(endOfBuildMessage.toString());
+          }
+        });
+
+        // Set an extra property on the root project to indicate the initialization is complete for the current build.
+        project.getRootProject().getExtensions().getExtraProperties().set(RUN_ONCE, true);
+      }
+    }
+
+    ConfigurationContainer configurations = project.getConfigurations();
+
+    // configuration for getting the required classes to make pegasus call main methods
+    configurations.maybeCreate(PEGASUS_PLUGIN_CONFIGURATION);
+
+    // configuration for compiling generated data templates
+    Configuration dataTemplateCompile = configurations.maybeCreate("dataTemplateCompile");
+    dataTemplateCompile.setVisible(false);
+
+    // configuration for running rest client generator
+    Configuration restClientCompile = configurations.maybeCreate("restClientCompile");
+    restClientCompile.setVisible(false);
+
+    // configuration for running data template generator
+    // DEPRECATED! This configuration is no longer used. Please stop using it.
+    Configuration dataTemplateGenerator = configurations.maybeCreate("dataTemplateGenerator");
+    dataTemplateGenerator.setVisible(false);
+
+    // configuration for running rest client generator
+    // DEPRECATED! This configuration is no longer used. Please stop using it.
+    Configuration restTools = configurations.maybeCreate("restTools");
+    restTools.setVisible(false);
+
+    // configuration for running Avro schema generator
+    // DEPRECATED!
To skip avro schema generation, use PegasusOptions.generationModes + Configuration avroSchemaGenerator = configurations.maybeCreate("avroSchemaGenerator"); + avroSchemaGenerator.setVisible(false); + + // configuration for depending on data schemas and potentially generated data templates + // and for publishing jars containing data schemas to the project artifacts for including in the ivy.xml + Configuration dataModel = configurations.maybeCreate("dataModel"); + Configuration testDataModel = configurations.maybeCreate("testDataModel"); + testDataModel.extendsFrom(dataModel); + + // configuration for depending on data schemas and potentially generated data templates + // and for publishing jars containing data schemas to the project artifacts for including in the ivy.xml + Configuration avroSchema = configurations.maybeCreate("avroSchema"); + Configuration testAvroSchema = configurations.maybeCreate("testAvroSchema"); + testAvroSchema.extendsFrom(avroSchema); + + // configuration for depending on rest idl and potentially generated client builders + // and for publishing jars containing rest idl to the project artifacts for including in the ivy.xml + Configuration restModel = configurations.maybeCreate("restModel"); + Configuration testRestModel = configurations.maybeCreate("testRestModel"); + testRestModel.extendsFrom(restModel); + + // configuration for publishing jars containing data schemas and generated data templates + // to the project artifacts for including in the ivy.xml + // + // published data template jars depends on the configurations used to compile the classes + // in the jar, this includes the data models/templates used by the data template generator + // and the classes used to compile the generated classes. + Configuration dataTemplate = configurations.maybeCreate("dataTemplate"); + dataTemplate.extendsFrom(dataTemplateCompile, dataModel); + Configuration testDataTemplate = configurations.maybeCreate("testDataTemplate"); + testDataTemplate.extendsFrom(dataTemplate, testDataModel); + + // configuration for processing and validating schema annotation during build time. + // + // The configuration contains dependencies to schema annotation handlers which would process schema annotations + // and validate. + Configuration schemaAnnotationHandler = configurations.maybeCreate(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION); + + // configuration for publishing jars containing rest idl and generated client builders + // to the project artifacts for including in the ivy.xml + // + // published client builder jars depends on the configurations used to compile the classes + // in the jar, this includes the data models/templates (potentially generated by this + // project and) used by the data template generator and the classes used to compile + // the generated classes. 
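+    // --- Editor's illustration, not part of the original patch ---
+    // The extendsFrom chains above make dependencies flow transitively: anything declared
+    // on "dataModel" is visible to "dataTemplate" and, through it, to "restClient" below.
+    // A consuming build could therefore wire in schema artifacts roughly like this (the
+    // project path ":example-schemas" is hypothetical; only the configuration names come
+    // from this plugin):
+    //
+    //   project.getDependencies().add("dataModel", project.getDependencies()
+    //       .project(Map.of("path", ":example-schemas", "configuration", "dataTemplate")));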
+    Configuration restClient = configurations.maybeCreate("restClient");
+    restClient.extendsFrom(restClientCompile, dataTemplate);
+    Configuration testRestClient = configurations.maybeCreate("testRestClient");
+    testRestClient.extendsFrom(restClient, testDataTemplate);
+
+    Properties properties = new Properties();
+    InputStream inputStream = getClass().getResourceAsStream("/pegasus-version.properties");
+    if (inputStream != null)
+    {
+      try
+      {
+        properties.load(inputStream);
+      }
+      catch (IOException e)
+      {
+        throw new GradleException("Unable to read pegasus-version.properties file.", e);
+      }
+
+      String version = properties.getProperty("pegasus.version");
+
+      project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:data:" + version);
+      project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:data-avro-generator:" + version);
+      project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:generator:" + version);
+      project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:restli-tools:" + version);
+    }
+    else
+    {
+      project.getLogger().lifecycle("Unable to add pegasus dependencies to {}. Please be sure that "
+          + "'com.linkedin.pegasus:data', 'com.linkedin.pegasus:data-avro-generator', 'com.linkedin.pegasus:generator', 'com.linkedin.pegasus:restli-tools'"
+          + " are available on the configuration pegasusPlugin",
+          project.getPath());
+    }
+    project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "org.slf4j:slf4j-simple:1.7.2");
+    project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, project.files(System.getProperty("java.home") + "/../lib/tools.jar"));
+
+    // this call has to be here because:
+    // 1) artifacts cannot be published once the project has been evaluated, so we need to first
+    // create the tasks and artifact handler, then progressively append sources
+    // 2) in order to append sources progressively, the source and documentation tasks and artifacts must be
+    // configured/created before configuring and creating the code generation tasks.
+
+    configureGeneratedSourcesAndJavadoc(project);
+
+    ChangedFileReportTask changedFileReportTask = project.getTasks()
+        .create("changedFilesReport", ChangedFileReportTask.class);
+
+    project.getTasks().getByName("check").dependsOn(changedFileReportTask);
+
+    SourceSetContainer sourceSets = project.getConvention()
+        .getPlugin(JavaPluginConvention.class).getSourceSets();
+
+    sourceSets.all(sourceSet ->
+    {
+      if (sourceSet.getName().toLowerCase(Locale.US).contains("generated"))
+      {
+        return;
+      }
+
+      checkAvroSchemaExist(project, sourceSet);
+
+      // the idl Generator input options will be inside the PegasusOptions class. Users of the
+      // plugin can set the inputOptions in their build.gradle
+      @SuppressWarnings("unchecked")
+      Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
+          .getExtensions().getExtraProperties().get("pegasus");
+
+      pegasusOptions.put(sourceSet.getName(), new PegasusOptions());
+
+      // rest model generation could fail on incompatibility
+      // if it can fail, fail it early
+      configureRestModelGeneration(project, sourceSet);
+
+      // Do compatibility check for schemas under "pegasus" directory if the configuration property is provided.
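+      // (Editor's note, illustrative and not part of the original patch:
+      // ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK is an ordinary Gradle project property, so a
+      // consumer would typically enable the branch below from the command line, e.g.
+      //
+      //   gradle check -PpegasusPlugin.enablePegasusSchemaCompatibilityCheck=true
+      //
+      // The invocation is a sketch; only the property name is defined by this plugin.)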
+ if (isPropertyTrue(project, ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK)) + { + configurePegasusSchemaSnapshotGeneration(project, sourceSet, false); + } + + configurePegasusSchemaSnapshotGeneration(project, sourceSet, true); + + configureConversionUtilities(project, sourceSet); + + GenerateDataTemplateTask generateDataTemplateTask = configureDataTemplateGeneration(project, sourceSet); + + configureAvroSchemaGeneration(project, sourceSet); + + configureRestClientGeneration(project, sourceSet); + + if (!isPropertyTrue(project, DISABLE_SCHEMA_ANNOTATION_VALIDATION)) + { + configureSchemaAnnotationValidation(project, sourceSet, generateDataTemplateTask); + } + + Task cleanGeneratedDirTask = project.task(sourceSet.getTaskName("clean", "GeneratedDir")); + cleanGeneratedDirTask.doLast(new CacheableAction<>(task -> + { + deleteGeneratedDir(project, sourceSet, REST_GEN_TYPE); + deleteGeneratedDir(project, sourceSet, AVRO_SCHEMA_GEN_TYPE); + deleteGeneratedDir(project, sourceSet, DATA_TEMPLATE_GEN_TYPE); + })); + + // make clean depends on deleting the generated directories + project.getTasks().getByName("clean").dependsOn(cleanGeneratedDirTask); + + // Set data schema directories as resource roots + configureDataSchemaResourcesRoot(project, sourceSet); + }); + + project.getExtensions().getExtraProperties().set(GENERATOR_CLASSLOADER_NAME, getClass().getClassLoader()); + } + + protected void configureSchemaAnnotationValidation(Project project, + SourceSet sourceSet, + GenerateDataTemplateTask generateDataTemplatesTask) + { + // Task would execute based on the following order. + // generateDataTemplatesTask -> validateSchemaAnnotationTask + + // Create ValidateSchemaAnnotation task + ValidateSchemaAnnotationTask validateSchemaAnnotationTask = project.getTasks() + .create(sourceSet.getTaskName("validate", "schemaAnnotation"), ValidateSchemaAnnotationTask.class, task -> + { + task.setInputDir(generateDataTemplatesTask.getInputDir()); + task.setResolverPath(getDataModelConfig(project, sourceSet)); // same resolver path as generateDataTemplatesTask + task.setClassPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION) + .plus(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)) + .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME))); + task.setHandlerJarPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION)); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + } + ); + + // validateSchemaAnnotationTask depend on generateDataTemplatesTask + validateSchemaAnnotationTask.dependsOn(generateDataTemplatesTask); + + // Check depends on validateSchemaAnnotationTask. 
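+    // (Editor's sketch, not in the original patch: for the "main" source set the names derived
+    // via SourceSet#getTaskName give roughly this chain, shown for illustration only:
+    //
+    //   generateDataTemplate -> validateSchemaAnnotation -> check
+    //
+    // i.e. schema annotation validation is wired into the standard `check` lifecycle task.)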
+ project.getTasks().getByName("check").dependsOn(validateSchemaAnnotationTask); + } + + + + @SuppressWarnings("deprecation") + protected void configureGeneratedSourcesAndJavadoc(Project project) + { + _generateJavadocTask = project.getTasks().create("generateJavadoc", Javadoc.class); + + if (_generateSourcesJarTask == null) + { + // + // configuration for publishing jars containing sources for generated classes + // to the project artifacts for including in the ivy.xml + // + ConfigurationContainer configurations = project.getConfigurations(); + Configuration generatedSources = configurations.maybeCreate("generatedSources"); + Configuration testGeneratedSources = configurations.maybeCreate("testGeneratedSources"); + testGeneratedSources.extendsFrom(generatedSources); + + _generateSourcesJarTask = project.getTasks().create("generateSourcesJar", Jar.class, jarTask -> { + jarTask.setGroup(JavaBasePlugin.DOCUMENTATION_GROUP); + jarTask.setDescription("Generates a jar file containing the sources for the generated Java classes."); + // FIXME change to #getArchiveClassifier().set("sources"); breaks backwards-compatibility before 5.1 + // DataHub Note - applied FIXME + jarTask.getArchiveClassifier().set("sources"); + }); + + project.getArtifacts().add("generatedSources", _generateSourcesJarTask); + } + + if (_generateJavadocJarTask == null) + { + // + // configuration for publishing jars containing Javadoc for generated classes + // to the project artifacts for including in the ivy.xml + // + ConfigurationContainer configurations = project.getConfigurations(); + Configuration generatedJavadoc = configurations.maybeCreate("generatedJavadoc"); + Configuration testGeneratedJavadoc = configurations.maybeCreate("testGeneratedJavadoc"); + testGeneratedJavadoc.extendsFrom(generatedJavadoc); + + _generateJavadocJarTask = project.getTasks().create("generateJavadocJar", Jar.class, jarTask -> { + jarTask.dependsOn(_generateJavadocTask); + jarTask.setGroup(JavaBasePlugin.DOCUMENTATION_GROUP); + jarTask.setDescription("Generates a jar file containing the Javadoc for the generated Java classes."); + // FIXME change to #getArchiveClassifier().set("sources"); breaks backwards-compatibility before 5.1 + // DataHub Note - applied FIXME + jarTask.getArchiveClassifier().set("javadoc"); + jarTask.from(_generateJavadocTask.getDestinationDir()); + }); + + project.getArtifacts().add("generatedJavadoc", _generateJavadocJarTask); + } + else + { + // TODO: Tighten the types so that _generateJavadocJarTask must be of type Jar. 
+      ((Jar) _generateJavadocJarTask).from(_generateJavadocTask.getDestinationDir());
+      _generateJavadocJarTask.dependsOn(_generateJavadocTask);
+    }
+  }
+
+  private static void deleteGeneratedDir(Project project, SourceSet sourceSet, String dirType)
+  {
+    String generatedDirPath = getGeneratedDirPath(project, sourceSet, dirType);
+    project.getLogger().info("Delete generated directory {}", generatedDirPath);
+    project.delete(generatedDirPath);
+  }
+
+  private static <E extends Enum<E>> Class<E> getCompatibilityLevelClass(Project project)
+  {
+    ClassLoader generatorClassLoader = (ClassLoader) project.property(GENERATOR_CLASSLOADER_NAME);
+
+    String className = "com.linkedin.restli.tools.idlcheck.CompatibilityLevel";
+    try
+    {
+      @SuppressWarnings("unchecked")
+      Class<E> enumClass = (Class<E>) generatorClassLoader.loadClass(className).asSubclass(Enum.class);
+      return enumClass;
+    }
+    catch (ClassNotFoundException e)
+    {
+      throw new RuntimeException("Could not load class " + className);
+    }
+  }
+
+  private static void addGeneratedDir(Project project, SourceSet sourceSet, Collection<Configuration> configurations)
+  {
+    project.getPlugins().withType(IdeaPlugin.class, ideaPlugin -> {
+      IdeaModule ideaModule = ideaPlugin.getModel().getModule();
+      // stupid if block needed because of stupid assignment required to update source dirs
+      if (isTestSourceSet(sourceSet))
+      {
+        Set<File> sourceDirs = ideaModule.getTestSourceDirs();
+        sourceDirs.addAll(sourceSet.getJava().getSrcDirs());
+        // this is stupid but assignment is required
+        ideaModule.setTestSourceDirs(sourceDirs);
+        if (debug)
+        {
+          System.out.println("Added " + sourceSet.getJava().getSrcDirs() + " to IdeaModule testSourceDirs "
+              + ideaModule.getTestSourceDirs());
+        }
+      }
+      else
+      {
+        Set<File> sourceDirs = ideaModule.getSourceDirs();
+        sourceDirs.addAll(sourceSet.getJava().getSrcDirs());
+        // this is stupid but assignment is required
+        ideaModule.setSourceDirs(sourceDirs);
+        if (debug)
+        {
+          System.out.println("Added " + sourceSet.getJava().getSrcDirs() + " to IdeaModule sourceDirs "
+              + ideaModule.getSourceDirs());
+        }
+      }
+      Collection<Configuration> compilePlus = ideaModule.getScopes().get("COMPILE").get("plus");
+      compilePlus.addAll(configurations);
+      ideaModule.getScopes().get("COMPILE").put("plus", compilePlus);
+    });
+  }
+
+  private static void checkAvroSchemaExist(Project project, SourceSet sourceSet)
+  {
+    String sourceDir = "src" + File.separatorChar + sourceSet.getName();
+    File avroSourceDir = project.file(sourceDir + File.separatorChar + "avro");
+    if (avroSourceDir.exists())
+    {
+      project.getLogger().lifecycle("{}'s {} has non-empty avro directory. pegasus plugin does not process avro directory",
+          project.getName(), sourceDir);
+    }
+  }
+
+  // Compute the name of the source set that will contain a given type of generated code.
+  // e.g. genType may be 'DataTemplate' or 'Rest'
+  private static String getGeneratedSourceSetName(SourceSet sourceSet, String genType)
+  {
+    return sourceSet.getName() + "Generated" + genType;
+  }
+
+  // Compute the directory name that will contain a given type of generated code for an input source set.
+  // e.g. genType may be 'DataTemplate' or 'Rest'
+  public static String getGeneratedDirPath(Project project, SourceSet sourceSet, String genType)
+  {
+    String override = getOverridePath(project, sourceSet, "overrideGeneratedDir");
+    String sourceSetName = getGeneratedSourceSetName(sourceSet, genType);
+    String base = override == null ? "src" : override;
+
+    return base + File.separatorChar + sourceSetName;
+  }
+
+  public static String getDataSchemaPath(Project project, SourceSet sourceSet)
+  {
+    String override = getOverridePath(project, sourceSet, "overridePegasusDir");
+    if (override == null)
+    {
+      return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "pegasus";
+    }
+    else
+    {
+      return override;
+    }
+  }
+
+  private static String getExtensionSchemaPath(Project project, SourceSet sourceSet)
+  {
+    String override = getOverridePath(project, sourceSet, "overrideExtensionSchemaDir");
+    if (override == null)
+    {
+      return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "extensions";
+    }
+    else
+    {
+      return override;
+    }
+  }
+
+  private static String getSnapshotPath(Project project, SourceSet sourceSet)
+  {
+    String override = getOverridePath(project, sourceSet, "overrideSnapshotDir");
+    if (override == null)
+    {
+      return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "snapshot";
+    }
+    else
+    {
+      return override;
+    }
+  }
+
+  private static String getIdlPath(Project project, SourceSet sourceSet)
+  {
+    String override = getOverridePath(project, sourceSet, "overrideIdlDir");
+    if (override == null)
+    {
+      return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "idl";
+    }
+    else
+    {
+      return override;
+    }
+  }
+
+  private static String getPegasusSchemaSnapshotPath(Project project, SourceSet sourceSet)
+  {
+    String override = getOverridePath(project, sourceSet, PEGASUS_SCHEMA_SNAPSHOT_DIR_OVERRIDE);
+    if (override == null)
+    {
+      return SRC + File.separatorChar + sourceSet.getName() + File.separatorChar + PEGASUS_SCHEMA_SNAPSHOT_DIR;
+    }
+    else
+    {
+      return override;
+    }
+  }
+
+  private static String getPegasusExtensionSchemaSnapshotPath(Project project, SourceSet sourceSet)
+  {
+    String override = getOverridePath(project, sourceSet, PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR_OVERRIDE);
+    if (override == null)
+    {
+      return SRC + File.separatorChar + sourceSet.getName() + File.separatorChar + PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR;
+    }
+    else
+    {
+      return override;
+    }
+  }
+
+  private static String getOverridePath(Project project, SourceSet sourceSet, String overridePropertyName)
+  {
+    String sourceSetPropertyName = sourceSet.getName() + '.' + overridePropertyName;
+    String override = getNonEmptyProperty(project, sourceSetPropertyName);
+
+    if (override == null && sourceSet.getName().equals("main"))
+    {
+      override = getNonEmptyProperty(project, overridePropertyName);
+    }
+
+    return override;
+  }
+
+  private static boolean isTestSourceSet(SourceSet sourceSet)
+  {
+    return TEST_DIR_REGEX.matcher(sourceSet.getName()).find();
+  }
+
+  private static Configuration getDataModelConfig(Project project, SourceSet sourceSet)
+  {
+    return isTestSourceSet(sourceSet)
+        ? project.getConfigurations().getByName("testDataModel")
+        : project.getConfigurations().getByName("dataModel");
+  }
+
+  private static boolean isTaskSuccessful(Task task)
+  {
+    return task.getState().getExecuted()
+        // A task is not successful if it was skipped but is not up-to-date.
+ && !(task.getState().getSkipped() && !task.getState().getUpToDate()) + && task.getState().getFailure() == null; + } + + private static boolean isResultEquivalent(File compatibilityLogFile) + { + return isResultEquivalent(compatibilityLogFile, false); + } + + private static boolean isResultEquivalent(File compatibilityLogFile, boolean restSpecOnly) + { + CompatibilityLogChecker logChecker = new CompatibilityLogChecker(); + try + { + logChecker.write(Files.readAllBytes(compatibilityLogFile.toPath())); + } + catch (IOException e) + { + throw new GradleException("Error while processing compatibility report: " + e.getMessage()); + } + return logChecker.getRestSpecCompatibility().isEmpty() && + (restSpecOnly || logChecker.getModelCompatibility().isEmpty()); + } + + protected void configureRestModelGeneration(Project project, SourceSet sourceSet) + { + if (sourceSet.getAllSource().isEmpty()) + { + project.getLogger().info("No source files found for sourceSet {}. Skipping idl generation.", sourceSet.getName()); + return; + } + + // afterEvaluate needed so that api project can be overridden via ext.apiProject + project.afterEvaluate(p -> + { + // find api project here instead of in each project's plugin configuration + // this allows api project relation options (ext.api*) to be specified anywhere in the build.gradle file + // alternatively, pass closures to task configuration, and evaluate the closures when task is executed + Project apiProject = getCheckedApiProject(project); + + // make sure the api project is evaluated. Important for configure-on-demand mode. + if (apiProject != null) + { + project.evaluationDependsOn(apiProject.getPath()); + + if (!apiProject.getPlugins().hasPlugin(_thisPluginType)) + { + apiProject = null; + } + } + + if (apiProject == null) + { + return; + } + + Task untypedJarTask = project.getTasks().findByName(sourceSet.getJarTaskName()); + if (!(untypedJarTask instanceof Jar)) + { + return; + } + Jar jarTask = (Jar) untypedJarTask; + + String snapshotCompatPropertyName = findProperty(FileCompatibilityType.SNAPSHOT); + if (project.hasProperty(snapshotCompatPropertyName) && "off".equalsIgnoreCase((String) project.property(snapshotCompatPropertyName))) + { + project.getLogger().lifecycle("Project {} snapshot compatibility level \"OFF\" is deprecated. 
Default to \"IGNORE\".", + project.getPath()); + } + + // generate the rest model + FileCollection restModelCodegenClasspath = project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION) + .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME)) + .plus(sourceSet.getRuntimeClasspath()); + String destinationDirPrefix = getGeneratedDirPath(project, sourceSet, REST_GEN_TYPE) + File.separatorChar; + FileCollection restModelResolverPath = apiProject.files(getDataSchemaPath(project, sourceSet)) + .plus(getDataModelConfig(apiProject, sourceSet)); + Set watchedRestModelInputDirs = buildWatchedRestModelInputDirs(project, sourceSet); + Set restModelInputDirs = difference(sourceSet.getAllSource().getSrcDirs(), + sourceSet.getResources().getSrcDirs()); + + Task generateRestModelTask = project.getTasks() + .create(sourceSet.getTaskName("generate", "restModel"), GenerateRestModelTask.class, task -> + { + task.dependsOn(project.getTasks().getByName(sourceSet.getClassesTaskName())); + task.setCodegenClasspath(restModelCodegenClasspath); + task.setWatchedCodegenClasspath(restModelCodegenClasspath + .filter(file -> !"main".equals(file.getName()) && !"classes".equals(file.getName()))); + task.setInputDirs(restModelInputDirs); + task.setWatchedInputDirs(watchedRestModelInputDirs.isEmpty() + ? restModelInputDirs : watchedRestModelInputDirs); + // we need all the artifacts from runtime for any private implementation classes the server code might need. + task.setSnapshotDestinationDir(project.file(destinationDirPrefix + "snapshot")); + task.setIdlDestinationDir(project.file(destinationDirPrefix + "idl")); + + @SuppressWarnings("unchecked") + Map pegasusOptions = (Map) project + .getExtensions().getExtraProperties().get("pegasus"); + task.setIdlOptions(pegasusOptions.get(sourceSet.getName()).idlOptions); + + task.setResolverPath(restModelResolverPath); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + + task.onlyIf(t -> !isPropertyTrue(project, SKIP_GENERATE_REST_MODEL)); + + task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, REST_GEN_TYPE))); + }); + + File apiSnapshotDir = apiProject.file(getSnapshotPath(apiProject, sourceSet)); + File apiIdlDir = apiProject.file(getIdlPath(apiProject, sourceSet)); + apiSnapshotDir.mkdirs(); + + if (!isPropertyTrue(project, SKIP_IDL_CHECK)) + { + apiIdlDir.mkdirs(); + } + + CheckRestModelTask checkRestModelTask = project.getTasks() + .create(sourceSet.getTaskName("check", "RestModel"), CheckRestModelTask.class, task -> + { + task.dependsOn(generateRestModelTask); + task.setCurrentSnapshotFiles(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix)); + task.setPreviousSnapshotDirectory(apiSnapshotDir); + task.setCurrentIdlFiles(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); + task.setPreviousIdlDirectory(apiIdlDir); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setModelCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.SNAPSHOT)); + task.onlyIf(t -> !isPropertyTrue(project, SKIP_IDL_CHECK)); + + task.doLast(new CacheableAction<>(t -> + { + if (!task.isEquivalent()) + { + _restModelCompatMessage.append(task.getWholeMessage()); + } + })); + }); + + CheckSnapshotTask checkSnapshotTask = project.getTasks() + .create(sourceSet.getTaskName("check", "Snapshot"), CheckSnapshotTask.class, task -> { + task.dependsOn(generateRestModelTask); + 
task.setCurrentSnapshotFiles(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix)); + task.setPreviousSnapshotDirectory(apiSnapshotDir); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setSnapshotCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.SNAPSHOT)); + + task.onlyIf(t -> isPropertyTrue(project, SKIP_IDL_CHECK)); + }); + + CheckIdlTask checkIdlTask = project.getTasks() + .create(sourceSet.getTaskName("check", "Idl"), CheckIdlTask.class, task -> + { + task.dependsOn(generateRestModelTask); + task.setCurrentIdlFiles(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); + task.setPreviousIdlDirectory(apiIdlDir); + task.setResolverPath(restModelResolverPath); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setIdlCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.IDL)); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + + + task.onlyIf(t -> !isPropertyTrue(project, SKIP_IDL_CHECK) + && !"OFF".equals(PropertyUtil.findCompatLevel(project, FileCompatibilityType.IDL))); + }); + + // rest model publishing involves cross-project reference + // configure after all projects have been evaluated + // the file copy can be turned off by "rest.model.noPublish" flag + Task publishRestliSnapshotTask = project.getTasks() + .create(sourceSet.getTaskName("publish", "RestliSnapshot"), PublishRestModelTask.class, task -> + { + task.dependsOn(checkRestModelTask, checkSnapshotTask, checkIdlTask); + task.from(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix)); + task.into(apiSnapshotDir); + task.setSuffix(SNAPSHOT_FILE_SUFFIX); + + task.onlyIf(t -> + isPropertyTrue(project, SNAPSHOT_FORCE_PUBLISH) || + ( + !isPropertyTrue(project, SNAPSHOT_NO_PUBLISH) && + ( + ( + isPropertyTrue(project, SKIP_IDL_CHECK) && + isTaskSuccessful(checkSnapshotTask) && + checkSnapshotTask.getSummaryTarget().exists() && + !isResultEquivalent(checkSnapshotTask.getSummaryTarget()) + ) || + ( + !isPropertyTrue(project, SKIP_IDL_CHECK) && + isTaskSuccessful(checkRestModelTask) && + checkRestModelTask.getSummaryTarget().exists() && + !isResultEquivalent(checkRestModelTask.getSummaryTarget()) + ) + )) + ); + }); + + Task publishRestliIdlTask = project.getTasks() + .create(sourceSet.getTaskName("publish", "RestliIdl"), PublishRestModelTask.class, task -> { + task.dependsOn(checkRestModelTask, checkIdlTask, checkSnapshotTask); + task.from(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); + task.into(apiIdlDir); + task.setSuffix(IDL_FILE_SUFFIX); + + task.onlyIf(t -> + isPropertyTrue(project, IDL_FORCE_PUBLISH) || + ( + !isPropertyTrue(project, IDL_NO_PUBLISH) && + ( + ( + isPropertyTrue(project, SKIP_IDL_CHECK) && + isTaskSuccessful(checkSnapshotTask) && + checkSnapshotTask.getSummaryTarget().exists() && + !isResultEquivalent(checkSnapshotTask.getSummaryTarget(), true) + ) || + ( + !isPropertyTrue(project, SKIP_IDL_CHECK) && + ( + (isTaskSuccessful(checkRestModelTask) && + checkRestModelTask.getSummaryTarget().exists() && + !isResultEquivalent(checkRestModelTask.getSummaryTarget(), true)) || + (isTaskSuccessful(checkIdlTask) && + checkIdlTask.getSummaryTarget().exists() && + !isResultEquivalent(checkIdlTask.getSummaryTarget())) + ) + ) + )) + ); + }); + + project.getLogger().info("API project selected for {} is {}", + publishRestliIdlTask.getPath(), apiProject.getPath()); + + 
jarTask.from(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); + // add generated .restspec.json files as resources to the jar + jarTask.dependsOn(publishRestliSnapshotTask, publishRestliIdlTask); + + ChangedFileReportTask changedFileReportTask = (ChangedFileReportTask) project.getTasks() + .getByName("changedFilesReport"); + + // Use the files from apiDir for generating the changed files report as we need to notify user only when + // source system files are modified. + changedFileReportTask.setIdlFiles(SharedFileUtils.getSuffixedFiles(project, apiIdlDir, IDL_FILE_SUFFIX)); + changedFileReportTask.setSnapshotFiles(SharedFileUtils.getSuffixedFiles(project, apiSnapshotDir, + SNAPSHOT_FILE_SUFFIX)); + changedFileReportTask.mustRunAfter(publishRestliSnapshotTask, publishRestliIdlTask); + changedFileReportTask.doLast(new CacheableAction<>(t -> + { + if (!changedFileReportTask.getNeedCheckinFiles().isEmpty()) + { + project.getLogger().info("Adding modified files to need checkin list..."); + _needCheckinFiles.addAll(changedFileReportTask.getNeedCheckinFiles()); + _needBuildFolders.add(getCheckedApiProject(project).getPath()); + } + })); + }); + } + + protected void configurePegasusSchemaSnapshotGeneration(Project project, SourceSet sourceSet, boolean isExtensionSchema) + { + File schemaDir = isExtensionSchema? project.file(getExtensionSchemaPath(project, sourceSet)) + : project.file(getDataSchemaPath(project, sourceSet)); + + if ((isExtensionSchema && SharedFileUtils.getSuffixedFiles(project, schemaDir, PDL_FILE_SUFFIX).isEmpty()) || + (!isExtensionSchema && SharedFileUtils.getSuffixedFiles(project, schemaDir, DATA_TEMPLATE_FILE_SUFFIXES).isEmpty())) + { + return; + } + + Path publishablePegasusSchemaSnapshotDir = project.getBuildDir().toPath().resolve(sourceSet.getName() + + (isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT)); + + Task generatePegasusSchemaSnapshot = generatePegasusSchemaSnapshot(project, sourceSet, + isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT, schemaDir, + publishablePegasusSchemaSnapshotDir.toFile(), isExtensionSchema); + + File pegasusSchemaSnapshotDir = project.file(isExtensionSchema ? getPegasusExtensionSchemaSnapshotPath(project, sourceSet) + : getPegasusSchemaSnapshotPath(project, sourceSet)); + pegasusSchemaSnapshotDir.mkdirs(); + + Task checkSchemaSnapshot = project.getTasks().create(sourceSet.getTaskName("check", + isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT), + CheckPegasusSnapshotTask.class, task -> + { + task.dependsOn(generatePegasusSchemaSnapshot); + task.setCurrentSnapshotDirectory(publishablePegasusSchemaSnapshotDir.toFile()); + task.setPreviousSnapshotDirectory(pegasusSchemaSnapshotDir); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION) + .plus(project.getConfigurations().getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION)) + .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME))); + task.setCompatibilityLevel(isExtensionSchema ? + PropertyUtil.findCompatLevel(project, FileCompatibilityType.PEGASUS_EXTENSION_SCHEMA_SNAPSHOT) + :PropertyUtil.findCompatLevel(project, FileCompatibilityType.PEGASUS_SCHEMA_SNAPSHOT)); + task.setCompatibilityMode(isExtensionSchema ? 
COMPATIBILITY_OPTIONS_MODE_EXTENSION :
+              PropertyUtil.findCompatMode(project, PEGASUS_COMPATIBILITY_MODE));
+          task.setExtensionSchema(isExtensionSchema);
+          task.setHandlerJarPath(project.getConfigurations().getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION));
+
+          task.onlyIf(t ->
+          {
+            String pegasusSnapshotCompatPropertyName = isExtensionSchema ?
+                findProperty(FileCompatibilityType.PEGASUS_EXTENSION_SCHEMA_SNAPSHOT)
+                : findProperty(FileCompatibilityType.PEGASUS_SCHEMA_SNAPSHOT);
+            return !project.hasProperty(pegasusSnapshotCompatPropertyName) ||
+                !"off".equalsIgnoreCase((String) project.property(pegasusSnapshotCompatPropertyName));
+          });
+        });
+
+    Task publishPegasusSchemaSnapshot = publishPegasusSchemaSnapshot(project, sourceSet,
+        isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT : PEGASUS_SCHEMA_SNAPSHOT, checkSchemaSnapshot,
+        publishablePegasusSchemaSnapshotDir.toFile(), pegasusSchemaSnapshotDir);
+
+    project.getTasks().getByName(LifecycleBasePlugin.ASSEMBLE_TASK_NAME).dependsOn(publishPegasusSchemaSnapshot);
+  }
+
+  @SuppressWarnings("deprecation")
+  protected void configureAvroSchemaGeneration(Project project, SourceSet sourceSet)
+  {
+    File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet));
+    File avroDir = project.file(getGeneratedDirPath(project, sourceSet, AVRO_SCHEMA_GEN_TYPE)
+        + File.separatorChar + "avro");
+
+    // generate avro schema files from data schema
+    Task generateAvroSchemaTask = project.getTasks()
+        .create(sourceSet.getTaskName("generate", "avroSchema"), GenerateAvroSchemaTask.class, task -> {
+          task.setInputDir(dataSchemaDir);
+          task.setDestinationDir(avroDir);
+          task.setResolverPath(getDataModelConfig(project, sourceSet));
+          task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
+          if (isPropertyTrue(project, ENABLE_ARG_FILE))
+          {
+            task.setEnableArgFile(true);
+          }
+
+          task.onlyIf(t ->
+          {
+            if (task.getInputDir().exists())
+            {
+              @SuppressWarnings("unchecked")
+              Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
+                  .getExtensions().getExtraProperties().get("pegasus");
+
+              if (pegasusOptions.get(sourceSet.getName()).hasGenerationMode(PegasusOptions.GenerationMode.AVRO))
+              {
+                return true;
+              }
+            }
+
+            return !project.getConfigurations().getByName("avroSchemaGenerator").isEmpty();
+          });
+
+          task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, AVRO_SCHEMA_GEN_TYPE)));
+        });
+
+    project.getTasks().getByName(sourceSet.getCompileJavaTaskName()).dependsOn(generateAvroSchemaTask);
+
+    // create avro schema jar file
+
+    Task avroSchemaJarTask = project.getTasks().create(sourceSet.getName() + "AvroSchemaJar", Jar.class, task ->
+    {
+      // add path prefix to each file in the data schema directory
+      task.from(avroDir, copySpec ->
+          copySpec.eachFile(fileCopyDetails ->
+              fileCopyDetails.setPath("avro" + File.separatorChar + fileCopyDetails.getPath())));
+
+      // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1
+      // DataHub Note - applied FIXME
+      task.getArchiveAppendix().set(getAppendix(sourceSet, "avro-schema"));
+      task.setDescription("Generate an avro schema jar");
+    });
+
+    if (!isTestSourceSet(sourceSet))
+    {
+      project.getArtifacts().add("avroSchema", avroSchemaJarTask);
+    }
+    else
+    {
+      project.getArtifacts().add("testAvroSchema", avroSchemaJarTask);
+    }
+  }
+
+  protected void configureConversionUtilities(Project project, SourceSet sourceSet)
+  {
+    File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet));
+    boolean reverse =
isPropertyTrue(project, CONVERT_TO_PDL_REVERSE); + boolean keepOriginal = isPropertyTrue(project, CONVERT_TO_PDL_KEEP_ORIGINAL); + boolean skipVerification = isPropertyTrue(project, CONVERT_TO_PDL_SKIP_VERIFICATION); + String preserveSourceCmd = getNonEmptyProperty(project, CONVERT_TO_PDL_PRESERVE_SOURCE_CMD); + + // Utility task for migrating between PDSC and PDL. + project.getTasks().create(sourceSet.getTaskName("convert", "ToPdl"), TranslateSchemasTask.class, task -> + { + task.setInputDir(dataSchemaDir); + task.setDestinationDir(dataSchemaDir); + task.setResolverPath(getDataModelConfig(project, sourceSet)); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setPreserveSourceCmd(preserveSourceCmd); + if (reverse) + { + task.setSourceFormat(SchemaFileType.PDL); + task.setDestinationFormat(SchemaFileType.PDSC); + } + else + { + task.setSourceFormat(SchemaFileType.PDSC); + task.setDestinationFormat(SchemaFileType.PDL); + } + task.setKeepOriginal(keepOriginal); + task.setSkipVerification(skipVerification); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + + task.onlyIf(t -> task.getInputDir().exists()); + task.doLast(new CacheableAction<>(t -> + { + project.getLogger().lifecycle("Pegasus schema conversion complete."); + project.getLogger().lifecycle("All pegasus schema files in " + dataSchemaDir + " have been converted"); + project.getLogger().lifecycle("You can use '-PconvertToPdl.reverse=true|false' to change the direction of conversion."); + })); + }); + + // Helper task for reformatting existing PDL schemas by generating them again. + project.getTasks().create(sourceSet.getTaskName("reformat", "Pdl"), TranslateSchemasTask.class, task -> + { + task.setInputDir(dataSchemaDir); + task.setDestinationDir(dataSchemaDir); + task.setResolverPath(getDataModelConfig(project, sourceSet)); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setSourceFormat(SchemaFileType.PDL); + task.setDestinationFormat(SchemaFileType.PDL); + task.setKeepOriginal(true); + task.setSkipVerification(true); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + + task.onlyIf(t -> task.getInputDir().exists()); + task.doLast(new CacheableAction<>(t -> project.getLogger().lifecycle("PDL reformat complete."))); + }); + } + + @SuppressWarnings("deprecation") + protected GenerateDataTemplateTask configureDataTemplateGeneration(Project project, SourceSet sourceSet) + { + File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet)); + File generatedDataTemplateDir = project.file(getGeneratedDirPath(project, sourceSet, DATA_TEMPLATE_GEN_TYPE) + + File.separatorChar + "java"); + File publishableSchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath() + + File.separatorChar + sourceSet.getName() + "Schemas"); + File publishableLegacySchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath() + + File.separatorChar + sourceSet.getName() + "LegacySchemas"); + File publishableExtensionSchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath() + + File.separatorChar + sourceSet.getName() + "ExtensionSchemas"); + + // generate data template source files from data schema + GenerateDataTemplateTask generateDataTemplatesTask = project.getTasks() + .create(sourceSet.getTaskName("generate", "dataTemplate"), GenerateDataTemplateTask.class, task -> + { + task.setInputDir(dataSchemaDir); + 
task.setDestinationDir(generatedDataTemplateDir);
+          task.setResolverPath(getDataModelConfig(project, sourceSet));
+          task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
+          if (isPropertyTrue(project, ENABLE_ARG_FILE))
+          {
+            task.setEnableArgFile(true);
+          }
+          if (isPropertyTrue(project, CODE_GEN_PATH_CASE_SENSITIVE))
+          {
+            task.setGenerateLowercasePath(false);
+          }
+
+          task.onlyIf(t ->
+          {
+            if (task.getInputDir().exists())
+            {
+              @SuppressWarnings("unchecked")
+              Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
+                  .getExtensions().getExtraProperties().get("pegasus");
+
+              return pegasusOptions.get(sourceSet.getName()).hasGenerationMode(PegasusOptions.GenerationMode.PEGASUS);
+            }
+
+            return false;
+          });
+
+          task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, DATA_TEMPLATE_GEN_TYPE)));
+        });
+
+    // TODO: Tighten the types so that _generateSourcesJarTask must be of type Jar.
+    ((Jar) _generateSourcesJarTask).from(generateDataTemplatesTask.getDestinationDir());
+    _generateSourcesJarTask.dependsOn(generateDataTemplatesTask);
+
+    _generateJavadocTask.source(generateDataTemplatesTask.getDestinationDir());
+    _generateJavadocTask.setClasspath(_generateJavadocTask.getClasspath()
+        .plus(project.getConfigurations().getByName("dataTemplateCompile"))
+        .plus(generateDataTemplatesTask.getResolverPath()));
+    _generateJavadocTask.dependsOn(generateDataTemplatesTask);
+
+    // Add extra dependencies for data model compilation
+    project.getDependencies().add("dataTemplateCompile", "com.google.code.findbugs:jsr305:3.0.2");
+
+    // create new source set for generated java source and class files
+    String targetSourceSetName = getGeneratedSourceSetName(sourceSet, DATA_TEMPLATE_GEN_TYPE);
+
+    SourceSetContainer sourceSets = project.getConvention()
+        .getPlugin(JavaPluginConvention.class).getSourceSets();
+
+    SourceSet targetSourceSet = sourceSets.create(targetSourceSetName, ss ->
+    {
+      ss.java(sourceDirectorySet -> sourceDirectorySet.srcDir(generatedDataTemplateDir));
+      ss.setCompileClasspath(getDataModelConfig(project, sourceSet)
+          .plus(project.getConfigurations().getByName("dataTemplateCompile")));
+    });
+
+    // idea plugin needs to know about new generated java source directory and its dependencies
+    addGeneratedDir(project, targetSourceSet, Arrays.asList(
+        getDataModelConfig(project, sourceSet),
+        project.getConfigurations().getByName("dataTemplateCompile")));
+
+    // Set source compatibility to 1.8 as the data-templates now generate code with Java 8 features.
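+    // (Editor's note, an interpretation rather than part of the patch: applying the override in
+    // a doFirst action defers it to task execution time, so it takes effect even if the consuming
+    // build sets a different global sourceCompatibility at configuration time; for example, a
+    // root project targeting Java 17 still compiles the generated templates with -source/-target 1.8.)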
+ JavaCompile compileTask = project.getTasks() + .withType(JavaCompile.class).getByName(targetSourceSet.getCompileJavaTaskName()); + compileTask.doFirst(new CacheableAction<>(task -> { + ((JavaCompile) task).setSourceCompatibility("1.8"); + ((JavaCompile) task).setTargetCompatibility("1.8"); + })); + // make sure that java source files have been generated before compiling them + compileTask.dependsOn(generateDataTemplatesTask); + + // Dummy task to maintain backward compatibility + // TODO: Delete this task once use cases have had time to reference the new task + Task destroyStaleFiles = project.getTasks().create(sourceSet.getName() + "DestroyStaleFiles", Delete.class); + destroyStaleFiles.onlyIf(task -> { + project.getLogger().lifecycle("{} task is a NO-OP task.", task.getPath()); + return false; + }); + + // Dummy task to maintain backward compatibility, as this task was replaced by CopySchemas + // TODO: Delete this task once use cases have had time to reference the new task + Task copyPdscSchemasTask = project.getTasks().create(sourceSet.getName() + "CopyPdscSchemas", Copy.class); + copyPdscSchemasTask.dependsOn(destroyStaleFiles); + copyPdscSchemasTask.onlyIf(task -> { + project.getLogger().lifecycle("{} task is a NO-OP task.", task.getPath()); + return false; + }); + + // Prepare schema files for publication by syncing schema folders. + Task prepareSchemasForPublishTask = project.getTasks() + .create(sourceSet.getName() + "CopySchemas", Sync.class, task -> + { + task.from(dataSchemaDir, syncSpec -> DATA_TEMPLATE_FILE_SUFFIXES.forEach(suffix -> syncSpec.include("**/*" + suffix))); + task.into(publishableSchemasBuildDir); + }); + prepareSchemasForPublishTask.dependsOn(copyPdscSchemasTask); + + Collection dataTemplateJarDepends = new ArrayList<>(); + dataTemplateJarDepends.add(compileTask); + dataTemplateJarDepends.add(prepareSchemasForPublishTask); + + // Convert all PDL files back to PDSC for publication + // TODO: Remove this conversion permanently once translated PDSCs are no longer needed. + Task prepareLegacySchemasForPublishTask = project.getTasks() + .create(sourceSet.getName() + "TranslateSchemas", TranslateSchemasTask.class, task -> + { + task.setInputDir(dataSchemaDir); + task.setDestinationDir(publishableLegacySchemasBuildDir); + task.setResolverPath(getDataModelConfig(project, sourceSet)); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setSourceFormat(SchemaFileType.PDL); + task.setDestinationFormat(SchemaFileType.PDSC); + task.setKeepOriginal(true); + task.setSkipVerification(true); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + }); + + prepareLegacySchemasForPublishTask.dependsOn(destroyStaleFiles); + dataTemplateJarDepends.add(prepareLegacySchemasForPublishTask); + + // extension schema directory + File extensionSchemaDir = project.file(getExtensionSchemaPath(project, sourceSet)); + + if (!SharedFileUtils.getSuffixedFiles(project, extensionSchemaDir, PDL_FILE_SUFFIX).isEmpty()) + { + // Validate extension schemas if extension schemas are provided. 
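+      // (Editor's illustration: per getExtensionSchemaPath above, these are .pdl files under
+      // src/<sourceSet>/extensions (e.g. a hypothetical src/main/extensions/FooExtensions.pdl),
+      // unless relocated via the overrideExtensionSchemaDir property.)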
+ ValidateExtensionSchemaTask validateExtensionSchemaTask = project.getTasks() + .create(sourceSet.getTaskName("validate", "ExtensionSchemas"), ValidateExtensionSchemaTask.class, task -> + { + task.setInputDir(extensionSchemaDir); + task.setResolverPath( + getDataModelConfig(project, sourceSet).plus(project.files(getDataSchemaPath(project, sourceSet)))); + task.setClassPath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + }); + + Task prepareExtensionSchemasForPublishTask = project.getTasks() + .create(sourceSet.getName() + "CopyExtensionSchemas", Sync.class, task -> + { + task.from(extensionSchemaDir, syncSpec -> syncSpec.include("**/*" + PDL_FILE_SUFFIX)); + task.into(publishableExtensionSchemasBuildDir); + }); + + prepareExtensionSchemasForPublishTask.dependsOn(validateExtensionSchemaTask); + prepareExtensionSchemasForPublishTask.dependsOn(copyPdscSchemasTask); + dataTemplateJarDepends.add(prepareExtensionSchemasForPublishTask); + } + + // include pegasus files in the output of this SourceSet + project.getTasks().withType(ProcessResources.class).getByName(targetSourceSet.getProcessResourcesTaskName(), it -> + { + it.from(prepareSchemasForPublishTask, copy -> copy.into("pegasus")); + // TODO: Remove this permanently once translated PDSCs are no longer needed. + it.from(prepareLegacySchemasForPublishTask, copy -> copy.into(TRANSLATED_SCHEMAS_DIR)); + Sync copyExtensionSchemasTask = project.getTasks().withType(Sync.class).findByName(sourceSet.getName() + "CopyExtensionSchemas"); + if (copyExtensionSchemasTask != null) + { + it.from(copyExtensionSchemasTask, copy -> copy.into("extensions")); + } + }); + + // create data template jar file + Jar dataTemplateJarTask = project.getTasks() + .create(sourceSet.getName() + "DataTemplateJar", Jar.class, task -> + { + task.dependsOn(dataTemplateJarDepends); + task.from(targetSourceSet.getOutput()); + + // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 + // DataHub Note - applied FIXME + task.getArchiveAppendix().set(getAppendix(sourceSet, "data-template")); + task.setDescription("Generate a data template jar"); + }); + + // add the data model and date template jars to the list of project artifacts. + if (!isTestSourceSet(sourceSet)) + { + project.getArtifacts().add("dataTemplate", dataTemplateJarTask); + } + else + { + project.getArtifacts().add("testDataTemplate", dataTemplateJarTask); + } + + // include additional dependencies into the appropriate configuration used to compile the input source set + // must include the generated data template classes and their dependencies the configuration. + // "compile" and "testCompile" configurations have been removed in Gradle 7, + // but to keep the maximum backward compatibility, here we handle Gradle 7 and earlier version differently + // Once MIN_REQUIRED_VERSION reaches 7.0, we can remove the check of isAtLeastGradle7() + String compileConfigName; + if (isAtLeastGradle7()) { + compileConfigName = isTestSourceSet(sourceSet) ? "testImplementation" : project.getConfigurations().findByName("api") != null ? "api" : "implementation"; + } + else + { + compileConfigName = isTestSourceSet(sourceSet) ? 
"testCompile" : "compile"; + } + + Configuration compileConfig = project.getConfigurations().maybeCreate(compileConfigName); + compileConfig.extendsFrom( + getDataModelConfig(project, sourceSet), + project.getConfigurations().getByName("dataTemplateCompile")); + + // The getArchivePath() API doesn’t carry any task dependency and has been deprecated. + // Replace it with getArchiveFile() on Gradle 7, + // but keep getArchivePath() to be backwards-compatibility with Gradle version older than 5.1 + // DataHub Note - applied FIXME + project.getDependencies().add(compileConfigName, project.files( + isAtLeastGradle7() ? dataTemplateJarTask.getArchiveFile() : dataTemplateJarTask.getArchivePath())); + + if (_configureIvyPublications) { + // The below Action is only applied when the 'ivy-publish' is applied by the consumer. + // If the consumer does not use ivy-publish, this is a noop. + // this Action prepares the project applying the pegasus plugin to publish artifacts using these steps: + // 1. Registers "feature variants" for pegasus-specific artifacts; + // see https://docs.gradle.org/6.1/userguide/feature_variants.html + // 2. Wires legacy configurations like `dataTemplateCompile` to auto-generated feature variant *Api and + // *Implementation configurations for backwards compatibility. + // 3. Configures the Ivy Publication to include auto-generated feature variant *Api and *Implementation + // configurations and their dependencies. + project.getPlugins().withType(IvyPublishPlugin.class, ivyPublish -> { + if (!isAtLeastGradle61()) + { + throw new GradleException("Using the ivy-publish plugin with the pegasus plugin requires Gradle 6.1 or higher " + + "at build time. Please upgrade."); + } + + JavaPluginExtension java = project.getExtensions().getByType(JavaPluginExtension.class); + // create new capabilities per source set; automatically creates api and implementation configurations + String featureName = mapSourceSetToFeatureName(targetSourceSet); + try + { + /* + reflection is required to preserve compatibility with Gradle 5.2.1 and below + TODO once Gradle 5.3+ is required, remove reflection and replace with: + java.registerFeature(featureName, featureSpec -> { + featureSpec.usingSourceSet(targetSourceSet); + }); + */ + Method registerFeature = JavaPluginExtension.class.getDeclaredMethod("registerFeature", String.class, Action.class); + Action/**/ featureSpecAction = createFeatureVariantFromSourceSet(targetSourceSet); + registerFeature.invoke(java, featureName, featureSpecAction); + } + catch (ReflectiveOperationException e) + { + throw new GradleException("Unable to register new feature variant", e); + } + + // expose transitive dependencies to consumers via variant configurations + Configuration featureConfiguration = project.getConfigurations().getByName(featureName); + Configuration mainGeneratedDataTemplateApi = project.getConfigurations().getByName(targetSourceSet.getApiConfigurationName()); + featureConfiguration.extendsFrom(mainGeneratedDataTemplateApi); + mainGeneratedDataTemplateApi.extendsFrom( + getDataModelConfig(project, targetSourceSet), + project.getConfigurations().getByName("dataTemplateCompile")); + + // Configure the existing IvyPublication + // For backwards-compatibility, make the legacy dataTemplate/testDataTemplate configurations extend + // their replacements, auto-created when we registered the new feature variant + project.afterEvaluate(p -> { + PublishingExtension publishing = p.getExtensions().getByType(PublishingExtension.class); + // When configuring a 
Gradle Publication, use this value to find the name of the publication to configure. Defaults to "ivy". + String publicationName = p.getExtensions().getExtraProperties().getProperties().getOrDefault("PegasusPublicationName", "ivy").toString(); + IvyPublication ivyPublication = publishing.getPublications().withType(IvyPublication.class).getByName(publicationName); + ivyPublication.configurations(configurations -> configurations.create(featureName, legacyConfiguration -> { + legacyConfiguration.extend(p.getConfigurations().getByName(targetSourceSet.getApiElementsConfigurationName()).getName()); + legacyConfiguration.extend(p.getConfigurations().getByName(targetSourceSet.getRuntimeElementsConfigurationName()).getName()); + })); + }); + }); + } + + if (debug) + { + System.out.println("configureDataTemplateGeneration sourceSet " + sourceSet.getName()); + System.out.println(compileConfigName + ".allDependencies : " + + project.getConfigurations().getByName(compileConfigName).getAllDependencies()); + System.out.println(compileConfigName + ".extendsFrom: " + + project.getConfigurations().getByName(compileConfigName).getExtendsFrom()); + System.out.println(compileConfigName + ".transitive: " + + project.getConfigurations().getByName(compileConfigName).isTransitive()); + } + + project.getTasks().getByName(sourceSet.getCompileJavaTaskName()).dependsOn(dataTemplateJarTask); + return generateDataTemplatesTask; + } + + private String mapSourceSetToFeatureName(SourceSet sourceSet) { + String featureName = ""; + switch (sourceSet.getName()) { + case "mainGeneratedDataTemplate": + featureName = "dataTemplate"; + break; + case "testGeneratedDataTemplate": + featureName = "testDataTemplate"; + break; + case "mainGeneratedRest": + featureName = "restClient"; + break; + case "testGeneratedRest": + featureName = "testRestClient"; + break; + case "mainGeneratedAvroSchema": + featureName = "avroSchema"; + break; + case "testGeneratedAvroSchema": + featureName = "testAvroSchema"; + break; + default: + String msg = String.format("Unable to map %s to an appropriate feature name", sourceSet); + throw new GradleException(msg); + } + return featureName; + } + + // Generate rest client from idl files generated from java source files in the specified source set. + // + // This generates rest client source files from idl file generated from java source files + // in the source set. The generated rest client source files will be in a new source set. + // It also compiles the rest client source files into classes, and creates both the + // rest model and rest client jar files. + // + @SuppressWarnings("deprecation") + protected void configureRestClientGeneration(Project project, SourceSet sourceSet) + { + // idl directory for api project + File idlDir = project.file(getIdlPath(project, sourceSet)); + if (SharedFileUtils.getSuffixedFiles(project, idlDir, IDL_FILE_SUFFIX).isEmpty() && !isPropertyTrue(project, + PROCESS_EMPTY_IDL_DIR)) + { + return; + } + File generatedRestClientDir = project.file(getGeneratedDirPath(project, sourceSet, REST_GEN_TYPE) + + File.separatorChar + "java"); + + // always include imported data template jars in compileClasspath of rest client + FileCollection dataModelConfig = getDataModelConfig(project, sourceSet); + + // if data templates generated from this source set, add the generated data template jar to compileClasspath + // of rest client. 
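+    // (Editor's worked example: getGeneratedSourceSetName concatenates the source set name with
+    // "Generated" plus the generation type, so for the "main" source set the lookup below
+    // resolves to "mainGeneratedDataTemplate", and the jar task fetched later in this method is
+    // named "mainDataTemplateJar".)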
+ String dataTemplateSourceSetName = getGeneratedSourceSetName(sourceSet, DATA_TEMPLATE_GEN_TYPE); + + Jar dataTemplateJarTask = null; + + SourceSetContainer sourceSets = project.getConvention() + .getPlugin(JavaPluginConvention.class).getSourceSets(); + + FileCollection dataModels; + if (sourceSets.findByName(dataTemplateSourceSetName) != null) + { + if (debug) + { + System.out.println("sourceSet " + sourceSet.getName() + " has generated sourceSet " + dataTemplateSourceSetName); + } + dataTemplateJarTask = (Jar) project.getTasks().getByName(sourceSet.getName() + "DataTemplateJar"); + // The getArchivePath() API doesn’t carry any task dependency and has been deprecated. + // Replace it with getArchiveFile() on Gradle 7, + // but keep getArchivePath() to be backwards-compatibility with Gradle version older than 5.1 + // DataHub Note - applied FIXME + dataModels = dataModelConfig.plus(project.files( + isAtLeastGradle7() ? dataTemplateJarTask.getArchiveFile() : dataTemplateJarTask.getArchivePath())); + } + else + { + dataModels = dataModelConfig; + } + + // create source set for generated rest model, rest client source and class files. + String targetSourceSetName = getGeneratedSourceSetName(sourceSet, REST_GEN_TYPE); + SourceSet targetSourceSet = sourceSets.create(targetSourceSetName, ss -> + { + ss.java(sourceDirectorySet -> sourceDirectorySet.srcDir(generatedRestClientDir)); + ss.setCompileClasspath(dataModels.plus(project.getConfigurations().getByName("restClientCompile"))); + }); + + project.getPlugins().withType(EclipsePlugin.class, eclipsePlugin -> { + EclipseModel eclipseModel = (EclipseModel) project.getExtensions().findByName("eclipse"); + eclipseModel.getClasspath().getPlusConfigurations() + .add(project.getConfigurations().getByName("restClientCompile")); + }); + + // idea plugin needs to know about new rest client source directory and its dependencies + addGeneratedDir(project, targetSourceSet, Arrays.asList( + getDataModelConfig(project, sourceSet), + project.getConfigurations().getByName("restClientCompile"))); + + // generate the rest client source files + GenerateRestClientTask generateRestClientTask = project.getTasks() + .create(targetSourceSet.getTaskName("generate", "restClient"), GenerateRestClientTask.class, task -> + { + task.dependsOn(project.getConfigurations().getByName("dataTemplate")); + task.setInputDir(idlDir); + task.setResolverPath(dataModels.plus(project.getConfigurations().getByName("restClientCompile"))); + task.setRuntimeClasspath(project.getConfigurations().getByName("dataModel") + .plus(project.getConfigurations().getByName("dataTemplate").getArtifacts().getFiles())); + task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); + task.setDestinationDir(generatedRestClientDir); + task.setRestli2FormatSuppressed(project.hasProperty(SUPPRESS_REST_CLIENT_RESTLI_2)); + task.setRestli1FormatSuppressed(project.hasProperty(SUPPRESS_REST_CLIENT_RESTLI_1)); + if (isPropertyTrue(project, ENABLE_ARG_FILE)) + { + task.setEnableArgFile(true); + } + if (isPropertyTrue(project, CODE_GEN_PATH_CASE_SENSITIVE)) + { + task.setGenerateLowercasePath(false); + } + if (isPropertyTrue(project, ENABLE_FLUENT_API)) + { + task.setGenerateFluentApi(true); + } + task.doFirst(new CacheableAction<>(t -> project.delete(generatedRestClientDir))); + }); + + if (dataTemplateJarTask != null) + { + generateRestClientTask.dependsOn(dataTemplateJarTask); + } + + // TODO: Tighten the types so that _generateSourcesJarTask must be of type Jar. 
+ ((Jar) _generateSourcesJarTask).from(generateRestClientTask.getDestinationDir()); + _generateSourcesJarTask.dependsOn(generateRestClientTask); + + _generateJavadocTask.source(generateRestClientTask.getDestinationDir()); + _generateJavadocTask.setClasspath(_generateJavadocTask.getClasspath() + .plus(project.getConfigurations().getByName("restClientCompile")) + .plus(generateRestClientTask.getResolverPath())); + _generateJavadocTask.dependsOn(generateRestClientTask); + + // make sure rest client source files have been generated before compiling them + JavaCompile compileGeneratedRestClientTask = (JavaCompile) project.getTasks() + .getByName(targetSourceSet.getCompileJavaTaskName()); + compileGeneratedRestClientTask.dependsOn(generateRestClientTask); + compileGeneratedRestClientTask.getOptions().getCompilerArgs().add("-Xlint:-deprecation"); + + // create the rest model jar file + Task restModelJarTask = project.getTasks().create(sourceSet.getName() + "RestModelJar", Jar.class, task -> + { + task.from(idlDir, copySpec -> + { + copySpec.eachFile(fileCopyDetails -> project.getLogger() + .info("Add idl file: {}", fileCopyDetails)); + copySpec.setIncludes(Collections.singletonList('*' + IDL_FILE_SUFFIX)); + }); + // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 + // DataHub Note - applied FIXME + task.getArchiveAppendix().set(getAppendix(sourceSet, "rest-model")); + task.setDescription("Generate rest model jar"); + }); + + // create the rest client jar file + Task restClientJarTask = project.getTasks() + .create(sourceSet.getName() + "RestClientJar", Jar.class, task -> + { + task.dependsOn(compileGeneratedRestClientTask); + task.from(idlDir, copySpec -> { + copySpec.eachFile(fileCopyDetails -> { + project.getLogger().info("Add interface file: {}", fileCopyDetails); + fileCopyDetails.setPath("idl" + File.separatorChar + fileCopyDetails.getPath()); + }); + copySpec.setIncludes(Collections.singletonList('*' + IDL_FILE_SUFFIX)); + }); + task.from(targetSourceSet.getOutput()); + // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 + // DataHub Note - applied FIXME + task.getArchiveAppendix().set(getAppendix(sourceSet, "rest-client")); + task.setDescription("Generate rest client jar"); + }); + + // add the rest model jar and the rest client jar to the list of project artifacts. + if (!isTestSourceSet(sourceSet)) + { + project.getArtifacts().add("restModel", restModelJarTask); + project.getArtifacts().add("restClient", restClientJarTask); + } + else + { + project.getArtifacts().add("testRestModel", restModelJarTask); + project.getArtifacts().add("testRestClient", restClientJarTask); + } + } + + // Return the appendix for generated jar files. + // The source set name is not included for the main source set. + private static String getAppendix(SourceSet sourceSet, String suffix) + { + return sourceSet.getName().equals("main") ? 
suffix : sourceSet.getName() + '-' + suffix;
+  }
+
+  private static Project getApiProject(Project project)
+  {
+    if (project.getExtensions().getExtraProperties().has("apiProject"))
+    {
+      return (Project) project.getExtensions().getExtraProperties().get("apiProject");
+    }
+
+    List<String> subsSuffixes;
+    if (project.getExtensions().getExtraProperties().has("apiProjectSubstitutionSuffixes"))
+    {
+      @SuppressWarnings("unchecked")
+      List<String> suffixValue = (List<String>) project.getExtensions()
+          .getExtraProperties().get("apiProjectSubstitutionSuffixes");
+
+      subsSuffixes = suffixValue;
+    }
+    else
+    {
+      subsSuffixes = Arrays.asList("-impl", "-service", "-server", "-server-impl");
+    }
+
+    for (String suffix : subsSuffixes)
+    {
+      if (project.getPath().endsWith(suffix))
+      {
+        String searchPath = project.getPath().substring(0, project.getPath().length() - suffix.length()) + "-api";
+        Project apiProject = project.findProject(searchPath);
+        if (apiProject != null)
+        {
+          return apiProject;
+        }
+      }
+    }
+
+    return project.findProject(project.getPath() + "-api");
+  }
+
+  private static Project getCheckedApiProject(Project project)
+  {
+    Project apiProject = getApiProject(project);
+
+    if (apiProject == project)
+    {
+      throw new GradleException("The API project of " + project.getPath() + " must not be itself.");
+    }
+
+    return apiProject;
+  }
+
+  /**
+   * Return the property value if the property exists and is not empty (-Pname=value).
+   * Return null if the property does not exist or the property is empty (-Pname).
+   *
+   * @param project the project where to look for the property
+   * @param propertyName the name of the property
+   */
+  public static String getNonEmptyProperty(Project project, String propertyName)
+  {
+    if (!project.hasProperty(propertyName))
+    {
+      return null;
+    }
+
+    String propertyValue = project.property(propertyName).toString();
+    if (propertyValue.isEmpty())
+    {
+      return null;
+    }
+
+    return propertyValue;
+  }
+
+  /**
+   * Return true if the given property exists and its value is true.
+   *
+   * @param project the project where to look for the property
+   * @param propertyName the name of the property
+   */
+  public static boolean isPropertyTrue(Project project, String propertyName)
+  {
+    return project.hasProperty(propertyName) && Boolean.valueOf(project.property(propertyName).toString());
+  }
+
+  private static String createModifiedFilesMessage(Collection<String> nonEquivExpectedFiles,
+      Collection<String> foldersToBeBuilt)
+  {
+    StringBuilder builder = new StringBuilder();
+    builder.append("\nRemember to check in the changes to the following new or modified files:\n");
+    for (String file : nonEquivExpectedFiles)
+    {
+      builder.append(" ");
+      builder.append(file);
+      builder.append("\n");
+    }
+
+    if (!foldersToBeBuilt.isEmpty())
+    {
+      builder.append("\nThe file modifications include service interface changes; you can build the following projects " +
+          "to re-generate the client APIs accordingly:\n");
+      for (String folder : foldersToBeBuilt)
+      {
+        builder.append(" ");
+        builder.append(folder);
+        builder.append("\n");
+      }
+    }
+
+    return builder.toString();
+  }
+
+  private static String createPossibleMissingFilesMessage(Collection<String> missingFiles)
+  {
+    StringBuilder builder = new StringBuilder();
+    builder.append("If this is the result of an automated build, then you may have forgotten to check in some snapshot or idl files:\n");
+    for (String file : missingFiles)
+    {
+      builder.append(" ");
+      builder.append(file);
+      builder.append("\n");
+    }
+
+    return builder.toString();
+  }
+
+  private static String findProperty(FileCompatibilityType type)
+  {
+    String property;
+    switch (type)
+    {
+      case SNAPSHOT:
+        property = SNAPSHOT_COMPAT_REQUIREMENT;
+        break;
+      case IDL:
+        property = IDL_COMPAT_REQUIREMENT;
+        break;
+      case PEGASUS_SCHEMA_SNAPSHOT:
+        property = PEGASUS_SCHEMA_SNAPSHOT_REQUIREMENT;
+        break;
+      case PEGASUS_EXTENSION_SCHEMA_SNAPSHOT:
+        property = PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_REQUIREMENT;
+        break;
+      default:
+        throw new GradleException("No property defined for compatibility type " + type);
+    }
+    return property;
+  }
+
+  private static Set<File> buildWatchedRestModelInputDirs(Project project, SourceSet sourceSet) {
+    @SuppressWarnings("unchecked")
+    Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
+        .getExtensions().getExtraProperties().get("pegasus");
+
+    File rootPath = new File(project.getProjectDir(),
+        pegasusOptions.get(sourceSet.getName()).restModelOptions.getRestResourcesRootPath());
+
+    IdlOptions idlOptions = pegasusOptions.get(sourceSet.getName()).idlOptions;
+
+    // if idlItems exist, only watch the smaller subset
+    return idlOptions.getIdlItems().stream()
+        .flatMap(idlItem -> Arrays.stream(idlItem.packageNames))
+        .map(packageName -> new File(rootPath, packageName.replace('.', '/')))
+        .collect(Collectors.toCollection(TreeSet::new));
+  }
+
+  private static <T> Set<T> difference(Set<T> left, Set<T> right)
+  {
+    Set<T> result = new HashSet<>(left);
+    result.removeAll(right);
+    return result;
+  }
+
+  /**
+   * Configures the given source set so that its data schema directory (usually 'pegasus') is marked as a resource root.
+   * The purpose of this is to improve the IDE experience. Makes sure to exclude this directory from being packaged in
+   * with the default Jar task.
+   */
+  private static void configureDataSchemaResourcesRoot(Project project, SourceSet sourceSet)
+  {
+    sourceSet.resources(sourceDirectorySet -> {
+      final String dataSchemaPath = getDataSchemaPath(project, sourceSet);
+      final File dataSchemaRoot = project.file(dataSchemaPath);
+      sourceDirectorySet.srcDir(dataSchemaPath);
+      project.getLogger().info("Adding resource root '{}'", dataSchemaPath);
+
+      final String extensionsSchemaPath = getExtensionSchemaPath(project, sourceSet);
+      final File extensionsSchemaRoot = project.file(extensionsSchemaPath);
+      sourceDirectorySet.srcDir(extensionsSchemaPath);
+      project.getLogger().info("Adding resource root '{}'", extensionsSchemaPath);
+
+      // Exclude the data schema and extensions schema directory from being copied into the default Jar task
+      sourceDirectorySet.getFilter().exclude(fileTreeElement -> {
+        final File file = fileTreeElement.getFile();
+        // Traversal starts with the children of a resource root, so checking the direct parent is sufficient
+        final boolean underDataSchemaRoot = dataSchemaRoot.equals(file.getParentFile());
+        final boolean underExtensionsSchemaRoot = extensionsSchemaRoot.equals(file.getParentFile());
+        final boolean exclude = (underDataSchemaRoot || underExtensionsSchemaRoot);
+        if (exclude)
+        {
+          project.getLogger().info("Excluding resource directory '{}'", file);
+        }
+        return exclude;
+      });
+    });
+  }
+
+  private Task generatePegasusSchemaSnapshot(Project project, SourceSet sourceSet, String taskName, File inputDir, File outputDir,
+      boolean isExtensionSchema)
+  {
+    return project.getTasks().create(sourceSet.getTaskName("generate", taskName),
+        GeneratePegasusSnapshotTask.class, task ->
+        {
+          task.setInputDir(inputDir);
+          task.setResolverPath(getDataModelConfig(project, sourceSet).plus(project.files(getDataSchemaPath(project, sourceSet))));
+          task.setClassPath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
+          task.setPegasusSchemaSnapshotDestinationDir(outputDir);
+          task.setExtensionSchema(isExtensionSchema);
+          if (isPropertyTrue(project, ENABLE_ARG_FILE))
+          {
+            task.setEnableArgFile(true);
+          }
+        });
+  }
+
+  private Task publishPegasusSchemaSnapshot(Project project, SourceSet sourceSet, String taskName, Task checkPegasusSnapshotTask,
+      File inputDir, File outputDir)
+  {
+    return project.getTasks().create(sourceSet.getTaskName("publish", taskName),
+        Sync.class, task ->
+        {
+          task.dependsOn(checkPegasusSnapshotTask);
+          task.from(inputDir);
+          task.into(outputDir);
+          task.onlyIf(t -> !SharedFileUtils.getSuffixedFiles(project, inputDir, PDL_FILE_SUFFIX).isEmpty());
+        });
+  }
+
+  private void checkGradleVersion(Project project)
+  {
+    if (MIN_REQUIRED_VERSION.compareTo(GradleVersion.current()) > 0)
+    {
+      throw new GradleException(String.format("This plugin does not support %s. Please use %s or later.",
+          GradleVersion.current(),
+          MIN_REQUIRED_VERSION));
+    }
+    if (MIN_SUGGESTED_VERSION.compareTo(GradleVersion.current()) > 0)
+    {
+      project.getLogger().warn(String.format("Pegasus supports %s, but it may not be supported in the next major release. Please use %s or later.",
+          GradleVersion.current(),
+          MIN_SUGGESTED_VERSION));
+    }
+  }
+
+  /**
+   * Reflection is necessary to obscure types introduced in Gradle 5.3
+   *
+   * @param sourceSet the target sourceset upon which to create a new feature variant
+   * @return an Action which modifies a org.gradle.api.plugins.FeatureSpec instance
+   */
+  private Action/*<FeatureSpec>*/ createFeatureVariantFromSourceSet(SourceSet sourceSet)
+  {
+    return featureSpec -> {
+      try
+      {
+        Class<?> clazz = Class.forName("org.gradle.api.plugins.FeatureSpec");
+        Method usingSourceSet = clazz.getDeclaredMethod("usingSourceSet", SourceSet.class);
+        usingSourceSet.invoke(featureSpec, sourceSet);
+      }
+      catch (ReflectiveOperationException e)
+      {
+        throw new GradleException("Unable to invoke FeatureSpec#usingSourceSet(SourceSet)", e);
+      }
+    };
+  }
+
+  protected static boolean isAtLeastGradle61()
+  {
+    return GradleVersion.current().getBaseVersion().compareTo(GradleVersion.version("6.1")) >= 0;
+  }
+
+  public static boolean isAtLeastGradle7() {
+    return GradleVersion.current().getBaseVersion().compareTo(GradleVersion.version("7.0")) >= 0;
+  }
+}
\ No newline at end of file
diff --git a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java
new file mode 100644
index 0000000000000..a2aafaf1be017
--- /dev/null
+++ b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java
@@ -0,0 +1,124 @@
+package com.linkedin.pegasus.gradle.tasks;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+import org.gradle.api.DefaultTask;
+import org.gradle.api.file.FileCollection;
+import org.gradle.api.specs.Specs;
+import org.gradle.api.tasks.InputFiles;
+import org.gradle.api.tasks.Internal;
+import org.gradle.api.tasks.SkipWhenEmpty;
+import org.gradle.api.tasks.TaskAction;
+import org.gradle.work.FileChange;
+import org.gradle.work.InputChanges;
+
+
+public class ChangedFileReportTask extends DefaultTask
+{
+  private final Collection<String> _needCheckinFiles = new ArrayList<>();
+
+  private FileCollection _idlFiles = getProject().files();
+  private FileCollection _snapshotFiles = getProject().files();
+
+  public ChangedFileReportTask()
+  {
+    // With Gradle 6.0, declaring an incremental task without outputs is not allowed.
+    getOutputs().upToDateWhen(Specs.satisfyNone());
+  }
+
+  // DataHub Note - updated for InputChanges
+  @TaskAction
+  public void checkFilesForChanges(InputChanges inputChanges)
+  {
+    getLogger().lifecycle("Checking idl and snapshot files for changes...");
+    getLogger().info("idlFiles: " + _idlFiles.getAsPath());
+    getLogger().info("snapshotFiles: " + _snapshotFiles.getAsPath());
+
+    Set<String> filesRemoved = new HashSet<>();
+    Set<String> filesAdded = new HashSet<>();
+    Set<String> filesChanged = new HashSet<>();
+
+    if (inputChanges.isIncremental())
+    {
+      Consumer<FileChange> handleChange = change ->
+      {
+        switch (change.getChangeType()) {
+          case ADDED:
+            filesAdded.add(change.getFile().getAbsolutePath());
+            break;
+          case REMOVED:
+            filesRemoved.add(change.getFile().getAbsolutePath());
+            break;
+          case MODIFIED:
+            filesChanged.add(change.getFile().getAbsolutePath());
+            break;
+        }
+      };
+
+      inputChanges.getFileChanges(_idlFiles).forEach(handleChange);
+      inputChanges.getFileChanges(_snapshotFiles).forEach(handleChange);
+
+      if (!filesRemoved.isEmpty())
+      {
+        String files = joinByComma(filesRemoved);
+        _needCheckinFiles.add(files);
+        getLogger().lifecycle(
+            "The following files have been removed, be sure to remove them from source control: {}", files);
+      }
+
+      if (!filesAdded.isEmpty())
+      {
+        String files = joinByComma(filesAdded);
+        _needCheckinFiles.add(files);
+        getLogger().lifecycle("The following files have been added, be sure to add them to source control: {}", files);
+      }
+
+      if (!filesChanged.isEmpty())
+      {
+        String files = joinByComma(filesChanged);
+        _needCheckinFiles.add(files);
+        getLogger().lifecycle(
+            "The following files have been changed, be sure to commit the changes to source control: {}", files);
+      }
+    }
+  }
+
+  private String joinByComma(Set<String> files)
+  {
+    return files.stream().collect(Collectors.joining(", "));
+  }
+
+  @InputFiles
+  @SkipWhenEmpty
+  public FileCollection getSnapshotFiles()
+  {
+    return _snapshotFiles;
+  }
+
+  public void setSnapshotFiles(FileCollection snapshotFiles)
+  {
+    _snapshotFiles = snapshotFiles;
+  }
+
+  @InputFiles
+  @SkipWhenEmpty
+  public FileCollection getIdlFiles()
+  {
+    return _idlFiles;
+  }
+
+  public void setIdlFiles(FileCollection idlFiles)
+  {
+    _idlFiles = idlFiles;
+  }
+
+  @Internal
+  public Collection<String> getNeedCheckinFiles()
+  {
+    return _needCheckinFiles;
+  }
+}
\ No newline at end of file
diff --git a/datahub-frontend/build.gradle b/datahub-frontend/build.gradle
index a1b97701dbf88..437c72e6394ea 100644
--- a/datahub-frontend/build.gradle
+++ b/datahub-frontend/build.gradle
@@ -2,6 +2,7 @@ plugins {
     id "io.github.kobylynskyi.graphql.codegen" version "4.1.1"
     id 'scala'
     id 'com.palantir.docker'
+    id 'org.gradle.playframework'
 }
 
 apply from: "../gradle/versioning/versioning.gradle"
@@ -20,7 +21,6 @@ model {
 }
 
 task myTar(type: Tar) {
-  extension = "tgz"
   compression = Compression.GZIP
 
   from("${buildDir}/stage")
@@ -119,3 +119,23 @@ task cleanLocalDockerImages {
     }
 }
dockerClean.finalizedBy(cleanLocalDockerImages)
+
+// gradle 8 fixes
+tasks.getByName('createDatahub-frontendTarDist').dependsOn 'stageMainDist'
+tasks.getByName('createDatahub-frontendZipDist').dependsOn 'stageMainDist'
+stagePlayBinaryDist.dependsOn tasks.getByName('createDatahub-frontendStartScripts')
+playBinaryDistTar.dependsOn tasks.getByName('createDatahub-frontendStartScripts')
+playBinaryDistZip.dependsOn
tasks.getByName('createDatahub-frontendStartScripts') +tasks.getByName('stageDatahub-frontendDist').dependsOn stagePlayBinaryDist +tasks.getByName('stageDatahub-frontendDist').dependsOn createPlayBinaryStartScripts +tasks.getByName('datahub-frontendDistTar').dependsOn createPlayBinaryStartScripts +tasks.getByName('datahub-frontendDistTar').dependsOn createMainStartScripts +tasks.getByName('datahub-frontendDistZip').dependsOn createPlayBinaryStartScripts +tasks.getByName('datahub-frontendDistZip').dependsOn createMainStartScripts +playBinaryDistTar.dependsOn createMainStartScripts +playBinaryDistZip.dependsOn createMainStartScripts +createMainStartScripts.dependsOn 'stageDatahub-frontendDist' +createPlayBinaryTarDist.dependsOn 'stageDatahub-frontendDist' +createPlayBinaryZipDist.dependsOn 'stageDatahub-frontendDist' +createPlayBinaryTarDist.dependsOn 'stageMainDist' +createPlayBinaryZipDist.dependsOn 'stageMainDist' diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index dd1ceee411f74..84fb4c02620b8 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -1,4 +1,3 @@ -apply plugin: "org.gradle.playframework" // Change this to listen on a different port project.ext.httpPort = 9001 @@ -101,4 +100,22 @@ play { test { useJUnitPlatform() + + def playJava17CompatibleJvmArgs = [ + "--add-opens=java.base/java.lang=ALL-UNNAMED", + //"--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", + //"--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", + //"--add-opens=java.base/java.io=ALL-UNNAMED", + //"--add-opens=java.base/java.net=ALL-UNNAMED", + //"--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-opens=java.base/java.util=ALL-UNNAMED", + //"--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", + //"--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED", + //"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", + //"--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", + //"--add-opens=java.base/sun.security.action=ALL-UNNAMED", + //"--add-opens=java.base/sun.util.calendar=ALL-UNNAMED", + //"--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED", + ] + jvmArgs = playJava17CompatibleJvmArgs } diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index fba0031351b58..6e8cb93966922 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -1,7 +1,8 @@ plugins { + id 'java' id "io.github.kobylynskyi.graphql.codegen" version "4.1.1" } -apply plugin: 'java' + dependencies { implementation project(':metadata-service:restli-client') diff --git a/datahub-web-react/build.gradle b/datahub-web-react/build.gradle index fd36e5ac4bc2c..72821d8b97dc0 100644 --- a/datahub-web-react/build.gradle +++ b/datahub-web-react/build.gradle @@ -1,8 +1,8 @@ plugins { id 'java' + id 'distribution' + id 'com.github.node-gradle.node' } -apply plugin: 'distribution' -apply plugin: 'com.github.node-gradle.node' node { @@ -35,7 +35,7 @@ node { yarnWorkDir = file("${project.projectDir}/.gradle/yarn") // Set the work directory where node_modules should be located - nodeModulesDir = file("${project.projectDir}") + nodeProjectDir = file("${project.projectDir}") } @@ -94,7 +94,7 @@ configurations { distZip { dependsOn yarnQuickBuild - baseName 'datahub-web-react' + archiveFileName = "datahub-web-react-${archiveVersion}.${archiveExtension}" from 'dist' } @@ -112,5 +112,5 @@ jar { into('public') { from zipTree(distZip.outputs.files.first()) } - classifier = 'assets' + archiveClassifier = 'assets' } diff --git 
a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile index 0c4c229af34f0..17d691177aa34 100644 --- a/docker/datahub-frontend/Dockerfile +++ b/docker/datahub-frontend/Dockerfile @@ -17,7 +17,7 @@ RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ ENV LD_LIBRARY_PATH="/lib:/lib64" @@ -25,7 +25,10 @@ ENV LD_LIBRARY_PATH="/lib:/lib64" FROM base as prod-install COPY ./datahub-frontend.zip / -RUN unzip datahub-frontend.zip && rm datahub-frontend.zip +RUN unzip datahub-frontend.zip -d /datahub-frontend \ + && mv /datahub-frontend/main/* /datahub-frontend \ + && rmdir /datahub-frontend/main \ + && rm datahub-frontend.zip COPY ./docker/monitoring/client-prometheus-config.yaml /datahub-frontend/ RUN chown -R datahub:datahub /datahub-frontend && chmod 755 /datahub-frontend diff --git a/docker/datahub-frontend/start.sh b/docker/datahub-frontend/start.sh index 12e6b8915096d..f5de9c87968b0 100755 --- a/docker/datahub-frontend/start.sh +++ b/docker/datahub-frontend/start.sh @@ -49,6 +49,8 @@ export JAVA_OPTS="${JAVA_MEMORY_OPTS:-"-Xms512m -Xmx1024m"} \ -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \ -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \ -Dlogback.debug=false \ + --add-opens java.base/java.lang=ALL-UNNAMED \ + --add-opens=java.base/java.util=ALL-UNNAMED \ ${PROMETHEUS_AGENT:-} ${OTEL_AGENT:-} \ ${TRUSTSTORE_FILE:-} ${TRUSTSTORE_TYPE:-} ${TRUSTSTORE_PASSWORD:-} \ ${HTTP_PROXY:-} ${HTTPS_PROXY:-} ${NO_PROXY:-} \ diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index 9c79e1da542f0..b26a02c1d3b15 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -40,14 +40,14 @@ RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \ && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O 
jmx_prometheus_javaagent.jar \ - && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks + && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index 52db594e2ef85..36444210f1938 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -45,9 +45,9 @@ docker { buildArgs(dockerBuildArgs) } -tasks.getByName('docker').dependsOn(['build', - ':docker:datahub-ingestion-base:docker', - ':metadata-ingestion:codegen']) +tasks.getByName('dockerPrepare').dependsOn(['build', + ':docker:datahub-ingestion-base:docker', + ':metadata-ingestion:codegen']) task mkdirBuildDocker { doFirst { diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 5bfa5f35ace17..9b7c6e762462e 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -38,11 +38,11 @@ ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ - && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks + && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index cc79a3072c193..4da94794e0ead 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -38,11 +38,11 @@ ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ - && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks + && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY 
--from=binary /go/bin/dockerize /usr/local/bin FROM base as prod-install diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index 2beb5b54dac38..00dae87dfc3de 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -38,13 +38,13 @@ ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \ && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ - && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks + && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin ENV LD_LIBRARY_PATH="/lib:/lib64" diff --git a/docker/kafka-setup/Dockerfile b/docker/kafka-setup/Dockerfile index f6a4b62a79356..53353863b6e5f 100644 --- a/docker/kafka-setup/Dockerfile +++ b/docker/kafka-setup/Dockerfile @@ -31,7 +31,7 @@ LABEL name="kafka" version=${KAFKA_VERSION} RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi RUN apk add --no-cache bash coreutils -RUN apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community +RUN apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community RUN apk add --no-cache -t .build-deps git curl ca-certificates jq gcc musl-dev libffi-dev zip RUN mkdir -p /opt \ diff --git a/docs-website/build.gradle b/docs-website/build.gradle index a213ec1ae8194..2644491a2a5f8 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -1,5 +1,7 @@ -apply plugin: 'distribution' -apply plugin: 'com.github.node-gradle.node' +plugins { + id 'distribution' + id 'com.github.node-gradle.node' +} node { @@ -12,10 +14,10 @@ node { } // Version of node to use. - version = '16.16.0' + version = '21.2.0' // Version of Yarn to use. - yarnVersion = '1.22.0' + yarnVersion = '1.22.1' // Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror). 
if (project.hasProperty('nodeDistBaseUrl')) {
@@ -31,7 +33,7 @@ node {
   yarnWorkDir = file("${project.projectDir}/.gradle/yarn")
 
   // Set the work directory where node_modules should be located
-  nodeModulesDir = file("${project.projectDir}")
+  nodeProjectDir = file("${project.projectDir}")
 }
 
 /*
@@ -122,7 +124,11 @@ task yarnBuild(type: YarnTask, dependsOn: [yarnLint, yarnGenerate, downloadHisto
   // See https://stackoverflow.com/questions/53230823/fatal-error-ineffective-mark-compacts-near-heap-limit-allocation-failed-java
   // and https://github.com/facebook/docusaurus/issues/8329.
   // TODO: As suggested in https://github.com/facebook/docusaurus/issues/4765, try switching to swc-loader.
-  environment = ['NODE_OPTIONS': '--max-old-space-size=10248']
+  if (project.hasProperty('useSystemNode') && project.getProperty('useSystemNode').toBoolean()) {
+    environment = ['NODE_OPTIONS': '--max-old-space-size=10248']
+  } else {
+    environment = ['NODE_OPTIONS': '--max-old-space-size=10248 --openssl-legacy-provider']
+  }
   args = ['run', 'build']
 }
diff --git a/docs-website/vercel-setup.sh b/docs-website/vercel-setup.sh
index db532e167b59f..915635b24ee88 100755
--- a/docs-website/vercel-setup.sh
+++ b/docs-website/vercel-setup.sh
@@ -12,7 +12,7 @@ set -euxo pipefail
 yum groupinstall "Development Tools" -y
 yum erase openssl-devel -y
-yum install openssl11 openssl11-devel libffi-devel bzip2-devel wget -y
+yum install openssl11 openssl11-devel libffi-devel bzip2-devel wget nodejs -y
 
 wget https://www.python.org/ftp/python/3.10.11/Python-3.10.11.tgz
 tar -xf Python-3.10.11.tgz
diff --git a/docs/developers.md b/docs/developers.md
index c3c3a59283e66..60d31f5e4523f 100644
--- a/docs/developers.md
+++ b/docs/developers.md
@@ -6,16 +6,12 @@ title: "Local Development"
 
 ## Requirements
 
-- Both [Java 11 JDK](https://openjdk.org/projects/jdk/11/) and [Java 8 JDK](https://openjdk.java.net/projects/jdk8/)
+- [Java 17 JDK](https://openjdk.org/projects/jdk/17/)
 - [Python 3.10](https://www.python.org/downloads/release/python-3100/)
 - [Docker](https://www.docker.com/)
 - [Docker Compose](https://docs.docker.com/compose/)
 - Docker engine with at least 8GB of memory to run tests.
 
-:::caution
-
-Do not try to use a JDK newer than JDK 11. The build process does not currently work with newer JDKs versions.
-
 :::
 
 On macOS, these can be installed using [Homebrew](https://brew.sh/).
@@ -147,11 +143,11 @@ You're probably using a Java version that's too new for gradle. Run the followin
 java --version
 ```
 
-While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 11](https://openjdk.org/projects/jdk/11/) (aka Java 11).
+While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 17](https://openjdk.org/projects/jdk/17/).
 
 #### Getting `cannot find symbol` error for `javax.annotation.Generated`
 
-Similar to the previous issue, please use Java 1.8 to build the project.
+Similar to the previous issue, please use Java 17 to build the project.
 
 You can install multiple version of Java on a single machine and switch between them using the `JAVA_HOME` environment variable. See [this document](https://docs.oracle.com/cd/E21454_01/html/821-2531/inst_jdk_javahome_t.html) for more details.
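Several builds in this patch drop blocks that pinned compilation and test execution to Java 8 via Gradle toolchains. For reference, a minimal sketch of the equivalent pin at JDK 17, assuming the `java` plugin is applied (illustrative only; this snippet is not part of the patch):

```groovy
// Minimal sketch (not from this patch): pin a module to JDK 17 via Gradle's
// toolchain support, the same mechanism the deleted JavaLanguageVersion.of(8)
// blocks used for Java 8.
java {
    toolchain {
        languageVersion = JavaLanguageVersion.of(17)
    }
}
```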
#### `:metadata-models:generateDataTemplate` task fails with `java.nio.file.InvalidPathException: Illegal char <:> at index XX` or `Caused by: java.lang.IllegalArgumentException: 'other' has different root` error
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 36be572f2886e..61ad2d623d72a 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -7,11 +7,15 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 ### Breaking Changes
 
 - Updating MySQL version for quickstarts to 8.2, may cause quickstart issues for existing instances.
+- Neo4j 5.x, may require migration from 4.x
+- Build now requires JDK17 (Runtime Java 11)
 
 ### Potential Downtime
 
 ### Deprecations
 
+- Spark 2.x (including previous JDK8 build requirements)
+
 ### Other Notable Changes
 
 ## 0.12.1
diff --git a/docs/troubleshooting/build.md b/docs/troubleshooting/build.md
index 112bcdc47e956..7b4ae98cdb03b 100644
--- a/docs/troubleshooting/build.md
+++ b/docs/troubleshooting/build.md
@@ -10,11 +10,11 @@ You're probably using a Java version that's too new for gradle. Run the followin
 java --version
 ```
 
-While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 11](https://openjdk.org/projects/jdk/11/) (aka Java 11).
+While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 17](https://openjdk.org/projects/jdk/17/).
 
 ## Getting `cannot find symbol` error for `javax.annotation.Generated`
 
-Similar to the previous issue, please use Java 1.8 to build the project.
+Similar to the previous issue, please use Java 17 to build the project.
 
 You can install multiple version of Java on a single machine and switch between them using the `JAVA_HOME` environment variable. See [this document](https://docs.oracle.com/cd/E21454_01/html/821-2531/inst_jdk_javahome_t.html) for more details.
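A note on the `--add-opens` flags this patch adds in several places (datahub-frontend's `start.sh`, `play.gradle`, the Spark lineage build, `spark-docker.conf`): JDK 17 enforces strong encapsulation of the module system, so reflective access into `java.base` packages must be opened explicitly. A minimal sketch of the test-task pattern, assuming a standard `test` task from the `java` plugin (illustrative only; the concrete flag lists live in the diffs below):

```groovy
// Minimal sketch (not from this patch): open only the java.base packages the
// framework under test actually reflects into, rather than disabling
// encapsulation wholesale.
test {
    jvmArgs = [
        '--add-opens=java.base/java.lang=ALL-UNNAMED',
        '--add-opens=java.base/java.util=ALL-UNNAMED',
    ]
}
```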
## `:metadata-models:generateDataTemplate` task fails with `java.nio.file.InvalidPathException: Illegal char <:> at index XX` or `Caused by: java.lang.IllegalArgumentException: 'other' has different root` error diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 3da0bf5bb4fb8..77cca24c0e723 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -1,10 +1,13 @@ -apply plugin: 'pegasus' -apply plugin: 'java-library' +plugins { + id 'pegasus' + id 'java-library' +} dependencies { implementation spec.product.pegasus.data implementation spec.product.pegasus.generator api project(path: ':metadata-models') + api project(path: ':metadata-models', configuration: "dataTemplate") implementation externalDependency.slf4jApi compileOnly externalDependency.lombok implementation externalDependency.guava diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 4e86b9270786f..bdc9a83b1e652 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/li-utils/build.gradle b/li-utils/build.gradle index 1d5222e39185a..975cd2bccccf3 100644 --- a/li-utils/build.gradle +++ b/li-utils/build.gradle @@ -1,17 +1,9 @@ -apply plugin: 'java-library' -apply plugin: 'pegasus' - -tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } +plugins { + id 'java-library' + id 'pegasus' } + dependencies { api spec.product.pegasus.data implementation externalDependency.commonsLang @@ -28,7 +20,7 @@ dependencies { testImplementation externalDependency.commonsIo testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') - testImplementation externalDependency.testngJava8 + testImplementation externalDependency.testng } idea { @@ -38,4 +30,4 @@ idea { } // Need to compile backing java parameterDefinitions with the data template. 
-sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') \ No newline at end of file +sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') diff --git a/metadata-auth/auth-api/build.gradle b/metadata-auth/auth-api/build.gradle index 7159aa5f15e61..c68c3019bd2b4 100644 --- a/metadata-auth/auth-api/build.gradle +++ b/metadata-auth/auth-api/build.gradle @@ -15,13 +15,12 @@ test { } jar { - archiveName = "$project.name-lib.jar" + archiveClassifier = "lib" } shadowJar { zip64 true - classifier = null - archiveName = "$project.name-${version}.jar" + archiveClassifier = "" exclude "META-INF/*.RSA", "META-INF/*.SF","META-INF/*.DSA" } @@ -39,12 +38,12 @@ dependencies() { } task sourcesJar(type: Jar) { - classifier 'sources' + archiveClassifier = 'sources' from sourceSets.main.allJava } task javadocJar(type: Jar, dependsOn: javadoc) { - classifier 'javadoc' + archiveClassifier = 'javadoc' from javadoc.destinationDir } diff --git a/metadata-events/mxe-utils-avro/build.gradle b/metadata-events/mxe-utils-avro/build.gradle index 3493797ab4f97..98bfb9127b209 100644 --- a/metadata-events/mxe-utils-avro/build.gradle +++ b/metadata-events/mxe-utils-avro/build.gradle @@ -1,8 +1,11 @@ -apply plugin: 'java-library' +plugins { + id 'java-library' +} dependencies { api project(':metadata-events:mxe-avro') api project(':metadata-models') + api project(path: ':metadata-models', configuration: "dataTemplate") api spec.product.pegasus.dataAvro testImplementation externalDependency.testng diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index 7ae01faaaabdd..b14953d7ce021 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -14,19 +14,9 @@ import org.apache.tools.ant.filters.ReplaceTokens jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation -tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } -} - dependencies { implementation project(':metadata-models') + implementation project(path: ':metadata-models', configuration: "dataTemplate") implementation(externalDependency.kafkaAvroSerializer) { exclude group: "org.apache.avro" } @@ -49,7 +39,7 @@ dependencies { annotationProcessor externalDependency.lombok // VisibleForTesting compileOnly externalDependency.guava - testImplementation externalDependency.testngJava8 + testImplementation externalDependency.testng testImplementation externalDependency.mockito testImplementation externalDependency.mockServer testImplementation externalDependency.mockServerClient @@ -241,4 +231,4 @@ sourceSets.main.resources.srcDir "${generateOpenApiPojos.outputDir}/src/main/res clean { project.delete("$projectDir/generated") -} +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf-example/build.gradle b/metadata-integration/java/datahub-protobuf-example/build.gradle index 4e53d8ed763ba..1efb43360457a 100644 --- a/metadata-integration/java/datahub-protobuf-example/build.gradle +++ b/metadata-integration/java/datahub-protobuf-example/build.gradle @@ -64,10 +64,6 @@ protobuf { task publishSchema(dependsOn: build) { description "Publishes protobuf schema in the `main` sourceSet to DataHub" - def javaLauncher = 
javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(11) - } - fileTree("schema").matching { exclude "protobuf/meta/**" }.each {f -> diff --git a/metadata-integration/java/datahub-protobuf/build.gradle b/metadata-integration/java/datahub-protobuf/build.gradle index bc919119f8fac..2cb36a14cb9c7 100644 --- a/metadata-integration/java/datahub-protobuf/build.gradle +++ b/metadata-integration/java/datahub-protobuf/build.gradle @@ -12,12 +12,6 @@ apply from: '../versioning.gradle' jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation -afterEvaluate { - if (project.plugins.hasPlugin('java')) { - sourceCompatibility = 11 - targetCompatibility = 11 - } -} ext { javaMainClass = "datahub.protobuf.Proto2DataHub" } @@ -211,4 +205,4 @@ nexusStaging { password = System.getenv("NEXUS_PASSWORD") } - +startScripts.dependsOn shadowJar \ No newline at end of file diff --git a/metadata-integration/java/examples/build.gradle b/metadata-integration/java/examples/build.gradle index 581e9f82da0dc..ddf574e8c8905 100644 --- a/metadata-integration/java/examples/build.gradle +++ b/metadata-integration/java/examples/build.gradle @@ -1,16 +1,6 @@ -apply plugin: 'java' -apply plugin: 'jacoco' - - -tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } +plugins { + id 'java' + id 'jacoco' } dependencies { diff --git a/metadata-integration/java/spark-lineage/build.gradle b/metadata-integration/java/spark-lineage/build.gradle index 7143ac4833143..c5dd9b5012c29 100644 --- a/metadata-integration/java/spark-lineage/build.gradle +++ b/metadata-integration/java/spark-lineage/build.gradle @@ -11,17 +11,6 @@ apply from: '../versioning.gradle' jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation -tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } -} - //to rename artifacts for publish project.archivesBaseName = 'datahub-'+project.name @@ -34,18 +23,19 @@ configurations { dependencies { - //Needed for tie breaking of guava version need for spark and wiremock - provided(externalDependency.hadoopMapreduceClient) { - force = true + constraints { + provided(externalDependency.hadoopMapreduceClient) { + because 'Needed for tie breaking of guava version need for spark and wiremock' + } + provided(externalDependency.hadoopCommon) { + because 'required for org.apache.hadoop.util.StopWatch' + } + provided(externalDependency.commonsIo) { + because 'required for org.apache.commons.io.Charsets that is used internally' + } } - provided(externalDependency.hadoopCommon) { - force = true - } // required for org.apache.hadoop.util.StopWatch - - provided(externalDependency.commonsIo) { - force = true - } // required for org.apache.commons.io.Charsets that is used internally + provided 'org.scala-lang:scala-library:2.12.18' implementation externalDependency.slf4jApi compileOnly externalDependency.lombok @@ -86,7 +76,7 @@ task checkShadowJar(type: Exec) { shadowJar { zip64=true - classifier='' + archiveClassifier = '' mergeServiceFiles() def exclude_modules = project @@ -107,7 +97,7 @@ shadowJar { // 
preventing java multi-release JAR leakage // https://github.com/johnrengelman/shadow/issues/729 - exclude('module-info.class', 'META-INF/versions/**') + exclude('module-info.class', 'META-INF/versions/**', 'LICENSE', 'NOTICE') // prevent jni conflict with spark exclude '**/libzstd-jni.*' @@ -138,6 +128,25 @@ jacocoTestReport { test { forkEvery = 1 useJUnit() + + def sparkJava17CompatibleJvmArgs = [ + "--add-opens=java.base/java.lang=ALL-UNNAMED", + //"--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", + //"--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", + //"--add-opens=java.base/java.io=ALL-UNNAMED", + "--add-opens=java.base/java.net=ALL-UNNAMED", + "--add-opens=java.base/java.nio=ALL-UNNAMED", + //"--add-opens=java.base/java.util=ALL-UNNAMED", + //"--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", + //"--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED", + "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", + //"--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", + //"--add-opens=java.base/sun.security.action=ALL-UNNAMED", + //"--add-opens=java.base/sun.util.calendar=ALL-UNNAMED", + //"--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED", + ] + jvmArgs = sparkJava17CompatibleJvmArgs + finalizedBy jacocoTestReport } @@ -151,12 +160,12 @@ task integrationTest(type: Exec, dependsOn: [shadowJar, ':docker:quickstartSlim' } task sourcesJar(type: Jar) { - classifier 'sources' + archiveClassifier = 'sources' from sourceSets.main.allJava } task javadocJar(type: Jar, dependsOn: javadoc) { - classifier 'javadoc' + archiveClassifier = 'javadoc' from javadoc.destinationDir } @@ -224,3 +233,12 @@ nexusStaging { username = System.getenv("NEXUS_USERNAME") password = System.getenv("NEXUS_PASSWORD") } + +task cleanExtraDirs { + delete "$projectDir/derby.log" + delete "$projectDir/src/test/resources/data/hive" + delete "$projectDir/src/test/resources/data/out.csv" + delete "$projectDir/src/test/resources/data/out_persist.csv" + delete "$projectDir/spark-smoke-test/venv" +} +clean.finalizedBy(cleanExtraDirs) diff --git a/metadata-integration/java/spark-lineage/scripts/check_jar.sh b/metadata-integration/java/spark-lineage/scripts/check_jar.sh index dd9cae68f31cb..275b91304e7ee 100755 --- a/metadata-integration/java/spark-lineage/scripts/check_jar.sh +++ b/metadata-integration/java/spark-lineage/scripts/check_jar.sh @@ -34,7 +34,9 @@ jar -tvf $jarFile |\ grep -v "linux/" |\ grep -v "darwin" |\ grep -v "MetadataChangeProposal.avsc" |\ - grep -v "aix" + grep -v "aix" |\ + grep -v "library.properties" |\ + grep -v "rootdoc.txt" if [ $? 
-ne 0 ]; then echo "✅ No unexpected class paths found in ${jarFile}" diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkBase.Dockerfile b/metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkBase.Dockerfile index 119338be6c2a9..21d0701fcfcd6 100644 --- a/metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkBase.Dockerfile +++ b/metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkBase.Dockerfile @@ -17,7 +17,7 @@ RUN apt-get update -y && \ apt-get install /tmp/zulu-repo_1.0.0-3_all.deb && \ apt-get update && \ # apt-cache search zulu && \ - apt-get install -y --no-install-recommends zulu11-jre && \ + apt-get install -y --no-install-recommends zulu17-jre && \ apt-get clean && \ curl -sS https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz && \ tar -xf spark.tgz && \ diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/python-spark-lineage-test/python_test_run.sh b/metadata-integration/java/spark-lineage/spark-smoke-test/python-spark-lineage-test/python_test_run.sh index 429f692500c80..c06e2faec0bcb 100755 --- a/metadata-integration/java/spark-lineage/spark-smoke-test/python-spark-lineage-test/python_test_run.sh +++ b/metadata-integration/java/spark-lineage/spark-smoke-test/python-spark-lineage-test/python_test_run.sh @@ -7,25 +7,24 @@ saluation () { echo "--------------------------------------------------------" - echo "Starting execution $1" + echo "Starting execution $1 (properties: $2)" echo "--------------------------------------------------------" } -saluation "HdfsIn2HdfsOut1.py" - +saluation "HdfsIn2HdfsOut1.py" $2 spark-submit --properties-file $2 HdfsIn2HdfsOut1.py -saluation "HdfsIn2HdfsOut2.py" +saluation "HdfsIn2HdfsOut2.py" $2 spark-submit --properties-file $2 HdfsIn2HdfsOut2.py -saluation "HdfsIn2HiveCreateTable.py" +saluation "HdfsIn2HiveCreateTable.py" $2 spark-submit --properties-file $2 HdfsIn2HiveCreateTable.py -saluation "HdfsIn2HiveCreateInsertTable.py" +saluation "HdfsIn2HiveCreateInsertTable.py" $2 spark-submit --properties-file $2 HdfsIn2HiveCreateInsertTable.py -saluation "HiveInHiveOut.py" +saluation "HiveInHiveOut.py" $2 spark-submit --properties-file $2 HiveInHiveOut.py diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/spark-docker.conf b/metadata-integration/java/spark-lineage/spark-smoke-test/spark-docker.conf index 43103c3db65ad..a511d9f114f2b 100644 --- a/metadata-integration/java/spark-lineage/spark-smoke-test/spark-docker.conf +++ b/metadata-integration/java/spark-lineage/spark-smoke-test/spark-docker.conf @@ -4,3 +4,7 @@ spark.jars file:///opt/workspace/datahub-spark-lineage*.jar spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server http://datahub-gms:8080 + +spark.driver.extraJavaOptions --add-opens java.base/java.lang=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED +spark.executor.extraJavaOptions --add-opens java.base/java.lang=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED + diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle index 12aa1775d6104..6337f8c9beec6 100644 --- 
a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle +++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/build.gradle @@ -17,17 +17,6 @@ repositories { jcenter() } -tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } -} - dependencies { implementation 'org.apache.spark:spark-sql_2.11:2.4.8' } diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java index 2df468fc03e74..053055716eaa0 100644 --- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java +++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java @@ -37,7 +37,7 @@ public class TestCoalesceJobLineage { private static final String APP_NAME = "sparkCoalesceTestApp"; - private static final String TEST_RELATIVE_PATH = "../"; + private static final String TEST_RELATIVE_PATH = ""; private static final String RESOURCE_DIR = "src/test/resources"; private static final String DATA_DIR = TEST_RELATIVE_PATH + RESOURCE_DIR + "/data"; private static final String WAREHOUSE_LOC = DATA_DIR + "/hive/warehouse/coalesce"; @@ -142,6 +142,9 @@ public void setup() { "spark.datahub.parent.datajob_urn", "urn:li:dataJob:(urn:li:dataFlow:(airflow,datahub_analytics_refresh,prod),load_dashboard_info_to_snowflake)") .config("spark.sql.warehouse.dir", new File(WAREHOUSE_LOC).getAbsolutePath()) + .config( + "javax.jdo.option.ConnectionURL", + "jdbc:derby:;databaseName=build/tmp/metastore_db_coalesce;create=true") .enableHiveSupport() .getOrCreate(); diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java index 3a70c10e0c1f9..fa896814d16f6 100644 --- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java +++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java @@ -191,6 +191,9 @@ public static void setup() { .config("spark.datahub.metadata.dataset.platformInstance", DATASET_PLATFORM_INSTANCE) .config("spark.datahub.metadata.dataset.env", DATASET_ENV.name()) .config("spark.sql.warehouse.dir", new File(WAREHOUSE_LOC).getAbsolutePath()) + .config( + "javax.jdo.option.ConnectionURL", + "jdbc:derby:;databaseName=build/tmp/metastore_db_spark;create=true") .enableHiveSupport() .getOrCreate(); diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 48f80f06d07c2..568b99acdf894 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -62,7 +62,10 @@ dependencies { testImplementation externalDependency.h2 testImplementation externalDependency.mysqlConnector testImplementation externalDependency.neo4jHarness - testImplementation (externalDependency.neo4jApoc) { + testImplementation (externalDependency.neo4jApocCore) { + exclude group: 'org.yaml', module: 'snakeyaml' + } + testImplementation (externalDependency.neo4jApocCommon) { exclude group: 'org.yaml', module: 'snakeyaml' } testImplementation externalDependency.mockito diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 217d54c5c0b0f..c8d3147711eba 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -432,8 +432,8 @@ private Pair> generateLineageStatementAndParameters( + "(b)) " + "WHERE a <> b " + " AND ALL(rt IN relationships(path) WHERE " - + " (EXISTS(rt.source) AND rt.source = 'UI') OR " - + " (NOT EXISTS(rt.createdOn) AND NOT EXISTS(rt.updatedOn)) OR " + + " (rt.source IS NOT NULL AND rt.source = 'UI') OR " + + " (rt.createdOn IS NULL AND rt.updatedOn IS NULL) OR " + " ($startTimeMillis <= rt.createdOn <= $endTimeMillis OR " + " $startTimeMillis <= rt.updatedOn <= $endTimeMillis) " + " ) " diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle index fcb8b62e4ac9d..2e068d5a3501e 100644 --- a/metadata-jobs/mae-consumer/build.gradle +++ b/metadata-jobs/mae-consumer/build.gradle @@ -60,6 +60,7 @@ task avroSchemaSources(type: Copy) { } compileJava.dependsOn avroSchemaSources +processResources.dependsOn avroSchemaSources clean { project.delete("src/main/resources/avro") diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle index 97eec9fcff051..5fa65c06de714 100644 --- a/metadata-jobs/mce-consumer/build.gradle +++ b/metadata-jobs/mce-consumer/build.gradle @@ -1,8 +1,8 @@ plugins { id 'java' + id 'pegasus' } -apply plugin: 'pegasus' configurations { avro @@ -49,6 +49,7 @@ task avroSchemaSources(type: Copy) { } compileJava.dependsOn avroSchemaSources +processResources.dependsOn avroSchemaSources clean { project.delete("src/main/resources/avro") diff --git a/metadata-jobs/pe-consumer/build.gradle b/metadata-jobs/pe-consumer/build.gradle index 81e8b8c9971f0..2fd19af92971e 100644 --- a/metadata-jobs/pe-consumer/build.gradle +++ b/metadata-jobs/pe-consumer/build.gradle @@ -1,7 +1,7 @@ plugins { id 'java' + id 'pegasus' } -apply plugin: 'pegasus' configurations { avro @@ -37,6 +37,7 @@ task avroSchemaSources(type: Copy) { } compileJava.dependsOn avroSchemaSources +processResources.dependsOn avroSchemaSources clean { project.delete("src/main/resources/avro") diff --git a/metadata-models-custom/build.gradle b/metadata-models-custom/build.gradle index 71d3b0fd1f736..3ac08dca7c0db 100644 --- a/metadata-models-custom/build.gradle +++ b/metadata-models-custom/build.gradle @@ -16,8 +16,8 @@ buildscript { plugins { id 'base' id 'maven-publish' + id 'pegasus' } -apply plugin: 'pegasus' if (project.hasProperty('projVersion')) { project.version = project.projVersion diff --git a/metadata-models-validator/build.gradle b/metadata-models-validator/build.gradle index c8d1d2e6651d6..1dae53e817ae1 100644 --- a/metadata-models-validator/build.gradle +++ b/metadata-models-validator/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(":entity-registry") diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle index e90a4042c1921..04c90fa444f0c 100644 --- a/metadata-models/build.gradle +++ b/metadata-models/build.gradle @@ -1,20 +1,12 @@ import io.datahubproject.GenerateJsonSchemaTask -apply plugin: 'java-library' -apply plugin: 'pegasus' -apply plugin: 'org.hidetake.swagger.generator' - -tasks.withType(JavaCompile).configureEach { - javaCompiler = 
javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } +plugins { + id 'pegasus' + id 'java-library' + id 'org.hidetake.swagger.generator' } + dependencies { api spec.product.pegasus.data constraints { @@ -35,7 +27,7 @@ dependencies { swaggerCodegen externalDependency.swaggerCli testImplementation externalDependency.guava - testImplementation externalDependency.testngJava8 + testImplementation externalDependency.testng } sourceSets { diff --git a/metadata-service/auth-config/build.gradle b/metadata-service/auth-config/build.gradle index c7a1128897dd5..8302e3b0c2fe6 100644 --- a/metadata-service/auth-config/build.gradle +++ b/metadata-service/auth-config/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(path: ':metadata-models') diff --git a/metadata-service/auth-filter/build.gradle b/metadata-service/auth-filter/build.gradle index 61e9015adc942..9d763ca11421b 100644 --- a/metadata-service/auth-filter/build.gradle +++ b/metadata-service/auth-filter/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(':metadata-auth:auth-api') diff --git a/metadata-service/auth-impl/build.gradle b/metadata-service/auth-impl/build.gradle index 60d622dea5447..4f4b0658caf24 100644 --- a/metadata-service/auth-impl/build.gradle +++ b/metadata-service/auth-impl/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} compileJava { diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java index d5d5b0c4e6c71..f03113f3eb9bd 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -8,7 +8,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -68,7 +68,7 @@ public void shouldReturnFieldValueWithResourceSpecIfTypeIsDataPlatformInstance() assertEquals( Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); - verifyZeroInteractions(entityClientMock); + verifyNoMoreInteractions(entityClientMock); } @Test diff --git a/metadata-service/auth-servlet-impl/build.gradle b/metadata-service/auth-servlet-impl/build.gradle index 7945b3b4e9a06..b8310bbd4ebc0 100644 --- a/metadata-service/auth-servlet-impl/build.gradle +++ b/metadata-service/auth-servlet-impl/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(':metadata-auth:auth-api') diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index 86644e3b034da..145ec7e65188c 100644 --- a/metadata-service/factories/build.gradle +++ 
b/metadata-service/factories/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java-library' +plugins { + id 'java-library' +} dependencies { api project(':metadata-io') diff --git a/metadata-service/graphql-servlet-impl/build.gradle b/metadata-service/graphql-servlet-impl/build.gradle index 51f67631159d3..5767698242118 100644 --- a/metadata-service/graphql-servlet-impl/build.gradle +++ b/metadata-service/graphql-servlet-impl/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(':datahub-graphql-core') diff --git a/metadata-service/openapi-servlet/build.gradle b/metadata-service/openapi-servlet/build.gradle index 1909b4862d294..0430d4427528d 100644 --- a/metadata-service/openapi-servlet/build.gradle +++ b/metadata-service/openapi-servlet/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { diff --git a/metadata-service/plugin/build.gradle b/metadata-service/plugin/build.gradle index 00a6384b923a0..3f91b8f6ae6ba 100644 --- a/metadata-service/plugin/build.gradle +++ b/metadata-service/plugin/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { @@ -30,4 +32,4 @@ test { clean { dependsOn ':metadata-service:plugin:src:test:sample-test-plugins:clean' -} +} \ No newline at end of file diff --git a/metadata-service/plugin/src/test/sample-test-plugins/build.gradle b/metadata-service/plugin/src/test/sample-test-plugins/build.gradle index f299a35db0f64..d4b2b4c92ad63 100644 --- a/metadata-service/plugin/src/test/sample-test-plugins/build.gradle +++ b/metadata-service/plugin/src/test/sample-test-plugins/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} jar { archiveFileName = "sample-plugins.jar" diff --git a/metadata-service/restli-api/build.gradle b/metadata-service/restli-api/build.gradle index 352738d01f8da..505320e8267ee 100644 --- a/metadata-service/restli-api/build.gradle +++ b/metadata-service/restli-api/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'pegasus' +plugins { + id 'pegasus' +} dependencies { dataModel project(':metadata-models') @@ -17,4 +19,4 @@ dependencies { because("CVE-2023-1428, CVE-2023-32731") } } -} \ No newline at end of file +} diff --git a/metadata-service/restli-client/build.gradle b/metadata-service/restli-client/build.gradle index 7cad1981ad911..86336755dc095 100644 --- a/metadata-service/restli-client/build.gradle +++ b/metadata-service/restli-client/build.gradle @@ -1,5 +1,7 @@ -apply plugin: 'pegasus' -apply plugin: 'java-library' +plugins { + id 'pegasus' + id 'java-library' +} dependencies { api project(':metadata-service:restli-api') diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle index de6fb6690e693..ec5b645ee233c 100644 --- a/metadata-service/restli-servlet-impl/build.gradle +++ b/metadata-service/restli-servlet-impl/build.gradle @@ -1,5 +1,7 @@ -apply plugin: 'java' -apply plugin: 'pegasus' +plugins { + id 'java' + id 'pegasus' +} sourceSets { integTest { diff --git a/metadata-service/schema-registry-api/build.gradle b/metadata-service/schema-registry-api/build.gradle index 077d7d4f2d6a4..c146d5202fef9 100644 --- a/metadata-service/schema-registry-api/build.gradle +++ b/metadata-service/schema-registry-api/build.gradle @@ -1,5 +1,8 @@ -apply plugin: 'java' -apply plugin: 'org.hidetake.swagger.generator' +plugins { + id 'org.hidetake.swagger.generator' + id 'java' +} + dependencies { // Dependencies for open api diff --git 
a/metadata-service/schema-registry-servlet/build.gradle b/metadata-service/schema-registry-servlet/build.gradle index 554ac696c94fd..7bab51d51a86c 100644 --- a/metadata-service/schema-registry-servlet/build.gradle +++ b/metadata-service/schema-registry-servlet/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(':metadata-service:factories') diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle index b6af3d330d185..c683b0c75f40a 100644 --- a/metadata-service/services/build.gradle +++ b/metadata-service/services/build.gradle @@ -1,5 +1,7 @@ -apply plugin: 'java' -apply plugin: 'org.hidetake.swagger.generator' +plugins { + id 'org.hidetake.swagger.generator' + id 'java' +} configurations { enhance diff --git a/metadata-service/servlet/build.gradle b/metadata-service/servlet/build.gradle index eb2cd9c2d3de7..f961bf6a9de7e 100644 --- a/metadata-service/servlet/build.gradle +++ b/metadata-service/servlet/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(':metadata-io') diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 7bc6aa2d43442..3d65675219624 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java-library' +plugins { + id 'java-library' +} dependencies { api externalDependency.avro diff --git a/mock-entity-registry/build.gradle b/mock-entity-registry/build.gradle index 12d7e58eee0a1..8242d6451dd60 100644 --- a/mock-entity-registry/build.gradle +++ b/mock-entity-registry/build.gradle @@ -1,4 +1,6 @@ -apply plugin: 'java' +plugins { + id 'java' +} dependencies { implementation project(':entity-registry') diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index ee0ea3c7be384..1614a4b8527dc 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -11,10 +11,10 @@ node { } // Version of node to use. - version = '16.8.0' + version = '21.2.0' // Version of Yarn to use. - yarnVersion = '1.22.0' + yarnVersion = '1.22.1' // Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror). 
if (project.hasProperty('nodeDistBaseUrl')) { @@ -30,11 +30,12 @@ node { yarnWorkDir = file("${project.projectDir}/.gradle/yarn") // Set the work directory where node_modules should be located - nodeModulesDir = file("${project.projectDir}") + nodeProjectDir = file("${project.projectDir}") } task yarnInstall(type: YarnTask) { println "Root directory: ${project.rootDir}"; + environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] } \ No newline at end of file diff --git a/test-models/build.gradle b/test-models/build.gradle index c74f7249fa1d9..e8733f0525870 100644 --- a/test-models/build.gradle +++ b/test-models/build.gradle @@ -1,17 +1,9 @@ -apply plugin: 'pegasus' -apply plugin: 'java-library' - -tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) - } -} -tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) - } +plugins { + id 'pegasus' + id 'java-library' } + dependencies { implementation spec.product.pegasus.data implementation externalDependency.commonsIo diff --git a/vercel.json b/vercel.json index d5515e68b05bd..a1815cab8ae88 100644 --- a/vercel.json +++ b/vercel.json @@ -1,5 +1,5 @@ { - "buildCommand": "./gradlew :docs-website:build", + "buildCommand": "./gradlew -PuseSystemNode=true :docs-website:build", "github": { "silent": true, "autoJobCancelation": true From caef6771b828d8ee94f76801a9121f4e1a2e7561 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 15 Dec 2023 15:07:56 -0500 Subject: [PATCH 3/6] feat(ingest/redshift): drop repeated operations (#9440) --- metadata-ingestion/setup.py | 6 +- .../ingestion/source/redshift/report.py | 3 +- .../ingestion/source/redshift/usage.py | 68 +++++++++++++++++-- .../redshift-usage/test_redshift_usage.py | 54 ++++++++++++++- 4 files changed, 121 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5d15d7167b63e..1bc1bc5100b08 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -368,7 +368,11 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | sqlglot_lib, + "redshift": sql_common + | redshift_common + | usage_common + | sqlglot_lib + | {"cachetools"}, "s3": {*s3_base, *data_lake_profiling}, "gcs": {*s3_base, *data_lake_profiling}, "sagemaker": aws_common, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index b845580f35939..333c851650fb3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -29,7 +29,8 @@ class RedshiftReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowRep lineage_mem_size: Dict[str, str] = field(default_factory=TopKDict) tables_in_mem_size: Dict[str, str] = field(default_factory=TopKDict) views_in_mem_size: Dict[str, str] = field(default_factory=TopKDict) - num_operational_stats_skipped: int = 0 + num_operational_stats_filtered: int = 0 + num_repeated_operations_dropped: int = 0 num_usage_stat_skipped: int = 0 num_lineage_tables_dropped: int = 0 num_lineage_dropped_query_parser: int = 0 diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py index c789e605b9c29..409027a8805a0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py @@ -4,6 +4,7 @@ from datetime import datetime from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union +import cachetools import pydantic.error_wrappers import redshift_connector from pydantic.fields import Field @@ -251,7 +252,7 @@ def _get_workunits_internal( ) -> Iterable[MetadataWorkUnit]: self.report.num_usage_workunits_emitted = 0 self.report.num_usage_stat_skipped = 0 - self.report.num_operational_stats_skipped = 0 + self.report.num_operational_stats_filtered = 0 if self.config.include_operational_stats: self.report.report_ingestion_stage_start(USAGE_EXTRACTION_OPERATIONAL_STATS) @@ -304,8 +305,13 @@ def _gen_operation_aspect_workunits( ) # Generate operation aspect work units from the access events - yield from self._gen_operation_aspect_workunits_from_access_events( - access_events_iterable, all_tables=all_tables + yield from ( + mcpw.as_workunit() + for mcpw in self._drop_repeated_operations( + self._gen_operation_aspect_workunits_from_access_events( + access_events_iterable, all_tables=all_tables + ) + ) ) def _should_process_event( @@ -366,11 +372,61 @@ def _gen_access_events_from_history_query( yield access_event results = cursor.fetchmany() + def _drop_repeated_operations( + self, events: Iterable[MetadataChangeProposalWrapper] + ) -> Iterable[MetadataChangeProposalWrapper]: + """Drop repeated operations on the same entity. + + ASSUMPTION: Events are ordered by lastUpdatedTimestamp, descending. + + Operations are only dropped if they fall within DROP_WINDOW_SEC (10 seconds) of each other, + and have the same operation type, user, and entity. + + This is particularly useful when we see a string of insert operations + that are all really part of the same overall operation. + """ + + OPERATION_CACHE_MAXSIZE = 1000 + DROP_WINDOW_SEC = 10 + + # All timestamps are in milliseconds. + timestamp_low_watermark = 0 + + def timer(): + return -timestamp_low_watermark + + # dict of entity urn -> (last event's actor, operation type) + # TODO: Remove the type ignore and use TTLCache[key_type, value_type] directly once that's supported in Python 3.9.
+ last_events: Dict[str, Tuple[Optional[str], str]] = cachetools.TTLCache( # type: ignore[assignment] + maxsize=OPERATION_CACHE_MAXSIZE, ttl=DROP_WINDOW_SEC * 1000, timer=timer + ) + + for event in events: + assert isinstance(event.aspect, OperationClass) + + timestamp_low_watermark = min( + timestamp_low_watermark, event.aspect.lastUpdatedTimestamp + ) + + urn = event.entityUrn + assert urn + assert isinstance(event.aspect.operationType, str) + value: Tuple[Optional[str], str] = ( + event.aspect.actor, + event.aspect.operationType, + ) + if urn in last_events and last_events[urn] == value: + self.report.num_repeated_operations_dropped += 1 + continue + + last_events[urn] = value + yield event + def _gen_operation_aspect_workunits_from_access_events( self, events_iterable: Iterable[RedshiftAccessEvent], all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]], - ) -> Iterable[MetadataWorkUnit]: + ) -> Iterable[MetadataChangeProposalWrapper]: self.report.num_operational_stats_workunits_emitted = 0 for event in events_iterable: if not ( @@ -384,7 +440,7 @@ def _gen_operation_aspect_workunits_from_access_events( continue if not self._should_process_event(event, all_tables=all_tables): - self.report.num_operational_stats_skipped += 1 + self.report.num_operational_stats_filtered += 1 continue assert event.operation_type in ["insert", "delete"] @@ -406,7 +462,7 @@ def _gen_operation_aspect_workunits_from_access_events( resource: str = f"{event.database}.{event.schema_}.{event.table}".lower() yield MetadataChangeProposalWrapper( entityUrn=self.dataset_urn_builder(resource), aspect=operation_aspect - ).as_workunit() + ) self.report.num_operational_stats_workunits_emitted += 1 def _aggregate_access_events( diff --git a/metadata-ingestion/tests/integration/redshift-usage/test_redshift_usage.py b/metadata-ingestion/tests/integration/redshift-usage/test_redshift_usage.py index 74eec82b39ba3..a9eebb8d54154 100644 --- a/metadata-ingestion/tests/integration/redshift-usage/test_redshift_usage.py +++ b/metadata-ingestion/tests/integration/redshift-usage/test_redshift_usage.py @@ -2,11 +2,11 @@ import pathlib from pathlib import Path from typing import Dict, List, Union -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch from freezegun import freeze_time -from datahub.emitter.mce_builder import make_dataset_urn +from datahub.emitter.mce_builder import make_dataset_urn, make_user_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.sink.file import write_metadata_file from datahub.ingestion.source.redshift.config import RedshiftConfig @@ -20,6 +20,7 @@ MetadataChangeEvent, MetadataChangeProposal, ) +from datahub.metadata.schema_classes import OperationClass, OperationTypeClass from tests.test_helpers import mce_helpers FROZEN_TIME = "2021-09-15 09:00:00" @@ -243,3 +244,52 @@ def load_access_events(test_resources_dir: pathlib.Path) -> List[Dict]: with access_events_history_file.open() as access_events_json: access_events = json.loads(access_events_json.read()) return access_events + + +def test_duplicate_operations_dropped(): + report = RedshiftReport() + usage_extractor = RedshiftUsageExtractor( + config=MagicMock(), + connection=MagicMock(), + report=report, + dataset_urn_builder=MagicMock(), + redundant_run_skip_handler=None, + ) + + user = make_user_urn("jdoe") + urnA = "urn:li:dataset:(urn:li:dataPlatform:redshift,db.schema.tableA,PROD)" + urnB = "urn:li:dataset:(urn:li:dataPlatform:redshift,db.schema.tableB,PROD)" 
+ + opA1 = MetadataChangeProposalWrapper( + entityUrn=urnA, + aspect=OperationClass( + timestampMillis=100 * 1000, + lastUpdatedTimestamp=95 * 1000, + actor=user, + operationType=OperationTypeClass.INSERT, + ), + ) + opB1 = MetadataChangeProposalWrapper( + entityUrn=urnB, + aspect=OperationClass( + timestampMillis=101 * 1000, + lastUpdatedTimestamp=94 * 1000, + actor=user, + operationType=OperationTypeClass.INSERT, + ), + ) + opA2 = MetadataChangeProposalWrapper( + entityUrn=urnA, + aspect=OperationClass( + timestampMillis=102 * 1000, + lastUpdatedTimestamp=90 * 1000, + actor=user, + operationType=OperationTypeClass.INSERT, + ), + ) + + dedups = list(usage_extractor._drop_repeated_operations([opA1, opB1, opA2])) + assert dedups == [ + opA1, + opB1, + ] From e58e2bf3be6cf43923ff400667406ee6dc95cd3a Mon Sep 17 00:00:00 2001 From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:02:33 +0530 Subject: [PATCH 4/6] feat: Deprecation 'Note' changed to Markdown Renderable (#9396) Setting auto merge after test cases are passed --- .../EntityDropdown/UpdateDeprecationModal.tsx | 14 +++- .../components/styled/DeprecationPill.tsx | 82 +++++++++++++++++-- .../tests/cypress/cypress/support/commands.js | 2 +- 3 files changed, 86 insertions(+), 12 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx index 6ae893e12575f..25527497b33a8 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx @@ -1,7 +1,10 @@ import React from 'react'; -import { Button, DatePicker, Form, Input, message, Modal } from 'antd'; +import { Button, DatePicker, Form, message, Modal } from 'antd'; +import styled from 'styled-components'; import { useBatchUpdateDeprecationMutation } from '../../../../graphql/mutations.generated'; import { handleBatchError } from '../utils'; +import { Editor } from '../tabs/Documentation/components/editor/Editor'; +import { ANTD_GRAY } from '../constants'; type Props = { urns: string[]; @@ -9,6 +12,10 @@ type Props = { refetch?: () => void; }; +const StyledEditor = styled(Editor)` + border: 1px solid ${ANTD_GRAY[4.5]}; +`; + export const UpdateDeprecationModal = ({ urns, onClose, refetch }: Props) => { const [batchUpdateDeprecation] = useBatchUpdateDeprecationMutation(); const [form] = Form.useForm(); @@ -64,10 +71,11 @@ export const UpdateDeprecationModal = ({ urns, onClose, refetch }: Props) => { } + width='40%' >
- - + + diff --git a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx index f60a74247ebcc..9ec2aab193aa0 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx @@ -1,4 +1,4 @@ -import React from 'react'; +import React, { useState } from 'react'; import { InfoCircleOutlined } from '@ant-design/icons'; import { Divider, message, Modal, Popover, Tooltip, Typography } from 'antd'; import { blue } from '@ant-design/colors'; @@ -8,6 +8,8 @@ import { Deprecation } from '../../../../../types.generated'; import { getLocaleTimezone } from '../../../../shared/time/timeUtils'; import { ANTD_GRAY } from '../../constants'; import { useBatchUpdateDeprecationMutation } from '../../../../../graphql/mutations.generated'; +import { Editor } from '../../tabs/Documentation/components/editor/Editor'; +import StripMarkdownText, { removeMarkdown } from './StripMarkdownText'; const DeprecatedContainer = styled.div` height: 18px; @@ -38,11 +40,6 @@ const DeprecatedTitle = styled(Typography.Text)` font-weight: bold; `; -const DeprecatedSubTitle = styled(Typography.Text)` - display: block; - margin-bottom: 5px; -`; - const LastEvaluatedAtLabel = styled.div` padding: 0; margin: 0; @@ -70,15 +67,42 @@ const IconGroup = styled.div` } `; +const DescriptionContainer = styled.div` + position: relative; + display: flex; + flex-direction: column; + width: 100%; + height: 100%; + min-height: 22px; + margin-bottom: 14px; +`; +const StyledViewer = styled(Editor)` + padding-right: 8px; + display: block; + + .remirror-editor.ProseMirror { + padding: 0; + } +`; + +const ExpandedActions = styled.div` + height: 10px; +`; +const ReadLessText = styled(Typography.Link)` + margin-right: 4px; +`; type Props = { urn: string; deprecation: Deprecation; refetch?: () => void; showUndeprecate: boolean | null; }; +const ABBREVIATED_LIMIT = 80; export const DeprecationPill = ({ deprecation, urn, refetch, showUndeprecate }: Props) => { const [batchUpdateDeprecationMutation] = useBatchUpdateDeprecationMutation(); + const [expanded, setExpanded] = useState(false); + const overLimit = deprecation?.note && removeMarkdown(deprecation?.note).length > 80; /** * Deprecation Decommission Timestamp */ @@ -131,14 +155,56 @@ export const DeprecationPill = ({ deprecation, urn, refetch, showUndeprecate }: return ( {deprecation?.note !== '' && Deprecation note} {isDividerNeeded && } - {deprecation?.note !== '' && {deprecation.note}} + + {expanded || !overLimit ? 
( + <> + { + deprecation?.note && deprecation?.note !== '' && + <> + + + {overLimit && ( + { + setExpanded(false); + }} + > + Read Less + + )} + + + } + + ) : ( + <> + + { + setExpanded(true); + }} + > + Read More + + + } + shouldWrap + > + {deprecation.note} + + + )} + {deprecation?.decommissionTime !== null && ( diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index 5e3664f944edf..ffbd050488181 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -171,7 +171,7 @@ Cypress.Commands.add("deleteFromDropdown", () => { Cypress.Commands.add("addViaFormModal", (text, modelHeader) => { cy.waitTextVisible(modelHeader); - cy.get(".ant-form-item-control-input-content > input[type='text']").first().type(text); + cy.get('.ProseMirror-focused').type(text); cy.get(".ant-modal-footer > button:nth-child(2)").click(); }); From b4fe451d932315546ebd98623f1572a66c41ad43 Mon Sep 17 00:00:00 2001 From: gaurav2733 <77378510+gaurav2733@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:38:30 +0530 Subject: [PATCH 5/6] feat: markdown support for group description (#9455) --- .../group/EditGroupDescriptionModal.tsx | 64 ++++++++ .../src/app/entity/group/GroupInfoSideBar.tsx | 145 ++++++++++++++-- .../app/identity/group/CreateGroupModal.tsx | 106 +++++++------ .../cypress/e2e/settings/managing_groups.js | 6 +- 4 files changed, 261 insertions(+), 60 deletions(-) create mode 100644 datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx diff --git a/datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx b/datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx new file mode 100644 index 0000000000000..a898a73c254ef --- /dev/null +++ b/datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx @@ -0,0 +1,64 @@ +import React, { useState } from 'react'; +import { Button, Modal, Form } from 'antd'; +import styled from 'styled-components'; + +import { Editor } from '../shared/tabs/Documentation/components/editor/Editor'; +import { ANTD_GRAY } from '../shared/constants'; + +type Props = { + onClose: () => void; + onSaveAboutMe: () => void; + setStagedDescription: (des: string) => void; + stagedDescription: string | undefined; +}; +const StyledEditor = styled(Editor)` + border: 1px solid ${ANTD_GRAY[4]}; +`; + +export default function EditGroupDescriptionModal({ + onClose, + onSaveAboutMe, + setStagedDescription, + stagedDescription, +}: Props) { + const [form] = Form.useForm(); + const [aboutText, setAboutText] = useState(stagedDescription); + + function updateDescription(description: string) { + setAboutText(description); + setStagedDescription(description); + + } + + const saveDescription = () => { + onSaveAboutMe(); + onClose(); + }; + + return ( + + + + + } + > + +
+ +
+
+ +
+ ); +} diff --git a/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx b/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx index d9eaed2682ea1..07885a4d0f630 100644 --- a/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx +++ b/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx @@ -16,14 +16,15 @@ import { EmptyValue, SocialDetails, EditButton, - AboutSection, - AboutSectionText, GroupsSection, + AboutSection, } from '../shared/SidebarStyledComponents'; import GroupMembersSideBarSection from './GroupMembersSideBarSection'; import { useUserContext } from '../../context/useUserContext'; - -const { Paragraph } = Typography; +import StripMarkdownText, { removeMarkdown } from '../shared/components/styled/StripMarkdownText'; +import { Editor } from '../shared/tabs/Documentation/components/editor/Editor'; +import EditGroupDescriptionModal from './EditGroupDescriptionModal'; +import { REDESIGN_COLORS } from '../shared/constants'; type SideBarData = { photoUrl: string | undefined; @@ -80,6 +81,61 @@ const GroupTitle = styled(Typography.Title)` } `; +const EditIcon = styled(EditOutlined)` + cursor: pointer; + color: ${REDESIGN_COLORS.BLUE}; +`; +const AddNewDescription = styled(Button)` + display: none; + margin: -4px; + width: 140px; +`; + +const StyledViewer = styled(Editor)` + padding-right: 8px; + display: block; + + .remirror-editor.ProseMirror { + padding: 0; + } +`; + +const DescriptionContainer = styled.div` + position: relative; + display: flex; + flex-direction: column; + width: 100%; + text-align: left; + font-weight: normal; + min-height: 22px; + + &:hover ${AddNewDescription} { + display: block; + } + & ins.diff { + background-color: #b7eb8f99; + text-decoration: none; + &:hover { + background-color: #b7eb8faa; + } + } + & del.diff { + background-color: #ffa39e99; + text-decoration: line-through; + &:hover { + background-color: #ffa39eaa; + } + } +`; + +const ExpandedActions = styled.div` + height: 10px; +`; +const ReadLessText = styled(Typography.Link)` + margin-right: 4px; +`; + /** * Responsible for reading & writing users. */ @@ -106,7 +162,17 @@ export default function GroupInfoSidebar({ sideBarData, refetch }: Props) { const me = useUserContext(); const canEditGroup = me?.platformPrivileges?.manageIdentities; const [groupTitle, setGroupTitle] = useState(name); + const [expanded, setExpanded] = useState(false); + const [isUpdatingDescription, SetIsUpdatingDescription] = useState(false); + const [stagedDescription, setStagedDescription] = useState(aboutText); + + const [updateName] = useUpdateNameMutation(); + const overLimit = removeMarkdown(aboutText || '').length > 80; + const ABBREVIATED_LIMIT = 80; + + useEffect(() => { + setStagedDescription(aboutText); + }, [aboutText]); useEffect(() => { setGroupTitle(groupTitle); @@ -136,12 +202,12 @@ }; // About Text save - const onSaveAboutMe = (inputString) => { + const onSaveAboutMe = () => { updateCorpGroupPropertiesMutation({ variables: { urn: urn || '', input: { - description: inputString, + description: stagedDescription, }, }, }) @@ -201,16 +267,65 @@ - {TITLES.about} - - - {aboutText || } - - + + {TITLES.about} + + SetIsUpdatingDescription(true)} data-testid="edit-icon" /> + + + + {(aboutText && expanded) || !overLimit ?
( + <> + {/* Read only viewer for displaying group description */} + + + {overLimit && ( + { + setExpanded(false); + }} + > + Read Less + + )} + + + ) : ( + <> + {/* Display abbreviated description with option to read more */} + + { + setExpanded(true); + }} + > + Read More + + + } + shouldWrap + > + {aboutText} + + + )} + + {/* Modal for updating group description */} + {isUpdatingDescription && ( + { + SetIsUpdatingDescription(false); + setStagedDescription(aboutText); + }} + onSaveAboutMe={onSaveAboutMe} + setStagedDescription={setStagedDescription} + stagedDescription={stagedDescription} + /> + )} diff --git a/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx b/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx index 214cb251767c9..4ba714ca23ae0 100644 --- a/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx +++ b/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx @@ -1,16 +1,23 @@ -import React, { useState } from 'react'; +import React, { useRef, useState } from 'react'; import { message, Button, Input, Modal, Typography, Form, Collapse } from 'antd'; +import styled from 'styled-components'; import { useCreateGroupMutation } from '../../../graphql/group.generated'; import { useEnterKeyListener } from '../../shared/useEnterKeyListener'; import { validateCustomUrnId } from '../../shared/textUtil'; import analytics, { EventType } from '../../analytics'; import { CorpGroup, EntityType } from '../../../types.generated'; +import { Editor as MarkdownEditor } from '../../entity/shared/tabs/Documentation/components/editor/Editor'; +import { ANTD_GRAY } from '../../entity/shared/constants'; type Props = { onClose: () => void; onCreate: (group: CorpGroup) => void; }; +const StyledEditor = styled(MarkdownEditor)` + border: 1px solid ${ANTD_GRAY[4]}; +`; + export default function CreateGroupModal({ onClose, onCreate }: Props) { const [stagedName, setStagedName] = useState(''); const [stagedDescription, setStagedDescription] = useState(''); @@ -19,45 +26,54 @@ export default function CreateGroupModal({ onClose, onCreate }: Props) { const [createButtonEnabled, setCreateButtonEnabled] = useState(true); const [form] = Form.useForm(); + // Reference to the styled editor for handling focus + const styledEditorRef = useRef(null); + const onCreateGroup = () => { - createGroupMutation({ - variables: { - input: { - id: stagedId, - name: stagedName, - description: stagedDescription, - }, - }, - }) - .then(({ data, errors }) => { - if (!errors) { - analytics.event({ - type: EventType.CreateGroupEvent, - }); - message.success({ - content: `Created group!`, - duration: 3, - }); - // TODO: Get a full corp group back from create endpoint. 
- onCreate({ - urn: data?.createGroup || '', - type: EntityType.CorpGroup, + // Check if the Enter key was pressed inside the styled editor to prevent unintended form submission + const isEditorNewlineKeypress = + document.activeElement !== styledEditorRef.current && + !styledEditorRef.current?.contains(document.activeElement); + if (isEditorNewlineKeypress) { + createGroupMutation({ + variables: { + input: { + id: stagedId, name: stagedName, - info: { - description: stagedDescription, - }, - }); - } - }) - .catch((e) => { - message.destroy(); - message.error({ content: `Failed to create group!: \n ${e.message || ''}`, duration: 3 }); + description: stagedDescription, + }, + }, }) - .finally(() => { - setStagedName(''); - setStagedDescription(''); - }); - onClose(); + .then(({ data, errors }) => { + if (!errors) { + analytics.event({ + type: EventType.CreateGroupEvent, + }); + message.success({ + content: `Created group!`, + duration: 3, + }); + // TODO: Get a full corp group back from create endpoint. + onCreate({ + urn: data?.createGroup || '', + type: EntityType.CorpGroup, + name: stagedName, + info: { + description: stagedDescription, + }, + }); + } + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to create group!: \n ${e.message || ''}`, duration: 3 }); + }) + .finally(() => { + setStagedName(''); + setStagedDescription(''); + }); + onClose(); + } }; // Handle the Enter press @@ -65,8 +81,13 @@ export default function CreateGroupModal({ onClose, onCreate }: Props) { querySelectorToExecuteClick: '#createGroupButton', }); + function updateDescription(description: string) { + setStagedDescription(description); + } + return ( Description
}> An optional description for your new group. - - setStagedDescription(event.target.value)} - /> + + {/* Styled editor for the group description */} +
+ +
diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js index 70219a550cd8b..978a245c3d9e3 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js @@ -72,8 +72,10 @@ describe("create and manage group", () => { cy.focused().clear().type(`Test group EDITED ${test_id}{enter}`); cy.waitTextVisible("Name Updated"); cy.contains(`Test group EDITED ${test_id}`).should("be.visible"); - cy.contains("Test group description").find('[aria-label="edit"]').click(); - cy.focused().type(" EDITED{enter}"); + cy.get('[data-testid="edit-icon"]').click(); + cy.waitTextVisible("Edit Description"); + cy.get("#description").should("be.visible").type(" EDITED"); + cy.get("#updateGroupButton").click(); cy.waitTextVisible("Changes saved."); cy.contains("Test group description EDITED").should("be.visible"); cy.clickOptionWithText("Add Owners"); From 9d386fbd6f9a0436b25daa2b4603d1fa0b8f44ee Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 18 Dec 2023 05:38:16 -0500 Subject: [PATCH 6/6] feat(ingest): enable CLL for dbt by default (#9466) --- .../ingestion/source/dbt/dbt_common.py | 7 +- .../ingestion/source/looker/looker_common.py | 2 +- .../source/looker/looker_lib_wrapper.py | 2 +- .../dbt_enabled_with_schemas_mces_golden.json | 248 ++++++++++++ .../dbt_test_column_meta_mapping_golden.json | 383 ++++++++++++++++++ ...th_complex_owner_patterns_mces_golden.json | 248 ++++++++++++ ...th_data_platform_instance_mces_golden.json | 248 ++++++++++++ ...h_non_incremental_lineage_mces_golden.json | 248 ++++++++++++ ..._target_platform_instance_mces_golden.json | 248 ++++++++++++ 9 files changed, 1630 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index af28be310587a..7bec07b40c4bd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -300,7 +300,7 @@ class DBTCommonConfig( description="When enabled, schemas will be inferred from the dbt node definition.", ) include_column_lineage: bool = Field( - default=False, + default=True, description="When enabled, column-level lineage will be extracted from the dbt node definition. Requires `infer_dbt_schemas` to be enabled. 
" "If you run into issues where the column name casing does not match up with properly, providing a datahub_api or using the rest sink will improve accuracy.", ) @@ -696,7 +696,10 @@ def get_column_type( @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") -@capability(SourceCapability.LINEAGE_FINE, "Enabled using `include_column_lineage`") +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, configure using `include_column_lineage`", +) class DBTSourceBase(StatefulIngestionSourceBase): def __init__(self, config: DBTCommonConfig, ctx: PipelineContext, platform: str): super().__init__(config, ctx) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index e440750cba0d0..53533a8d27c9b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -1015,7 +1015,7 @@ def __init__( self.report = report self.source_config = source_config - @lru_cache() + @lru_cache(maxsize=200) def get_explore(self, model: str, explore: str) -> Optional[LookerExplore]: looker_explore = LookerExplore.from_api( model, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py index 988caba1c0d74..8959868c27114 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py @@ -114,7 +114,7 @@ def get_available_permissions(self) -> Set[str]: return permissions - @lru_cache(maxsize=2000) + @lru_cache(maxsize=1000) def get_user(self, id_: str, user_fields: str) -> Optional[User]: self.client_stats.user_calls += 1 try: diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index e4f01ef7a6c53..4deb725ed2b44 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -247,6 +247,86 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + 
], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -428,6 +508,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -650,6 +765,104 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { 
+ "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -789,6 +1002,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 4d5b008b695f9..588470ef41631 100644 --- 
a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -201,6 +201,98 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),initial_full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -360,6 +452,52 @@ "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),billing_month)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -574,6 +712,104 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -741,6 +977,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -1011,6 +1282,118 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),active)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),active)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),activebool)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),activebool)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),address_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),address_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),create_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),create_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),first_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_update)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_update)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),store_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),store_id)" + ], + "confidenceScore": 0.9 + } ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index 0bdd5e3c895c2..926e8b8c8ed84 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -211,6 +211,86 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -375,6 +455,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 
0.9 + } ] } }, @@ -597,6 +712,104 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -736,6 +949,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index 5ab0b11e37771..3727603266f25 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -212,6 +212,86 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -376,6 +456,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -598,6 +713,104 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),customer_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),rental_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -737,6 +950,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index 3725e590fee9e..ec879e6af766a 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -212,6 +212,86 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + 
"upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -376,6 +456,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -598,6 +713,104 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -737,6 +950,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index a47abab6b40f7..e25c5e4faf6af 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -212,6 +212,86 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": 
"FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -376,6 +456,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -598,6 +713,104 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } ] } }, @@ -737,6 +950,41 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", "type": "TRANSFORMED" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" + ], + "downstreamType": "FIELD_SET", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } ] } },