diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 21fd547114872a..a9be56a69560ca 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -51,6 +51,9 @@ jobs: - python-version: "3.10" extra_pip_requirements: 'apache-airflow==2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt' extra_pip_extras: plugin-v2 + - python-version: "3.10" + extra_pip_requirements: 'apache-airflow==2.9.0 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.10.txt' + extra_pip_extras: plugin-v2 fail-fast: false steps: - name: Set up JDK 17 diff --git a/build.gradle b/build.gradle index f4fb7b42d8560b..58e7a2ba3a8d6e 100644 --- a/build.gradle +++ b/build.gradle @@ -168,7 +168,8 @@ project.ext.externalDependency = [ 'jettison': 'org.codehaus.jettison:jettison:1.5.4', 'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1', 'jna': 'net.java.dev.jna:jna:5.12.1', - 'jsonPatch': 'com.github.java-json-tools:json-patch:1.13', + 'jsonPatch': 'jakarta.json:jakarta.json-api:2.1.3', + 'jsonPathImpl': 'org.eclipse.parsson:parsson:1.1.6', 'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1', 'jsonSmart': 'net.minidev:json-smart:2.4.9', 'json': 'org.json:json:20231013', diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 1f27faf4811dc6..e9c9537483474e 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -15,7 +15,10 @@ dependencies { implementation externalDependency.jacksonDataBind implementation externalDependency.jacksonDataFormatYaml implementation externalDependency.reflections - api externalDependency.jsonPatch + + implementation externalDependency.jsonPatch + implementation externalDependency.jsonPathImpl + constraints { implementation(externalDependency.snakeYaml) { because("previous versions are vulnerable to CVE-2022-25857") @@ -28,6 +31,7 @@ dependencies { testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') + testImplementation project(':metadata-utils') testImplementation externalDependency.testng testImplementation externalDependency.mockito testImplementation externalDependency.mockitoInline diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java index f04133e9e1ff8b..293ef90a25f818 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.aspect.batch; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.aspect.AspectRetriever; +import jakarta.json.JsonPatch; /** * A change proposal represented as a patch to an exiting stored object in the primary data store. @@ -17,5 +17,5 @@ public interface PatchMCP extends MCPItem { */ ChangeMCP applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever); - Patch getPatch(); + JsonPatch getPatch(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java index 484603b9c1f859..09392330c81a56 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java @@ -1,12 +1,15 @@ package com.linkedin.metadata.aspect.patch; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatch; -import java.io.IOException; +import com.linkedin.util.Pair; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonPatch; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.AllArgsConstructor; @@ -21,7 +24,7 @@ public class GenericJsonPatch { @Nullable private Map> arrayPrimaryKeys; - @Nonnull private JsonNode patch; + @Nonnull private List patch; @Nonnull public Map> getArrayPrimaryKeys() { @@ -29,7 +32,27 @@ public Map> getArrayPrimaryKeys() { } @JsonIgnore - public JsonPatch getJsonPatch() throws IOException { - return JsonPatch.fromJson(patch); + public JsonPatch getJsonPatch() { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + patch.forEach(op -> arrayBuilder.add(Json.createObjectBuilder(op.toMap()))); + return Json.createPatch(arrayBuilder.build()); + } + + @Data + @NoArgsConstructor + public static class PatchOp { + @Nonnull private String op; + @Nonnull private String path; + @Nullable private Object value; + + public Map toMap() { + if (value != null) { + return Stream.of(Pair.of("op", op), Pair.of("path", path), Pair.of("value", value)) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } else { + return Stream.of(Pair.of("op", op), Pair.of("path", path)) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + } } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java index 4613396109cc10..0818241df81559 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java @@ -15,10 +15,9 @@ import static com.linkedin.metadata.Constants.UPSTREAM_LINEAGE_ASPECT_NAME; import com.fasterxml.jackson.core.JsonProcessingException; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.models.AspectSpec; +import jakarta.json.JsonPatch; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -75,12 +74,11 @@ public RecordTemplate getDefaultTemplate(String aspectSpecName) { * @param aspectSpec aspectSpec of the template * @return a {@link RecordTemplate} with the patch applied * @throws JsonProcessingException if there is an issue with processing the record template's json - * @throws JsonPatchException if there is an issue with applying the json patch */ @Nonnull public RecordTemplate applyPatch( - RecordTemplate recordTemplate, Patch jsonPatch, AspectSpec aspectSpec) - throws JsonProcessingException, JsonPatchException { + RecordTemplate recordTemplate, JsonPatch jsonPatch, AspectSpec aspectSpec) + throws JsonProcessingException { Template template = getTemplate(aspectSpec); return template.applyPatch(recordTemplate, jsonPatch); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java index 78cf14c47a0bf1..2b6c8f9409d267 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java @@ -1,23 +1,29 @@ package com.linkedin.metadata.aspect.patch.template; +import static com.linkedin.metadata.aspect.patch.template.TemplateUtil.OBJECT_MAPPER; import static com.linkedin.metadata.aspect.patch.template.TemplateUtil.populateTopLevelKeys; import com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonPatch; +import java.io.StringReader; public abstract class CompoundKeyTemplate implements ArrayMergingTemplate { @Override - public T applyPatch(RecordTemplate recordTemplate, Patch jsonPatch) - throws JsonProcessingException, JsonPatchException { + public T applyPatch(RecordTemplate recordTemplate, JsonPatch jsonPatch) + throws JsonProcessingException { JsonNode transformed = populateTopLevelKeys(preprocessTemplate(recordTemplate), jsonPatch); - JsonNode patched = jsonPatch.apply(transformed); - JsonNode postProcessed = rebaseFields(patched); + JsonObject patched = + jsonPatch.apply( + Json.createReader(new StringReader(OBJECT_MAPPER.writeValueAsString(transformed))) + .readObject()); + JsonNode postProcessed = rebaseFields(OBJECT_MAPPER.readTree(patched.toString())); return RecordUtils.toRecordTemplate(getTemplateType(), postProcessed.toString()); } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java index bd8cd544fb59be..06acb4cb318964 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java @@ -6,9 +6,11 @@ import com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonPatch; +import java.io.StringReader; import javax.annotation.Nonnull; public interface Template { @@ -45,18 +47,21 @@ default T getSubtype(RecordTemplate recordTemplate) throws ClassCastException { * @param jsonPatch patch to apply * @return patched value * @throws JsonProcessingException if there is an issue converting the input to JSON - * @throws JsonPatchException if there is an issue applying the patch */ - default T applyPatch(RecordTemplate recordTemplate, Patch jsonPatch) - throws JsonProcessingException, JsonPatchException { - + default T applyPatch(RecordTemplate recordTemplate, JsonPatch jsonPatch) + throws JsonProcessingException { TemplateUtil.validatePatch(jsonPatch); + JsonNode transformed = populateTopLevelKeys(preprocessTemplate(recordTemplate), jsonPatch); try { - JsonNode patched = jsonPatch.apply(transformed); - JsonNode postProcessed = rebaseFields(patched); + // Hack in a more efficient patcher. Even with the serialization overhead 140% faster + JsonObject patched = + jsonPatch.apply( + Json.createReader(new StringReader(OBJECT_MAPPER.writeValueAsString(transformed))) + .readObject()); + JsonNode postProcessed = rebaseFields(OBJECT_MAPPER.readTree(patched.toString())); return RecordUtils.toRecordTemplate(getTemplateType(), postProcessed.toString()); - } catch (JsonPatchException e) { + } catch (JsonProcessingException e) { throw new RuntimeException( String.format( "Error performing JSON PATCH on aspect %s. Patch: %s Target: %s", diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java index d998692f2c3889..be3fc4c1fc9836 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java @@ -8,9 +8,10 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jsonpatch.Patch; import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.util.Pair; +import jakarta.json.JsonPatch; +import jakarta.json.JsonValue; import java.util.ArrayList; import java.util.List; @@ -30,34 +31,32 @@ private TemplateUtil() {} .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); } - public static List> getPaths(Patch jsonPatch) { - JsonNode patchNode = OBJECT_MAPPER.valueToTree(jsonPatch); + public static List> getPaths(JsonPatch jsonPatch) { List> paths = new ArrayList<>(); - patchNode - .elements() - .forEachRemaining( + jsonPatch.toJsonArray().stream() + .map(JsonValue::asJsonObject) + .forEach( node -> paths.add( Pair.of( - PatchOperationType.valueOf(node.get("op").asText().toUpperCase()), - node.get("path").asText()))); + PatchOperationType.valueOf(node.getString("op").toUpperCase()), + node.getString("path")))); return paths; } - public static void validatePatch(Patch jsonPatch) { + public static void validatePatch(JsonPatch jsonPatch) { // ensure supported patch operations - JsonNode patchNode = OBJECT_MAPPER.valueToTree(jsonPatch); - patchNode - .elements() - .forEachRemaining( - node -> { + jsonPatch.toJsonArray().stream() + .map(JsonValue::asJsonObject) + .forEach( + jsonObject -> { try { - PatchOperationType.valueOf(node.get("op").asText().toUpperCase()); + PatchOperationType.valueOf(jsonObject.getString("op").toUpperCase()); } catch (Exception e) { throw new RuntimeException( String.format( "Unsupported PATCH operation: `%s` Operation `%s`", - node.get("op").asText(), node), + jsonObject.getString("op"), jsonObject), e); } }); @@ -70,7 +69,7 @@ public static void validatePatch(Patch jsonPatch) { * @param transformedNode transformed node to have keys populated * @return transformed node that has top level keys populated */ - public static JsonNode populateTopLevelKeys(JsonNode transformedNode, Patch jsonPatch) { + public static JsonNode populateTopLevelKeys(JsonNode transformedNode, JsonPatch jsonPatch) { JsonNode transformedNodeClone = transformedNode.deepCopy(); List> paths = getPaths(jsonPatch); for (Pair operationPath : paths) { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java index 3a3e3c99f25a38..3658ba3c544630 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.aspect.patch.template.common; import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatchException; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.template.CompoundKeyTemplate; @@ -53,7 +52,7 @@ public JsonNode rebaseFields(JsonNode patched) { return transformedNode; } - public T applyPatch(RecordTemplate recordTemplate) throws IOException, JsonPatchException { + public T applyPatch(RecordTemplate recordTemplate) throws IOException { return super.applyPatch(recordTemplate, genericJsonPatch.getJsonPatch()); } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java index b2911100519fc5..8b138e0f59ee97 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java @@ -1,17 +1,11 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jackson.jsonpointer.JsonPointer; -import com.github.fge.jsonpatch.AddOperation; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchOperation; import com.linkedin.chart.ChartInfo; import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.aspect.patch.template.chart.ChartInfoTemplate; -import java.util.ArrayList; -import java.util.List; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonPatchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -21,18 +15,16 @@ public class ChartInfoTemplateTest { public void testChartInfoTemplate() throws Exception { ChartInfoTemplate chartInfoTemplate = new ChartInfoTemplate(); ChartInfo dashboardInfo = chartInfoTemplate.getDefault(); - List patchOperations = new ArrayList<>(); - ObjectNode edgeNode = instance.objectNode(); - edgeNode.put( + JsonPatchBuilder patchOperations = Json.createPatchBuilder(); + + JsonObjectBuilder edgeNode = Json.createObjectBuilder(); + edgeNode.add( "destinationUrn", "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); - JsonPatchOperation operation = - new AddOperation( - new JsonPointer( - "/inputEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), - edgeNode); - patchOperations.add(operation); - JsonPatch patch = new JsonPatch(patchOperations); - ChartInfo result = chartInfoTemplate.applyPatch(dashboardInfo, patch); + + patchOperations.add( + "/inputEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + edgeNode.build()); + ChartInfo result = chartInfoTemplate.applyPatch(dashboardInfo, patchOperations.build()); Assert.assertEquals( UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java index be15d6976aee6f..1446635c8de94f 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java @@ -1,17 +1,10 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jackson.jsonpointer.JsonPointer; -import com.github.fge.jsonpatch.AddOperation; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchOperation; import com.linkedin.common.urn.UrnUtils; import com.linkedin.dashboard.DashboardInfo; import com.linkedin.metadata.aspect.patch.template.dashboard.DashboardInfoTemplate; -import java.util.ArrayList; -import java.util.List; +import jakarta.json.Json; +import jakarta.json.JsonPatchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -21,18 +14,18 @@ public class DashboardInfoTemplateTest { public void testDashboardInfoTemplate() throws Exception { DashboardInfoTemplate dashboardInfoTemplate = new DashboardInfoTemplate(); DashboardInfo dashboardInfo = dashboardInfoTemplate.getDefault(); - List patchOperations = new ArrayList<>(); - ObjectNode edgeNode = instance.objectNode(); - edgeNode.put( - "destinationUrn", "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); - JsonPatchOperation operation = - new AddOperation( - new JsonPointer( - "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), - edgeNode); - patchOperations.add(operation); - JsonPatch patch = new JsonPatch(patchOperations); - DashboardInfo result = dashboardInfoTemplate.applyPatch(dashboardInfo, patch); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + jsonPatchBuilder.add( + "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + Json.createObjectBuilder() + .add( + "destinationUrn", + Json.createValue( + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)")) + .build()); + + DashboardInfo result = + dashboardInfoTemplate.applyPatch(dashboardInfo, jsonPatchBuilder.build()); Assert.assertEquals( UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java index 4bad6a8e3d6594..8c7bfc98b2673f 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java @@ -1,47 +1,51 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; - -import com.fasterxml.jackson.databind.node.NumericNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jackson.jsonpointer.JsonPointer; -import com.github.fge.jsonpatch.AddOperation; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchOperation; -import com.github.fge.jsonpatch.RemoveOperation; +import static com.linkedin.metadata.utils.GenericRecordUtils.JSON; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.ByteString; import com.linkedin.data.DataMap; import com.linkedin.dataset.FineGrainedLineage; import com.linkedin.dataset.FineGrainedLineageDownstreamType; import com.linkedin.dataset.FineGrainedLineageUpstreamType; import com.linkedin.dataset.UpstreamLineage; import com.linkedin.metadata.aspect.patch.template.dataset.UpstreamLineageTemplate; -import java.util.ArrayList; -import java.util.List; -import org.testng.Assert; +import com.linkedin.metadata.utils.GenericRecordUtils; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonPatch; +import jakarta.json.JsonPatchBuilder; +import jakarta.json.JsonValue; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.testng.annotations.Test; public class UpstreamLineageTemplateTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + @Test public void testPatchUpstream() throws Exception { UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate(); UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault(); - List patchOperations = new ArrayList<>(); - ObjectNode fineGrainedLineageNode = instance.objectNode(); - NumericNode upstreamConfidenceScore = instance.numberNode(1.0f); - fineGrainedLineageNode.set("confidenceScore", upstreamConfidenceScore); - JsonPatchOperation operation = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"), - fineGrainedLineageNode); - patchOperations.add(operation); - JsonPatch jsonPatch = new JsonPatch(patchOperations); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + + JsonObjectBuilder fineGrainedLineageNode = Json.createObjectBuilder(); + JsonValue upstreamConfidenceScore = Json.createValue(1.0f); + fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore); + + jsonPatchBuilder.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)", + fineGrainedLineageNode.build()); // Initial population test - UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch); + UpstreamLineage result = + upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatchBuilder.build()); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap = new DataMap(); dataMap.put("confidenceScore", 1.0); @@ -61,36 +65,35 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage.setTransformOperation("CREATE"); fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); - Assert.assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage); + assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage); // Test non-overwrite upstreams and correct confidence score and types w/ overwrite - ObjectNode finegrainedLineageNode2 = instance.objectNode(); - finegrainedLineageNode2.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name())); - finegrainedLineageNode2.set("confidenceScore", upstreamConfidenceScore); - finegrainedLineageNode2.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name())); - JsonPatchOperation operation2 = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode2); - NumericNode upstreamConfidenceScore2 = instance.numberNode(0.1f); - ObjectNode finegrainedLineageNode3 = instance.objectNode(); - finegrainedLineageNode3.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.DATASET.name())); - finegrainedLineageNode3.set("confidenceScore", upstreamConfidenceScore2); - finegrainedLineageNode3.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD_SET.name())); - JsonPatchOperation operation3 = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode3); - List patchOperations2 = new ArrayList<>(); - patchOperations2.add(operation2); - patchOperations2.add(operation3); - JsonPatch jsonPatch2 = new JsonPatch(patchOperations2); + JsonObjectBuilder finegrainedLineageNode2 = Json.createObjectBuilder(); + finegrainedLineageNode2.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode2.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode2.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations2 = Json.createPatchBuilder(); + patchOperations2.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode2.build()); + + JsonValue upstreamConfidenceScore2 = Json.createValue(0.1f); + JsonObjectBuilder finegrainedLineageNode3 = Json.createObjectBuilder(); + finegrainedLineageNode3.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.DATASET.name())); + finegrainedLineageNode3.add("confidenceScore", upstreamConfidenceScore2); + finegrainedLineageNode3.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD_SET.name())); + + patchOperations2.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode3.build()); + + JsonPatch jsonPatch2 = patchOperations2.build(); + UpstreamLineage result2 = upstreamLineageTemplate.applyPatch(result, jsonPatch2); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap2 = new DataMap(); @@ -112,23 +115,22 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.DATASET); fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET); fineGrainedLineage2.setQuery(UrnUtils.getUrn("urn:li:query:someQuery")); - Assert.assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2); + assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2); // Check different queries - ObjectNode finegrainedLineageNode4 = instance.objectNode(); - finegrainedLineageNode4.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name())); - finegrainedLineageNode4.set("confidenceScore", upstreamConfidenceScore); - finegrainedLineageNode4.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name())); - JsonPatchOperation operation4 = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode4); - List patchOperations3 = new ArrayList<>(); - patchOperations3.add(operation4); - JsonPatch jsonPatch3 = new JsonPatch(patchOperations3); + JsonObjectBuilder finegrainedLineageNode4 = Json.createObjectBuilder(); + finegrainedLineageNode4.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode4.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode4.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations3 = Json.createPatchBuilder(); + patchOperations3.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode4.build()); + + JsonPatch jsonPatch3 = patchOperations3.build(); UpstreamLineage result3 = upstreamLineageTemplate.applyPatch(result2, jsonPatch3); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap3 = new DataMap(); @@ -152,23 +154,22 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); fineGrainedLineage3.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery")); // Splits into two for different types - Assert.assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3); + assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3); // Check different transform types - ObjectNode finegrainedLineageNode5 = instance.objectNode(); - finegrainedLineageNode5.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name())); - finegrainedLineageNode5.set("confidenceScore", upstreamConfidenceScore); - finegrainedLineageNode5.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name())); - JsonPatchOperation operation5 = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode5); - List patchOperations4 = new ArrayList<>(); - patchOperations4.add(operation5); - JsonPatch jsonPatch4 = new JsonPatch(patchOperations4); + JsonObjectBuilder finegrainedLineageNode5 = Json.createObjectBuilder(); + finegrainedLineageNode5.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode5.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode5.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations4 = Json.createPatchBuilder(); + patchOperations4.add( + "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode5.build()); + JsonPatch jsonPatch4 = patchOperations4.build(); + UpstreamLineage result4 = upstreamLineageTemplate.applyPatch(result3, jsonPatch4); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap4 = new DataMap(); @@ -181,33 +182,76 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); fineGrainedLineage4.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery")); // New entry in array because of new transformation type - Assert.assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4); + assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4); // Remove - JsonPatchOperation removeOperation = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)")); - JsonPatchOperation removeOperation2 = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); - JsonPatchOperation removeOperation3 = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); - JsonPatchOperation removeOperation4 = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); - - List removeOperations = new ArrayList<>(); - removeOperations.add(removeOperation); - removeOperations.add(removeOperation2); - removeOperations.add(removeOperation3); - removeOperations.add(removeOperation4); - JsonPatch removePatch = new JsonPatch(removeOperations); + JsonPatchBuilder removeOperations = Json.createPatchBuilder(); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + removeOperations.remove( + "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + + JsonPatch removePatch = removeOperations.build(); UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch); - Assert.assertEquals(finalResult, upstreamLineageTemplate.getDefault()); + assertEquals(finalResult, upstreamLineageTemplate.getDefault()); + } + + @Test + public void testLargePatchStandard() throws Exception { + // Load patch operations from fixture + String patchStr = + OBJECT_MAPPER + .readTree( + new GzipCompressorInputStream( + this.getClass() + .getResourceAsStream("/patch/large_upstream_lineage_mcp.json.gz"))) + .get("aspect") + .get("com.linkedin.pegasus2avro.mxe.GenericAspect") + .get("value") + .asText(); + + JsonPatchBuilder patchBuilder = + Json.createPatchBuilder(Json.createReader(new StringReader(patchStr)).readArray()); + + // Overall the patch is a no-op, adding change to assert difference after application + patchBuilder.remove( + "/upstreams/urn:li:dataset:(urn:li:dataPlatform:snowflake,road_curated_nrt.db_3134_dbo.lineitem,PROD)"); + + JsonPatch jsonPatch = patchBuilder.build(); + assertEquals(jsonPatch.toJsonArray().size(), 7491); + + // Load existing aspect + String aspectStr = + OBJECT_MAPPER + .readTree( + new GzipCompressorInputStream( + this.getClass() + .getResourceAsStream("/patch/large_upstream_lineage_aspect.json.gz"))) + .get("select") + .get(0) + .get("metadata") + .asText(); + UpstreamLineage upstreamLineage = + GenericRecordUtils.deserializeAspect( + ByteString.copyString(aspectStr, StandardCharsets.UTF_8), JSON, UpstreamLineage.class); + assertEquals(upstreamLineage.getUpstreams().size(), 188); + assertEquals(upstreamLineage.getFineGrainedLineages().size(), 607); + + // Apply patch standard + UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate(); + + long start = System.currentTimeMillis(); + UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch); + long end = System.currentTimeMillis(); + assertTrue( + end - start < 10000, + String.format("Expected less then 10 seconds patch actual %s ms", end - start)); + + assertEquals(result.getUpstreams().size(), 187, "Expected 1 less upstream"); + assertEquals(result.getFineGrainedLineages().size(), 607); } } diff --git a/entity-registry/src/test/resources/patch/large_upstream_lineage_aspect.json.gz b/entity-registry/src/test/resources/patch/large_upstream_lineage_aspect.json.gz new file mode 100644 index 00000000000000..618e84efe274a9 Binary files /dev/null and b/entity-registry/src/test/resources/patch/large_upstream_lineage_aspect.json.gz differ diff --git a/entity-registry/src/test/resources/patch/large_upstream_lineage_mcp.json.gz b/entity-registry/src/test/resources/patch/large_upstream_lineage_mcp.json.gz new file mode 100644 index 00000000000000..0c9fa4f0f359c3 Binary files /dev/null and b/entity-registry/src/test/resources/patch/large_upstream_lineage_mcp.json.gz differ diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 90167126bc349c..065e9454c5d9e0 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -42,7 +42,7 @@ def get_long_description(): # We remain restrictive on the versions allowed here to prevent # us from being broken by backwards-incompatible changes in the # underlying package. - "openlineage-airflow>=1.2.0,<=1.7.0", + "openlineage-airflow>=1.2.0,<=1.12.0", }, } diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index cdba268eed56b7..d67754605c71be 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -420,6 +420,7 @@ def run_datajob( config: Optional[DatahubLineageConfig] = None, ) -> DataProcessInstance: if datajob is None: + assert ti.task is not None datajob = AirflowGenerator.generate_datajob( cluster, ti.task, dag, config=config ) @@ -509,6 +510,7 @@ def complete_datajob( :return: DataProcessInstance """ if datajob is None: + assert ti.task is not None datajob = AirflowGenerator.generate_datajob( cluster, ti.task, dag, config=config ) @@ -530,6 +532,7 @@ def complete_datajob( f"Result should be either success or failure and it was {ti.state}" ) + assert datajob is not None dpi = DataProcessInstance.from_datajob( datajob=datajob, id=f"{dag.dag_id}_{ti.task_id}_{dag_run.run_id}", diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index cf55d5347a3389..15f76a8b1e1d09 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -360,6 +360,7 @@ def on_task_instance_running( # The type ignore is to placate mypy on Airflow 2.1.x. dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] task = task_instance.task + assert task is not None dag: "DAG" = task.dag # type: ignore[assignment] self._task_holder.set_task(task_instance) @@ -447,6 +448,7 @@ def on_task_instance_finish( ) -> None: dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] task = self._task_holder.get_task(task_instance) or task_instance.task + assert task is not None dag: "DAG" = task.dag # type: ignore[assignment] datajob = AirflowGenerator.generate_datajob( diff --git a/metadata-ingestion/cli-ingestion.md b/metadata-ingestion/cli-ingestion.md index 48cc4ef09db910..b15dd2a5019959 100644 --- a/metadata-ingestion/cli-ingestion.md +++ b/metadata-ingestion/cli-ingestion.md @@ -25,10 +25,10 @@ Check out the [alternative installation options](../docs/cli.md#alternate-instal ## Configuring a Recipe -Create a recipe.yml file that defines the source and sink for metadata, as shown below. +Create a `recipe.yml` file that defines the source and sink for metadata, as shown below. ```yaml -# my_reipe.yml +# recipe.yml source: type: config: @@ -48,7 +48,7 @@ For more information and examples on configuring recipes, please refer to [Recip You can run ingestion using `datahub ingest` like below. ```shell -datahub ingest -c +datahub ingest -c ``` ## Reference diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index d6f63ab385f521..6fbc56c57d4f1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -1004,6 +1004,30 @@ def _create_upstream_table_lineage( env=self.config.env, ) + if not upstream_tables: + # Tableau's metadata graphql API sometimes returns an empty list for upstreamTables + # for embedded datasources. However, the upstreamColumns field often includes information. + # This attempts to populate upstream table information from the upstreamColumns field. + table_id_to_urn = { + column[c.TABLE][c.ID]: builder.make_dataset_urn_with_platform_instance( + self.platform, + column[c.TABLE][c.ID], + self.config.platform_instance, + self.config.env, + ) + for field in datasource.get(c.FIELDS, []) + for column in field.get(c.UPSTREAM_COLUMNS, []) + if column.get(c.TABLE, {}).get(c.TYPE_NAME) == c.CUSTOM_SQL_TABLE + and column.get(c.TABLE, {}).get(c.ID) + } + fine_grained_lineages = self.get_upstream_columns_of_fields_in_datasource( + datasource, datasource_urn, table_id_to_urn + ) + upstream_tables = [ + Upstream(dataset=table_urn, type=DatasetLineageType.TRANSFORMED) + for table_urn in table_id_to_urn.values() + ] + if datasource.get(c.FIELDS): if self.config.extract_column_level_lineage: # Find fine grained lineage for datasource column to datasource column edge, diff --git a/metadata-ingestion/src/datahub/utilities/urn_encoder.py b/metadata-ingestion/src/datahub/utilities/urn_encoder.py index b39dd043706825..88c0a128b8e468 100644 --- a/metadata-ingestion/src/datahub/utilities/urn_encoder.py +++ b/metadata-ingestion/src/datahub/utilities/urn_encoder.py @@ -15,7 +15,10 @@ def encode_string_array(arr: List[str]) -> List[str]: @staticmethod def encode_string(s: str) -> str: - return "".join([UrnEncoder.encode_char(c) for c in s]) + if not UrnEncoder.contains_reserved_char(s): + # Fast path for the common case, where no encoding is needed. + return s + return "".join(UrnEncoder.encode_char(c) for c in s) @staticmethod def encode_char(c: str) -> str: diff --git a/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json b/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json index bdb5013c2ea486..a558eea2e8dd86 100644 --- a/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json +++ b/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json @@ -12889,6 +12889,76 @@ "username": "jawadqu@gmail.com" } } + }, + { + "__typename": "EmbeddedDatasource", + "id": "5449c627-7462-4ef7-b492-bda46be068e3", + "name": "New DataSource", + "hasExtracts": true, + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastIncrementalUpdateTime": null, + "extractLastUpdateTime": "2018-02-09T00:05:25Z", + "upstreamDatabases": [], + "upstreamTables": [], + "downstreamSheets": [], + "fields": [ + { + "__typename": "ColumnField", + "id": "55a70afe-3e54-492d-80c8-744601ae78cc", + "name": "Program ID", + "description": null, + "isHidden": false, + "folderName": null, + "dataCategory": "NOMINAL", + "role": "DIMENSION", + "dataType": "STRING", + "defaultFormat": null, + "aggregation": null, + "upstreamFields": [], + "upstreamColumns": [ + { + "name": "id", + "table": { + "__typename": "CustomSQLTable", + "id": "c7dd65fb-6e7e-4091-bbde-8c78b34a40f8" + } + } + ] + }, + { + "__typename": "ColumnField", + "id": "636b9454-8786-4773-b94b-8e8f2db7e1a3", + "name": "Name", + "description": null, + "isHidden": false, + "folderName": null, + "dataCategory": "NOMINAL", + "role": "DIMENSION", + "dataType": "STRING", + "defaultFormat": null, + "aggregation": null, + "upstreamFields": [], + "upstreamColumns": [ + { + "name": "name", + "table": { + "__typename": "CustomSQLTable", + "id": "c7dd65fb-6e7e-4091-bbde-8c78b34a40f8" + } + } + ] + } + ], + "upstreamDatasources": [], + "workbook": { + "id": "bd040833-8f66-22c0-1b51-bd4ccf5eef7c", + "name": "Workbook published ds", + "projectName": "default", + "luid": "a059a443-7634-4abf-9e46-d147b99168be", + "owner": { + "username": "jawadqu@gmail.com" + } + } } ], "pageInfo": { diff --git a/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json index 3f481207a03ea1..d8c27057872c8b 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json @@ -31608,6 +31608,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43142,6 +43370,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44718,6 +44962,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json index 63796970b27c12..250c43b7fc2da6 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json @@ -31853,6 +31853,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43302,6 +43530,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44924,6 +45168,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json index 5308f5daebea69..c5417a8d212bc3 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json @@ -31608,6 +31608,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43057,6 +43285,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44633,6 +44877,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json index b39f7a181f01bc..8d7be1c4d0033b 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json @@ -577,6 +577,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,demo_postgres_instance.dvdrental.public.actor,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json index 17a7ed91ff36e5..5e46b91c207a7c 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json @@ -31853,6 +31853,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43272,6 +43500,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44894,6 +45138,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json index 5308f5daebea69..c5417a8d212bc3 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json @@ -31608,6 +31608,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43057,6 +43285,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44633,6 +44877,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json index cd957fe388926c..5dc8fd9522da4f 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json @@ -12801,7 +12801,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.10c6297d-0dbd-44f1-b1ba-458bea446513,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.22b0b4c3-6b85-713d-a161-5a87fdd78f40,PROD)", "type": "TRANSFORMED" }, { @@ -12809,7 +12809,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.22b0b4c3-6b85-713d-a161-5a87fdd78f40,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.10c6297d-0dbd-44f1-b1ba-458bea446513,PROD)", "type": "TRANSFORMED" } ], @@ -31740,6 +31740,239 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/acryl_site1/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, + { + "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", + "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" + }, + { + "id": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55", + "urn": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43350,6 +43583,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -45080,6 +45329,35 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, + { + "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", + "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" + }, + { + "id": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55", + "urn": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 2b122897a333ff..36e7af700589c8 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -675,7 +675,7 @@ def test_tableau_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph) state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2) ) - assert len(difference_dataset_urns) == 34 + assert len(difference_dataset_urns) == 35 deleted_dataset_urns = [ "urn:li:dataset:(urn:li:dataPlatform:tableau,dfe2c02a-54b7-f7a2-39fc-c651da2f6ad8,PROD)", "urn:li:dataset:(urn:li:dataPlatform:tableau,d00f4ba6-707e-4684-20af-69eb47587cc2,PROD)", @@ -711,6 +711,7 @@ def test_tableau_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph) "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.people,PROD)", "urn:li:dataset:(urn:li:dataPlatform:webdata-direct:servicenowitsm-servicenowitsm,ven01911.sc_cat_item,PROD)", "urn:li:dataset:(urn:li:dataPlatform:tableau,10c6297d-0dbd-44f1-b1ba-458bea446513,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", ] assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns) diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index 2328697632434b..53c2a338289079 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -118,7 +118,8 @@ shadowJar { relocate 'ch.randelshofer', 'datahub.shaded.ch.randelshofer' relocate 'io.github.classgraph', 'datahub.shaded.io.github.classgraph' relocate 'nonapi.io.github.classgraph', 'datahub.shaded.nonapi.io.github.classgraph' - relocate 'com.github.fge', 'datahub.shaded.com.github.fge' + relocate 'org.eclipse.parsson', 'datahub.shaded.parsson' + relocate 'jakarta.json', 'datahub.shaded.json' finalizedBy checkShadowJar } diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 532395f158c020..49d7610dc3c890 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -25,7 +25,8 @@ dependencies { implementation externalDependency.guava implementation externalDependency.reflections - implementation externalDependency.jsonPatch + + implementation 'com.github.java-json-tools:json-patch:1.13' // TODO: Replace with jakarta.json api(externalDependency.dgraph4j) { exclude group: 'com.google.guava', module: 'guava' exclude group: 'io.grpc', module: 'grpc-protobuf' diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index e84a7e8a0ab510..a11e01a56c96f2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -3,7 +3,6 @@ import static com.linkedin.metadata.entity.AspectUtils.validateAspect; import com.datahub.util.exception.ModelConversionException; -import com.github.fge.jsonpatch.JsonPatchException; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -54,7 +53,7 @@ public static ChangeItemImpl fromPatch( try { builder.recordTemplate(genericPatchTemplate.applyPatch(currentValue)); - } catch (JsonPatchException | IOException e) { + } catch (IOException e) { throw new RuntimeException(e); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index cf9c3978e3a374..0efa45d121f2ae 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -8,9 +8,6 @@ import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -27,7 +24,9 @@ import com.linkedin.metadata.utils.SystemMetadataUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; -import java.io.IOException; +import jakarta.json.Json; +import jakarta.json.JsonPatch; +import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.util.Objects; import javax.annotation.Nonnull; @@ -59,7 +58,7 @@ public class PatchItemImpl implements PatchMCP { private final SystemMetadata systemMetadata; private final AuditStamp auditStamp; - private final Patch patch; + private final JsonPatch patch; private final MetadataChangeProposal metadataChangeProposal; @@ -108,7 +107,7 @@ public ChangeItemImpl applyPatch(RecordTemplate recordTemplate, AspectRetriever try { builder.recordTemplate( aspectTemplateEngine.applyPatch(currentValue, getPatch(), getAspectSpec())); - } catch (JsonProcessingException | JsonPatchException e) { + } catch (JsonProcessingException e) { throw new RuntimeException(e); } @@ -178,12 +177,14 @@ public static PatchItemImpl build( .build(entityRegistry); } - private static Patch convertToJsonPatch(MetadataChangeProposal mcp) { + private static JsonPatch convertToJsonPatch(MetadataChangeProposal mcp) { JsonNode json; try { - json = OBJECT_MAPPER.readTree(mcp.getAspect().getValue().asString(StandardCharsets.UTF_8)); - return JsonPatch.fromJson(json); - } catch (IOException e) { + return Json.createPatch( + Json.createReader( + new StringReader(mcp.getAspect().getValue().asString(StandardCharsets.UTF_8))) + .readArray()); + } catch (RuntimeException e) { throw new IllegalArgumentException("Invalid JSON Patch: " + mcp.getAspect().getValue(), e); } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index adc3164c03b11a..b8166d50cc4d08 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -677,8 +677,7 @@ private ChangeMCP toUpsertItem( @Nonnull AspectSpec aspectSpec, @Nullable RecordTemplate currentValue, @Nonnull GenericPatchTemplate genericPatchTemplate, - @Nonnull Actor actor) - throws URISyntaxException { + @Nonnull Actor actor) { return ChangeItemImpl.fromPatch( urn, aspectSpec, diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle index 9ec523bfd1e218..ea1ff32cb3838b 100644 --- a/metadata-service/services/build.gradle +++ b/metadata-service/services/build.gradle @@ -8,7 +8,7 @@ configurations { } dependencies { - implementation externalDependency.jsonPatch + implementation 'com.github.java-json-tools:json-patch:1.13' // TODO: Replace with jakarta.json implementation project(':entity-registry') implementation project(':metadata-utils') implementation project(':metadata-events:mxe-avro') diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java index 5d956c2c8ffad4..c44cb4eaa1ac3b 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java @@ -14,6 +14,8 @@ @ComponentScan( basePackages = { "com.linkedin.metadata.boot", + "com.linkedin.metadata.service", + "com.datahub.event", "com.linkedin.gms.factory.config", "com.linkedin.gms.factory.entityregistry", "com.linkedin.gms.factory.common", @@ -34,7 +36,7 @@ "com.linkedin.gms.factory.auth", "com.linkedin.gms.factory.search", "com.linkedin.gms.factory.secret", - "com.linkedin.gms.factory.timeseries" + "com.linkedin.gms.factory.timeseries", }) @PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) @Configuration