From 6aa6a8f639c5531011a9b2411699b38a0cdef4b4 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:59:04 -0500 Subject: [PATCH 01/17] test(graph): refactor graph test (#10175) --- .../metadata/graph/GraphServiceTestBase.java | 409 +++++++++++------- .../graph/GraphServiceTestBaseNoVia.java | 2 +- .../graph/dgraph/DgraphGraphServiceTest.java | 2 +- .../graph/neo4j/Neo4jGraphServiceTest.java | 104 ++--- .../search/SearchGraphServiceTestBase.java | 86 ++-- 5 files changed, 346 insertions(+), 257 deletions(-) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index 1fa89ec99973fd..38c9136113dbb0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ -31,8 +31,10 @@ import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; @@ -40,8 +42,6 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.Assert; import org.testng.annotations.BeforeMethod; @@ -81,37 +81,114 @@ public int compare(RelatedEntity left, RelatedEntity right) { protected static String userType = "user"; /** Some test datasets. 
*/ - protected static String datasetOneUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetOne,PROD)"; - - protected static String datasetTwoUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetTwo,PROD)"; - protected static String datasetThreeUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetThree,PROD)"; - protected static String datasetFourUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetFour,PROD)"; - protected static String datasetFiveUrnString = - "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDatasetFive,PROD)"; - - protected static final String schemaFieldUrnOneString = - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:type,SampleDatasetFive,PROD),fieldOne)"; - protected static final String schemaFieldUrnTwoString = - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:type,SampleDatasetFour,PROD),fieldTwo)"; - - protected static final String lifeCycleOwnerOneString = + protected static String dataset1UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset1,PROD)"; + + protected static String dataset2UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset2,PROD)"; + protected static String dataset3UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset3,PROD)"; + protected static String dataset4UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset4,PROD)"; + protected static String dataset5UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset5,PROD)"; + + protected static String dataset6UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset6,PROD)"; + + protected static String dataset7UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset7,PROD)"; + + protected static String dataset8UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset8,PROD)"; + protected static String dataset9UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset9,PROD)"; + protected static String dataset10UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset10,PROD)"; + protected static String dataset11UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset11,PROD)"; + protected static String dataset12UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset12,PROD)"; + protected static String dataset13UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset13,PROD)"; + protected static String dataset14UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset14,PROD)"; + protected static String dataset15UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset15,PROD)"; + protected static String dataset16UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset16,PROD)"; + protected static String dataset17UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset17,PROD)"; + protected static String dataset18UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset18,PROD)"; + + protected static String dataset19UrnString = + "urn:li:" + datasetType + ":(urn:li:dataPlatform:type,SampleDataset19,PROD)"; + + protected static String dataset20UrnString = + "urn:li:" + datasetType + 
":(urn:li:dataPlatform:type,SampleDataset20,PROD)"; + + protected static Urn dataset1Urn = createFromString(dataset1UrnString); + protected static Urn dataset2Urn = createFromString(dataset2UrnString); + protected static Urn dataset3Urn = createFromString(dataset3UrnString); + protected static Urn dataset4Urn = createFromString(dataset4UrnString); + protected static Urn dataset5Urn = createFromString(dataset5UrnString); + protected static Urn dataset6Urn = createFromString(dataset6UrnString); + + protected static Urn dataset7Urn = createFromString(dataset7UrnString); + + protected static Urn dataset8Urn = createFromString(dataset8UrnString); + protected static Urn dataset9Urn = createFromString(dataset9UrnString); + protected static Urn dataset10Urn = createFromString(dataset10UrnString); + protected static Urn dataset11Urn = createFromString(dataset11UrnString); + protected static Urn dataset12Urn = createFromString(dataset12UrnString); + protected static Urn dataset13Urn = createFromString(dataset13UrnString); + protected static Urn dataset14Urn = createFromString(dataset14UrnString); + protected static Urn dataset15Urn = createFromString(dataset15UrnString); + protected static Urn dataset16Urn = createFromString(dataset16UrnString); + protected static Urn dataset17Urn = createFromString(dataset17UrnString); + protected static Urn dataset18Urn = createFromString(dataset18UrnString); + + protected static Urn dataset19Urn = createFromString(dataset19UrnString); + + protected static Urn dataset20Urn = createFromString(dataset20UrnString); + protected static List datasetUrns = + List.of( + dataset1Urn, + dataset2Urn, + dataset3Urn, + dataset4Urn, + dataset5Urn, + dataset6Urn, + dataset7Urn, + dataset8Urn, + dataset9Urn, + dataset10Urn, + dataset11Urn, + dataset12Urn, + dataset13Urn, + dataset14Urn, + dataset15Urn, + dataset16Urn, + dataset17Urn, + dataset18Urn, + dataset19Urn, + dataset20Urn); + + protected static final String schemaFieldUrn1String = + "urn:li:schemaField:(" + dataset5UrnString + ",fieldOne)"; + protected static final String schemaFieldUrn2String = + "urn:li:schemaField:(" + dataset4UrnString + ",fieldTwo)"; + + protected static final String lifeCycleOwner1String = "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)"; - protected static final String lifeCycleOwnerTwoString = + protected static final String lifeCycleOwner2String = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"; - protected static Urn datasetOneUrn = createFromString(datasetOneUrnString); - protected static Urn datasetTwoUrn = createFromString(datasetTwoUrnString); - protected static Urn datasetThreeUrn = createFromString(datasetThreeUrnString); - protected static Urn datasetFourUrn = createFromString(datasetFourUrnString); - protected static Urn datasetFiveUrn = createFromString(datasetFiveUrnString); - protected static final Urn schemaFieldUrnOne = createFromString(schemaFieldUrnOneString); - protected static final Urn schemaFieldUrnTwo = createFromString(schemaFieldUrnTwoString); - protected static final Urn lifeCycleOwnerOne = createFromString(lifeCycleOwnerOneString); - protected static final Urn lifeCycleOwnerTwo = createFromString(lifeCycleOwnerTwoString); + protected static final Urn schemaFieldUrnOne = createFromString(schemaFieldUrn1String); + protected static final Urn schemaFieldUrnTwo = createFromString(schemaFieldUrn2String); + protected static final Urn lifeCycleOwnerOne = createFromString(lifeCycleOwner1String); + protected static final Urn 
lifeCycleOwnerTwo = createFromString(lifeCycleOwner2String); protected static String unknownUrnString = "urn:li:unknown:(urn:li:unknown:Unknown)"; @@ -146,31 +223,31 @@ public int compare(RelatedEntity left, RelatedEntity right) { /** Some expected related entities. */ protected static RelatedEntity downstreamOfDatasetOneRelatedEntity = - new RelatedEntity(downstreamOf, datasetOneUrnString); + new RelatedEntity(downstreamOf, dataset1UrnString); protected static RelatedEntity downstreamOfDatasetTwoRelatedEntity = - new RelatedEntity(downstreamOf, datasetTwoUrnString); + new RelatedEntity(downstreamOf, dataset2UrnString); protected static RelatedEntity downstreamOfDatasetThreeRelatedEntity = - new RelatedEntity(downstreamOf, datasetThreeUrnString); + new RelatedEntity(downstreamOf, dataset3UrnString); protected static RelatedEntity downstreamOfDatasetFourRelatedEntity = - new RelatedEntity(downstreamOf, datasetFourUrnString); + new RelatedEntity(downstreamOf, dataset4UrnString); protected static final RelatedEntity downstreamOfSchemaFieldOneVia = - new RelatedEntity(downstreamOf, schemaFieldUrnOneString, lifeCycleOwnerOneString); + new RelatedEntity(downstreamOf, schemaFieldUrn1String, lifeCycleOwner1String); protected static final RelatedEntity downstreamOfSchemaFieldOne = - new RelatedEntity(downstreamOf, schemaFieldUrnOneString); + new RelatedEntity(downstreamOf, schemaFieldUrn1String); protected static final RelatedEntity downstreamOfSchemaFieldTwoVia = - new RelatedEntity(downstreamOf, schemaFieldUrnTwoString, lifeCycleOwnerOneString); + new RelatedEntity(downstreamOf, schemaFieldUrn2String, lifeCycleOwner1String); protected static final RelatedEntity downstreamOfSchemaFieldTwo = - new RelatedEntity(downstreamOf, schemaFieldUrnTwoString); + new RelatedEntity(downstreamOf, schemaFieldUrn2String); protected static RelatedEntity hasOwnerDatasetOneRelatedEntity = - new RelatedEntity(hasOwner, datasetOneUrnString); + new RelatedEntity(hasOwner, dataset1UrnString); protected static RelatedEntity hasOwnerDatasetTwoRelatedEntity = - new RelatedEntity(hasOwner, datasetTwoUrnString); + new RelatedEntity(hasOwner, dataset2UrnString); protected static RelatedEntity hasOwnerDatasetThreeRelatedEntity = - new RelatedEntity(hasOwner, datasetThreeUrnString); + new RelatedEntity(hasOwner, dataset3UrnString); protected static RelatedEntity hasOwnerDatasetFourRelatedEntity = - new RelatedEntity(hasOwner, datasetFourUrnString); + new RelatedEntity(hasOwner, dataset4UrnString); protected static RelatedEntity hasOwnerUserOneRelatedEntity = new RelatedEntity(hasOwner, userOneUrnString); protected static RelatedEntity hasOwnerUserTwoRelatedEntity = @@ -207,10 +284,10 @@ public void disableAssert() { @Test public void testStaticUrns() { - assertNotNull(datasetOneUrn); - assertNotNull(datasetTwoUrn); - assertNotNull(datasetThreeUrn); - assertNotNull(datasetFourUrn); + assertNotNull(dataset1Urn); + assertNotNull(dataset2Urn); + assertNotNull(dataset3Urn); + assertNotNull(dataset4Urn); assertNotNull(userOneUrn); assertNotNull(userTwoUrn); @@ -260,13 +337,13 @@ protected GraphService getPopulatedGraphService() throws Exception { List edges = Arrays.asList( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetTwoUrn, 
userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null, null), - new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset4Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset1Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset3Urn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset4Urn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null), new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null, null), new Edge( @@ -308,21 +385,21 @@ protected GraphService getLineagePopulatedGraphService(boolean multiPathSearch) List edges = Arrays.asList( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetTwoUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null, null), - new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset4Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset1Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset3Urn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset4Urn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null), new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetOneUrn, consumes, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, consumes, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetThreeUrn, produces, null, null, null, null, null), - new Edge(dataJobOneUrn, datasetFourUrn, produces, null, null, null, null, null), - new Edge(dataJobTwoUrn, datasetOneUrn, consumes, null, null, null, null, null), - new Edge(dataJobTwoUrn, datasetTwoUrn, consumes, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset1Urn, consumes, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset2Urn, consumes, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset3Urn, produces, null, null, null, null, null), + new Edge(dataJobOneUrn, dataset4Urn, produces, null, null, null, null, null), + new Edge(dataJobTwoUrn, dataset1Urn, consumes, null, null, null, null, null), + new Edge(dataJobTwoUrn, dataset2Urn, consumes, null, null, null, null, null), new Edge(dataJobTwoUrn, dataJobOneUrn, downstreamOf, null, null, null, null, null)); edges.forEach(service::addEdge); @@ -371,22 +448,22 @@ public Object[][] getAddEdgeTests() { new Object[] {Collections.emptyList(), 
Collections.emptyList(), Collections.emptyList()}, new Object[] { Collections.singletonList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null)), + new Edge(dataset1Urn, dataset2Urn, downstreamOf, null, null, null, null, null)), Collections.singletonList(downstreamOfDatasetTwoRelatedEntity), Collections.singletonList(downstreamOfDatasetOneRelatedEntity) }, new Object[] { Arrays.asList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetTwoUrn, datasetThreeUrn, downstreamOf, null, null, null, null, null)), + new Edge(dataset1Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset2Urn, dataset3Urn, downstreamOf, null, null, null, null, null)), Arrays.asList(downstreamOfDatasetTwoRelatedEntity, downstreamOfDatasetThreeRelatedEntity), Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) }, new Object[] { Arrays.asList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null, null), - new Edge(datasetTwoUrn, userTwoUrn, hasOwner, null, null, null, null, null), + new Edge(dataset1Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset1Urn, userOneUrn, hasOwner, null, null, null, null, null), + new Edge(dataset2Urn, userTwoUrn, hasOwner, null, null, null, null, null), new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null, null)), Arrays.asList( downstreamOfDatasetTwoRelatedEntity, @@ -531,37 +608,36 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { GraphService service = getLineagePopulatedGraphService(); EntityLineageResult upstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); Map relationships = downstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getType(), downstreamOf); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getType(), downstreamOf); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), consumes); assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), consumes); - upstreamLineage = service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = upstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetTwoUrn)); - 
assertEquals(relationships.get(datasetTwoUrn).getType(), downstreamOf); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getType(), downstreamOf); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), produces); - downstreamLineage = - service.getLineage(datasetThreeUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); @@ -571,10 +647,10 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { relationships = upstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetOneUrn)); - assertEquals(relationships.get(datasetOneUrn).getType(), consumes); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getType(), consumes); + assertTrue(relationships.containsKey(dataset1Urn)); + assertEquals(relationships.get(dataset1Urn).getType(), consumes); + assertTrue(relationships.containsKey(dataset2Urn)); + assertEquals(relationships.get(dataset2Urn).getType(), consumes); downstreamLineage = service.getLineage(dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); @@ -582,10 +658,10 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { relationships = downstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetThreeUrn)); - assertEquals(relationships.get(datasetThreeUrn).getType(), produces); - assertTrue(relationships.containsKey(datasetFourUrn)); - assertEquals(relationships.get(datasetFourUrn).getType(), produces); + assertTrue(relationships.containsKey(dataset3Urn)); + assertEquals(relationships.get(dataset3Urn).getType(), produces); + assertTrue(relationships.containsKey(dataset4Urn)); + assertEquals(relationships.get(dataset4Urn).getType(), produces); assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), downstreamOf); } @@ -594,19 +670,19 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { public Object[][] getFindRelatedEntitiesSourceEntityFilterTests() { return new Object[][] { new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), outgoingRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), incomingRelationships, Arrays.asList(downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), undirectedRelationships, Arrays.asList( @@ -615,19 +691,19 @@ public Object[][] getFindRelatedEntitiesSourceEntityFilterTests() { downstreamOfDatasetFourRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(hasOwner), outgoingRelationships, 
Collections.singletonList(hasOwnerUserOneRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(hasOwner), incomingRelationships, Collections.emptyList() }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(hasOwner), undirectedRelationships, Collections.singletonList(hasOwnerUserOneRelatedEntity) @@ -672,19 +748,19 @@ public void testFindRelatedEntitiesSourceEntityFilter( public Object[][] getFindRelatedEntitiesDestinationEntityFilterTests() { return new Object[][] { new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), outgoingRelationships, Collections.singletonList(downstreamOfDatasetTwoRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), incomingRelationships, Collections.singletonList(downstreamOfDatasetTwoRelatedEntity) }, new Object[] { - newFilter("urn", datasetTwoUrnString), + newFilter("urn", dataset2UrnString), Collections.singletonList(downstreamOf), undirectedRelationships, Collections.singletonList(downstreamOfDatasetTwoRelatedEntity) @@ -1083,8 +1159,7 @@ public void testFindRelatedEntitiesNullSourceType() throws Exception { doTestFindRelatedEntitiesEntityType( anyType, null, downstreamOf, outgoingRelationships, service); - service.addEdge( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); @@ -1096,7 +1171,7 @@ public void testFindRelatedEntitiesNullSourceType() throws Exception { service, downstreamOfDatasetOneRelatedEntity); - service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset1Urn, nullUrn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, @@ -1128,8 +1203,7 @@ public void testFindRelatedEntitiesNullDestinationType() throws Exception { doTestFindRelatedEntitiesEntityType( anyType, null, downstreamOf, outgoingRelationships, service); - service.addEdge( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); @@ -1141,7 +1215,7 @@ public void testFindRelatedEntitiesNullDestinationType() throws Exception { service, downstreamOfDatasetOneRelatedEntity); - service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf, null, null, null, null, null)); + service.addEdge(new Edge(dataset1Urn, nullUrn, downstreamOf, null, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType( anyType, @@ -1281,7 +1355,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(userType), newFilter("urn", userOneUrnString), Collections.singletonList(hasOwner), @@ -1294,7 
+1368,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(userType), newFilter("urn", userTwoUrnString), Collections.singletonList(hasOwner), @@ -1312,7 +1386,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType, userType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), newFilter("urn", userOneUrnString), Collections.singletonList(hasOwner), @@ -1325,7 +1399,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { relatedEntities = service.findRelatedEntities( ImmutableList.of(datasetType, userType), - newFilter("urn", datasetOneUrnString), + newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), newFilter("urn", userTwoUrnString), Collections.singletonList(hasOwner), @@ -1374,7 +1448,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { public Object[][] getRemoveEdgesFromNodeTests() { return new Object[][] { new Object[] { - datasetTwoUrn, + dataset2Urn, Collections.singletonList(downstreamOf), outgoingRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity), @@ -1383,7 +1457,7 @@ public Object[][] getRemoveEdgesFromNodeTests() { Arrays.asList(downstreamOfDatasetThreeRelatedEntity, downstreamOfDatasetFourRelatedEntity) }, new Object[] { - datasetTwoUrn, + dataset2Urn, Collections.singletonList(downstreamOf), incomingRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity), @@ -1392,7 +1466,7 @@ public Object[][] getRemoveEdgesFromNodeTests() { Collections.emptyList(), }, new Object[] { - datasetTwoUrn, + dataset2Urn, Collections.singletonList(downstreamOf), undirectedRelationships, Collections.singletonList(downstreamOfDatasetOneRelatedEntity), @@ -1567,7 +1641,7 @@ public void testRemoveEdgesFromNode( @Test public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { GraphService service = getPopulatedGraphService(); - Urn nodeToRemoveFrom = datasetOneUrn; + Urn nodeToRemoveFrom = dataset1Urn; // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = @@ -1662,7 +1736,7 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(datasetTwoUrn); + service.removeNode(dataset2Urn); syncAfterWrite(); // assert the modified graph @@ -1961,8 +2035,19 @@ private void doTestConcurrentOp(Stream operations) throws Exception { }) .collect(Collectors.toList()); try { - executorPool.invokeAll( - callables, getTestConcurrentOpTimeout().toMillis(), TimeUnit.MILLISECONDS); + List> futures = + executorPool.invokeAll( + callables, getTestConcurrentOpTimeout().toMillis(), TimeUnit.MILLISECONDS); + futures.forEach( + future -> { + try { + future.get(); + } catch (InterruptedException | ExecutionException e) { + System.err.println( + System.currentTimeMillis() + + ": unable to complete execution of concurrent operations in time"); + } + }); } catch (InterruptedException e) { System.err.println( System.currentTimeMillis() @@ -1976,9 +2061,13 @@ private void doTestConcurrentOp(Stream operations) throws Exception { 
assertTrue(throwables.isEmpty()); } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPathAlgo) + @DataProvider(name = "trueFalse") + public static Object[] trueFalse() { + return new Object[] {true, false}; + } + + @Test(dataProvider = "trueFalse") + public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPathAlgo) throws Exception { GraphService service = getLineagePopulatedGraphService(attemptMultiPathAlgo); @@ -1988,12 +2077,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath (!((service instanceof Neo4jGraphService) || (service instanceof DgraphGraphService))); EntityLineageResult upstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(datasetOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 5); assertEquals(downstreamLineage.getRelationships().size(), 5); @@ -2002,12 +2091,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); Set entities = relationships.keySet().stream().collect(Collectors.toUnmodifiableSet()); assertEquals(entities.size(), 5); - assertTrue(relationships.containsKey(datasetTwoUrn)); + assertTrue(relationships.containsKey(dataset2Urn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - assertTrue(relationships.containsKey(datasetThreeUrn)); - assertEquals(relationships.get(datasetThreeUrn).getDegree(), 2); - assertTrue(relationships.containsKey(datasetFourUrn)); - assertEquals(relationships.get(datasetFourUrn).getDegree(), 2); + assertTrue(relationships.containsKey(dataset3Urn)); + assertEquals(relationships.get(dataset3Urn).getDegree(), 2); + assertTrue(relationships.containsKey(dataset4Urn)); + assertEquals(relationships.get(dataset4Urn).getDegree(), 2); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); // dataJobOne is present both at degree 1 and degree 2 @@ -2018,21 +2107,20 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - upstreamLineage = service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 3); assertEquals(upstreamLineage.getRelationships().size(), 3); relationships = upstreamLineage.getRelationships().stream() .collect(Collectors.toMap(LineageRelationship::getEntity, Function.identity())); - assertTrue(relationships.containsKey(datasetOneUrn)); - assertEquals(relationships.get(datasetOneUrn).getDegree(), 2); - assertTrue(relationships.containsKey(datasetTwoUrn)); - assertEquals(relationships.get(datasetTwoUrn).getDegree(), 1); + assertTrue(relationships.containsKey(dataset1Urn)); + assertEquals(relationships.get(dataset1Urn).getDegree(), 2); + assertTrue(relationships.containsKey(dataset2Urn)); + 
assertEquals(relationships.get(dataset2Urn).getDegree(), 1); assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); - downstreamLineage = - service.getLineage(datasetThreeUrn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); } @@ -2040,34 +2128,37 @@ public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiPath @Test public void testHighlyConnectedGraphWalk() throws Exception { final GraphService service = getGraphService(); - - int nodes = 25; - List allRelationships = Arrays.asList(downstreamOf, consumes, hasOwner); - List edges = - getFullyConnectedGraph(nodes, allRelationships, Collections.singletonList(datasetType)); + List allRelationships = Collections.singletonList(downstreamOf); + List edges = createHighlyConnectedGraph(); Stream operations = edges.stream().map(edge -> () -> service.addEdge(edge)); doTestConcurrentOp(operations); syncAfterWrite(); - RelatedEntitiesResult relatedEntities = - service.findRelatedEntities( - null, - EMPTY_FILTER, - null, - EMPTY_FILTER, - allRelationships, - outgoingRelationships, - 0, - nodes * 3 * 2); - Set expectedRelatedEntities = edges.stream() .map( edge -> new RelatedEntity(edge.getRelationshipType(), edge.getDestination().toString())) .collect(Collectors.toSet()); + RelatedEntitiesResult relatedEntities = null; + for (int i = 0; i < 3; i++) { + relatedEntities = + service.findRelatedEntities( + null, + EMPTY_FILTER, + null, + EMPTY_FILTER, + allRelationships, + outgoingRelationships, + 0, + 400); + if (!new HashSet<>(relatedEntities.getEntities()).equals(expectedRelatedEntities)) { + // Sleep up to 6 seconds in case Elastic needs to catch up + Thread.sleep(2000); + } + } assertEquals(new HashSet<>(relatedEntities.getEntities()), expectedRelatedEntities); Urn root = UrnUtils.getUrn(relatedEntities.getEntities().get(0).getUrn()); @@ -2087,7 +2178,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { 1000, 100, new LineageFlags().setEntitiesExploredPerHopLimit(5)); - assertEquals(lineageResult.getRelationships().size(), 24); + assertEquals(lineageResult.getRelationships().size(), 19); LineageRelationshipArray relationships = lineageResult.getRelationships(); int maxDegree = relationships.stream() @@ -2112,15 +2203,31 @@ public void testHighlyConnectedGraphWalk() throws Exception { 100, new LineageFlags().setEntitiesExploredPerHopLimit(5)); - assertEquals(lineageResultMulti.getRelationships().size(), 25); + assertEquals(lineageResultMulti.getRelationships().size(), 20); relationships = lineageResultMulti.getRelationships(); maxDegree = relationships.stream() .flatMap(relationship -> relationship.getDegrees().stream()) .reduce(0, Math::max); - assertTrue(maxDegree > 6); + assertTrue(maxDegree > 4); // Reset graph service getGraphService(); } + + protected List createHighlyConnectedGraph() { + List graph = new ArrayList<>(); + for (Urn sourceUrn : datasetUrns) { + for (Urn destUrn : datasetUrns) { + if (sourceUrn.equals(destUrn)) { + continue; + } + Edge edge = + new Edge( + sourceUrn, destUrn, downstreamOf, 0L, userOneUrn, 0L, userOneUrn, null, null, null); + graph.add(edge); + } + } + return graph; + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java 
b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java index 19ca2e85e8c542..e4cefaa1feaa1a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java @@ -360,7 +360,7 @@ public void testPopulatedGraphService() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(datasetTwoUrn); + service.removeNode(dataset2Urn); syncAfterWrite(); // assert the modified graph diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java index 4f8fa54b028ff2..680776a8e777ca 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphGraphServiceTest.java @@ -822,7 +822,7 @@ public void testGetDestinationUrnsFromResponseData() { } @Override - public void testPopulatedGraphServiceGetLineageMultihop(boolean attemptMultiHop) { + public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiHop) { // TODO: Remove this overridden method once the multihop for dGraph is implemented! } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java index a58fafabdac911..22d4ed56b5a93a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java @@ -232,65 +232,65 @@ public void testGetLineage() { List edges = Arrays.asList( // d1 <-Consumes- dj1 -Produces-> d2 <-DownstreamOf- d3 <-DownstreamOf- d5 - new Edge(dataJobOneUrn, datasetOneUrn, consumes, 1L, null, 3L, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, produces, 5L, null, 7L, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, 9L, null, null, null, null), - new Edge(datasetFiveUrn, datasetThreeUrn, downstreamOf, 11L, null, null, null, null), + new Edge(dataJobOneUrn, dataset1Urn, consumes, 1L, null, 3L, null, null), + new Edge(dataJobOneUrn, dataset2Urn, produces, 5L, null, 7L, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, 9L, null, null, null, null), + new Edge(dataset5Urn, dataset3Urn, downstreamOf, 11L, null, null, null, null), // another path between d2 and d5 which is shorter // d1 <-DownstreamOf- d4 <-DownstreamOf- d5 - new Edge(datasetFourUrn, datasetOneUrn, downstreamOf, 13L, null, 13L, null, null), - new Edge(datasetFiveUrn, datasetFourUrn, downstreamOf, 13L, null, 13L, null, null)); + new Edge(dataset4Urn, dataset1Urn, downstreamOf, 13L, null, 13L, null, null), + new Edge(dataset5Urn, dataset4Urn, downstreamOf, 13L, null, 13L, null, null)); edges.forEach(service::addEdge); // simple path finding final var upstreamLineageDataset3Hop3 = - service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 3); + service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 3); assertEquals(upstreamLineageDataset3Hop3.getTotal().intValue(), 3); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageDataset3Hop3), Set.of( - new UrnArray(datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new 
UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn, datasetOneUrn))); + new UrnArray(dataset3Urn, dataset2Urn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn, dataset1Urn))); // simple path finding final var upstreamLineageDatasetFiveHop2 = - service.getLineage(datasetFiveUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(dataset5Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineageDatasetFiveHop2.getTotal().intValue(), 4); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageDatasetFiveHop2), Set.of( - new UrnArray(datasetFiveUrn, datasetThreeUrn), - new UrnArray(datasetFiveUrn, datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn, datasetOneUrn))); + new UrnArray(dataset5Urn, dataset3Urn), + new UrnArray(dataset5Urn, dataset3Urn, dataset2Urn), + new UrnArray(dataset5Urn, dataset4Urn), + new UrnArray(dataset5Urn, dataset4Urn, dataset1Urn))); // there are two paths from p5 to p1, one longer and one shorter, and the longer one is // discarded from result final var upstreamLineageDataset5Hop5 = - service.getLineage(datasetFiveUrn, LineageDirection.UPSTREAM, 0, 1000, 5); + service.getLineage(dataset5Urn, LineageDirection.UPSTREAM, 0, 1000, 5); assertEquals(upstreamLineageDataset5Hop5.getTotal().intValue(), 5); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageDataset5Hop5), Set.of( - new UrnArray(datasetFiveUrn, datasetThreeUrn), - new UrnArray(datasetFiveUrn, datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetFiveUrn, datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn), - new UrnArray(datasetFiveUrn, datasetFourUrn, datasetOneUrn))); + new UrnArray(dataset5Urn, dataset3Urn), + new UrnArray(dataset5Urn, dataset3Urn, dataset2Urn), + new UrnArray(dataset5Urn, dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset5Urn, dataset4Urn), + new UrnArray(dataset5Urn, dataset4Urn, dataset1Urn))); // downstream lookup final var downstreamLineageDataset1Hop2 = - service.getLineage(datasetOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineageDataset1Hop2.getTotal().intValue(), 4); assertEquals( getPathUrnArraysFromLineageResult(downstreamLineageDataset1Hop2), Set.of( - new UrnArray(datasetOneUrn, dataJobOneUrn), - new UrnArray(datasetOneUrn, dataJobOneUrn, datasetTwoUrn), - new UrnArray(datasetOneUrn, datasetFourUrn), - new UrnArray(datasetOneUrn, datasetFourUrn, datasetFiveUrn))); + new UrnArray(dataset1Urn, dataJobOneUrn), + new UrnArray(dataset1Urn, dataJobOneUrn, dataset2Urn), + new UrnArray(dataset1Urn, dataset4Urn), + new UrnArray(dataset1Urn, dataset4Urn, dataset5Urn))); } @Test @@ -300,27 +300,27 @@ public void testGetLineageTimeFilterQuery() throws Exception { List edges = Arrays.asList( // d1 <-Consumes- dj1 -Produces-> d2 <-DownstreamOf- d3 <-DownstreamOf- d4 - new Edge(dataJobOneUrn, datasetOneUrn, consumes, 1L, null, 3L, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, produces, 5L, null, 7L, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, 9L, null, null, null, null), - new Edge(datasetFourUrn, datasetThreeUrn, downstreamOf, 11L, null, null, null, null)); + new Edge(dataJobOneUrn, dataset1Urn, consumes, 1L, null, 3L, null, null), + new Edge(dataJobOneUrn, dataset2Urn, produces, 5L, null, 7L, null, null), + new 
Edge(dataset3Urn, dataset2Urn, downstreamOf, 9L, null, null, null, null), + new Edge(dataset4Urn, dataset3Urn, downstreamOf, 11L, null, null, null, null)); edges.forEach(service::addEdge); // no time filtering EntityLineageResult upstreamLineageTwoHops = - service.getLineage(datasetFourUrn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(dataset4Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineageTwoHops.getTotal().intValue(), 2); assertEquals(upstreamLineageTwoHops.getRelationships().size(), 2); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTwoHops), Set.of( - new UrnArray(datasetFourUrn, datasetThreeUrn), - new UrnArray(datasetFourUrn, datasetThreeUrn, datasetTwoUrn))); + new UrnArray(dataset4Urn, dataset3Urn), + new UrnArray(dataset4Urn, dataset3Urn, dataset2Urn))); // with time filtering EntityLineageResult upstreamLineageTwoHopsWithTimeFilter = service.getLineage( - datasetFourUrn, + dataset4Urn, LineageDirection.UPSTREAM, 0, 1000, @@ -330,12 +330,12 @@ public void testGetLineageTimeFilterQuery() throws Exception { assertEquals(upstreamLineageTwoHopsWithTimeFilter.getRelationships().size(), 1); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTwoHopsWithTimeFilter), - Set.of(new UrnArray(datasetFourUrn, datasetThreeUrn))); + Set.of(new UrnArray(dataset4Urn, dataset3Urn))); // with time filtering EntityLineageResult upstreamLineageTimeFilter = service.getLineage( - datasetTwoUrn, + dataset2Urn, LineageDirection.UPSTREAM, 0, 1000, @@ -346,13 +346,13 @@ public void testGetLineageTimeFilterQuery() throws Exception { assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTimeFilter), Set.of( - new UrnArray(datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetTwoUrn, dataJobOneUrn, datasetOneUrn))); + new UrnArray(dataset2Urn, dataJobOneUrn), + new UrnArray(dataset2Urn, dataJobOneUrn, dataset1Urn))); // with time filtering EntityLineageResult downstreamLineageTimeFilter = service.getLineage( - datasetOneUrn, + dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, @@ -362,7 +362,7 @@ public void testGetLineageTimeFilterQuery() throws Exception { assertEquals(downstreamLineageTimeFilter.getRelationships().size(), 1); assertEquals( getPathUrnArraysFromLineageResult(downstreamLineageTimeFilter), - Set.of(new UrnArray(datasetOneUrn, dataJobOneUrn))); + Set.of(new UrnArray(dataset1Urn, dataJobOneUrn))); } @Test @@ -372,28 +372,28 @@ public void testGetLineageTimeFilteringSkipsShorterButNonMatchingPaths() { List edges = Arrays.asList( // d1 <-Consumes- dj1 -Produces-> d2 <-DownstreamOf- d3 - new Edge(dataJobOneUrn, datasetOneUrn, consumes, 5L, null, 5L, null, null), - new Edge(dataJobOneUrn, datasetTwoUrn, produces, 7L, null, 7L, null, null), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, 9L, null, null, null, null), + new Edge(dataJobOneUrn, dataset1Urn, consumes, 5L, null, 5L, null, null), + new Edge(dataJobOneUrn, dataset2Urn, produces, 7L, null, 7L, null, null), + new Edge(dataset3Urn, dataset2Urn, downstreamOf, 9L, null, null, null, null), // d1 <-DownstreamOf- d3 (shorter path from d3 to d1, but with very old time) - new Edge(datasetThreeUrn, datasetOneUrn, downstreamOf, 1L, null, 2L, null, null)); + new Edge(dataset3Urn, dataset1Urn, downstreamOf, 1L, null, 2L, null, null)); edges.forEach(service::addEdge); // no time filtering, shorter path from d3 to d1 is returned EntityLineageResult upstreamLineageNoTimeFiltering = - service.getLineage(datasetThreeUrn, LineageDirection.UPSTREAM, 0, 1000, 3); + 
service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 3); assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageNoTimeFiltering), Set.of( - new UrnArray(datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetThreeUrn, datasetOneUrn))); + new UrnArray(dataset3Urn, dataset2Urn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset3Urn, dataset1Urn))); // with time filtering, shorter path from d3 to d1 is excluded so longer path is returned EntityLineageResult upstreamLineageTimeFiltering = service.getLineage( - datasetThreeUrn, + dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, @@ -402,9 +402,9 @@ public void testGetLineageTimeFilteringSkipsShorterButNonMatchingPaths() { assertEquals( getPathUrnArraysFromLineageResult(upstreamLineageTimeFiltering), Set.of( - new UrnArray(datasetThreeUrn, datasetTwoUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn), - new UrnArray(datasetThreeUrn, datasetTwoUrn, dataJobOneUrn, datasetOneUrn))); + new UrnArray(dataset3Urn, dataset2Urn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn), + new UrnArray(dataset3Urn, dataset2Urn, dataJobOneUrn, dataset1Urn))); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index 8d08c1362a3406..b389f8228a98d6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -336,60 +336,39 @@ public void testTimestampLineage() throws Exception { Arrays.asList( // One upstream edge new Edge( - datasetTwoUrn, - datasetOneUrn, - downstreamOf, - initialTime, - null, - initialTime, - null, - null), + dataset2Urn, dataset1Urn, downstreamOf, initialTime, null, initialTime, null, null), // Two downstream new Edge( - datasetThreeUrn, - datasetTwoUrn, - downstreamOf, - initialTime, - null, - initialTime, - null, - null), + dataset3Urn, dataset2Urn, downstreamOf, initialTime, null, initialTime, null, null), new Edge( - datasetFourUrn, - datasetTwoUrn, - downstreamOf, - initialTime, - null, - initialTime, - null, - null), + dataset4Urn, dataset2Urn, downstreamOf, initialTime, null, initialTime, null, null), // One with null values, should always be returned - new Edge(datasetFiveUrn, datasetTwoUrn, downstreamOf, null, null, null, null, null)); + new Edge(dataset5Urn, dataset2Urn, downstreamOf, null, null, null, null, null)); edges.forEach(getGraphService()::addEdge); syncAfterWrite(); // Without timestamps - EntityLineageResult upstreamResult = getUpstreamLineage(datasetTwoUrn, null, null); - EntityLineageResult downstreamResult = getDownstreamLineage(datasetTwoUrn, null, null); + EntityLineageResult upstreamResult = getUpstreamLineage(dataset2Urn, null, null); + EntityLineageResult downstreamResult = getDownstreamLineage(dataset2Urn, null, null); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); // Timestamp before - upstreamResult = getUpstreamLineage(datasetTwoUrn, 0L, initialTime - 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, 0L, initialTime - 10); + upstreamResult = getUpstreamLineage(dataset2Urn, 0L, initialTime - 10); + downstreamResult = getDownstreamLineage(dataset2Urn, 0L, initialTime - 10); 
Assert.assertEquals(Integer.valueOf(0), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(1), downstreamResult.getTotal()); // Timestamp after - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime + 10, initialTime + 100); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime + 10, initialTime + 100); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime + 10, initialTime + 100); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime + 10, initialTime + 100); Assert.assertEquals(Integer.valueOf(0), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(1), downstreamResult.getTotal()); // Timestamp included - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime - 10, initialTime + 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime - 10, initialTime + 10); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime - 10, initialTime + 10); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime - 10, initialTime + 10); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); @@ -398,17 +377,10 @@ public void testTimestampLineage() throws Exception { edges = Arrays.asList( new Edge( - datasetTwoUrn, - datasetOneUrn, - downstreamOf, - initialTime, - null, - updatedTime, - null, - null), + dataset2Urn, dataset1Urn, downstreamOf, initialTime, null, updatedTime, null, null), new Edge( - datasetThreeUrn, - datasetTwoUrn, + dataset3Urn, + dataset2Urn, downstreamOf, initialTime, null, @@ -420,20 +392,20 @@ public void testTimestampLineage() throws Exception { syncAfterWrite(); // Without timestamps - upstreamResult = getUpstreamLineage(datasetTwoUrn, null, null); - downstreamResult = getDownstreamLineage(datasetTwoUrn, null, null); + upstreamResult = getUpstreamLineage(dataset2Urn, null, null); + downstreamResult = getDownstreamLineage(dataset2Urn, null, null); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); // Window includes initial time and updated time - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime - 10, updatedTime + 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime - 10, updatedTime + 10); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime - 10, updatedTime + 10); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime - 10, updatedTime + 10); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(3), downstreamResult.getTotal()); // Window includes updated time but not initial time - upstreamResult = getUpstreamLineage(datasetTwoUrn, initialTime + 10, updatedTime + 10); - downstreamResult = getDownstreamLineage(datasetTwoUrn, initialTime + 10, updatedTime + 10); + upstreamResult = getUpstreamLineage(dataset2Urn, initialTime + 10, updatedTime + 10); + downstreamResult = getDownstreamLineage(dataset2Urn, initialTime + 10, updatedTime + 10); Assert.assertEquals(Integer.valueOf(1), upstreamResult.getTotal()); Assert.assertEquals(Integer.valueOf(2), downstreamResult.getTotal()); } @@ -447,7 +419,16 @@ public void testTimestampLineage() throws Exception { * @return The Upstream lineage for urn from the window from startTime to endTime */ private EntityLineageResult getUpstreamLineage(Urn urn, Long startTime, Long endTime) { - return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, null); + 
return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, 0, null); + } + + private EntityLineageResult getUpstreamLineage(Urn urn, Long startTime, Long endTime, int count) { + return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, count, null); + } + + private EntityLineageResult getUpstreamLineage( + Urn urn, Long startTime, Long endTime, int count, int exploreLimit) { + return getLineage(urn, LineageDirection.UPSTREAM, startTime, endTime, count, exploreLimit); } /** @@ -459,7 +440,7 @@ private EntityLineageResult getUpstreamLineage(Urn urn, Long startTime, Long end * @return The Downstream lineage for urn from the window from startTime to endTime */ private EntityLineageResult getDownstreamLineage(Urn urn, Long startTime, Long endTime) { - return getLineage(urn, LineageDirection.DOWNSTREAM, startTime, endTime, null); + return getLineage(urn, LineageDirection.DOWNSTREAM, startTime, endTime, 0, null); } /** @@ -476,13 +457,14 @@ private EntityLineageResult getLineage( LineageDirection direction, Long startTime, Long endTime, + int count, @Nullable Integer entitiesExploredPerHopLimit) { return getGraphService() .getLineage( urn, direction, 0, - 0, + count, 3, new LineageFlags() .setStartTimeMillis(startTime, SetMode.REMOVE_IF_NULL) From 3e39129f7b9332c246285b3a5f61531a74d6608b Mon Sep 17 00:00:00 2001 From: Valerii Date: Mon, 1 Apr 2024 21:22:47 +0300 Subject: [PATCH 02/17] fix(ingest/tableau) Fix Tableau lineage ingestion from Clickhouse (#10167) --- .../src/datahub/ingestion/source/tableau_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index e547934bc4a2d8..881f6c63e094d0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -573,7 +573,7 @@ def get_fully_qualified_table_name( .replace("`", "") ) - if platform in ("athena", "hive", "mysql"): + if platform in ("athena", "hive", "mysql", "clickhouse"): # it two tier database system (athena, hive, mysql), just take final 2 fully_qualified_table_name = ".".join( fully_qualified_table_name.split(".")[-2:] From 14bbc0b5909f7a205811914bd26226fbeb40e367 Mon Sep 17 00:00:00 2001 From: Christian Groll Date: Mon, 1 Apr 2024 21:57:52 +0200 Subject: [PATCH 03/17] [oracle ingestion]: get database name when using service (#10158) --- .../datahub/ingestion/source/sql/oracle.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index bcf0f26008ae30..0a67d6228e6dbc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -122,6 +122,17 @@ def __init__(self, inspector_instance: Inspector): # tables that we don't want to ingest into the DataHub self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX") + def get_db_name(self) -> str: + try: + # Try to retrieve current DB name by executing query + db_name = self._inspector_instance.bind.execute( + sql.text("select sys_context('USERENV','DB_NAME') from dual") + ).scalar() + return str(db_name) + except sqlalchemy.exc.DatabaseError as e: + logger.error("Error fetching DB name: " + str(e)) + return "" + def get_schema_names(self) -> List[str]: cursor = self._inspector_instance.bind.execute( 
sql.text("SELECT username FROM dba_users ORDER BY username") @@ -582,6 +593,22 @@ def create(cls, config_dict, ctx): config = OracleConfig.parse_obj(config_dict) return cls(config, ctx) + def get_db_name(self, inspector: Inspector) -> str: + """ + This overwrites the default implementation, which only tries to read + database name from Connection URL, which does not work when using + service instead of database. + In that case, it tries to retrieve the database name by sending a query to the DB. + """ + + # call default implementation first + db_name = super().get_db_name(inspector) + + if db_name == "" and isinstance(inspector, OracleInspectorObjectWrapper): + db_name = inspector.get_db_name() + + return db_name + def get_inspectors(self) -> Iterable[Inspector]: for inspector in super().get_inspectors(): event.listen( From ef637ccb37ad139fc1d28144547419879b491d41 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Mon, 1 Apr 2024 15:01:09 -0500 Subject: [PATCH 04/17] fix(docker): fix versioning for compose file post release (#10176) --- .../docker-compose-without-neo4j.override.yml | 2 +- docker/docker-compose.override.yml | 2 +- docker/mysql/docker-compose.mysql.yml | 2 +- ...ocker-compose-without-neo4j.quickstart.yml | 2 +- .../quickstart/docker-compose.quickstart.yml | 2 +- .../quickstart_version_mapping.yaml | 6 +++--- .../src/datahub/cli/quickstart_versioning.py | 8 ++++---- .../cli/test_quickstart_version_mapping.py | 20 +++++++++---------- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docker/docker-compose-without-neo4j.override.yml b/docker/docker-compose-without-neo4j.override.yml index 5b31a54e6473f7..37ae41e383e7c5 100644 --- a/docker/docker-compose-without-neo4j.override.yml +++ b/docker/docker-compose-without-neo4j.override.yml @@ -45,7 +45,7 @@ services: - DATAHUB_PRECREATE_TOPICS=${DATAHUB_PRECREATE_TOPICS:-false} mysql: hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index 1b314a76aa7553..d443a3f4629dfd 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -30,7 +30,7 @@ services: - DATAHUB_PRECREATE_TOPICS=${DATAHUB_PRECREATE_TOPICS:-false} mysql: hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 diff --git a/docker/mysql/docker-compose.mysql.yml b/docker/mysql/docker-compose.mysql.yml index e60058a6b509b5..d8c7767985000f 100644 --- a/docker/mysql/docker-compose.mysql.yml +++ b/docker/mysql/docker-compose.mysql.yml @@ -4,7 +4,7 @@ version: '3.8' services: mysql: hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} env_file: env/docker.env command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin ports: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 6f7368a0a08264..176e5539aa491d 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ 
-210,7 +210,7 @@ services: test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD timeout: 5s hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 restart: on-failure diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 586c0cf23f48cb..e39695f52a4372 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -217,7 +217,7 @@ services: test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD timeout: 5s hostname: mysql - image: mysql:${DATAHUB_MYSQL_VERSION:-5.7} + image: mysql:${DATAHUB_MYSQL_VERSION:-8.2} ports: - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 restart: on-failure diff --git a/docker/quickstart/quickstart_version_mapping.yaml b/docker/quickstart/quickstart_version_mapping.yaml index b08cfda175aa9f..d9b7b4d661f064 100644 --- a/docker/quickstart/quickstart_version_mapping.yaml +++ b/docker/quickstart/quickstart_version_mapping.yaml @@ -23,7 +23,7 @@ quickstart_version_map: default: composefile_git_ref: master docker_tag: head - mysql_tag: "5.7" + mysql_tag: "8.2" # default: # Use this to pin default to a specific version. # composefile_git_ref: fd1bd51541a132017a648f4a2f037eec8f70ba26 # v0.10.0 + quickstart compose file fixes # docker_tag: v0.10.0 @@ -31,12 +31,12 @@ quickstart_version_map: head: composefile_git_ref: master docker_tag: head - mysql_tag: "5.7" + mysql_tag: "8.2" # v0.13.0 we upgraded MySQL image for EOL v0.13.0: composefile_git_ref: master - docker_tag: head + docker_tag: v0.13.0 mysql_tag: "8.2" # v0.9.6 images contain security vulnerabilities diff --git a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py index 1c3ce93c1f7887..493869ac77bb83 100644 --- a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py +++ b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py @@ -82,7 +82,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig": return QuickstartVersionMappingConfig( quickstart_version_map={ "default": QuickstartExecutionPlan( - composefile_git_ref="master", docker_tag="head", mysql_tag="5.7" + composefile_git_ref="master", docker_tag="head", mysql_tag="8.2" ), } ) @@ -94,7 +94,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig": try: release = cls._fetch_latest_version() config.quickstart_version_map["stable"] = QuickstartExecutionPlan( - composefile_git_ref=release, docker_tag=release, mysql_tag="5.7" + composefile_git_ref=release, docker_tag=release, mysql_tag="8.2" ) except Exception: click.echo( @@ -116,8 +116,8 @@ def get_quickstart_execution_plan( requested_version = "default" composefile_git_ref = requested_version docker_tag = requested_version - # Default to 5.7 if not specified in version map - mysql_tag = "5.7" + # Default to 8.2 if not specified in version map + mysql_tag = "8.2" result = self.quickstart_version_map.get( requested_version, QuickstartExecutionPlan( diff --git a/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py b/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py index 3b06e48522955d..38f3451a191a43 100644 --- a/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py +++ b/metadata-ingestion/tests/unit/cli/test_quickstart_version_mapping.py @@ -9,7 +9,7 @@ "default": { "composefile_git_ref": "master", 
"docker_tag": "latest", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, "v0.9.6": { "composefile_git_ref": "v0.9.6.1", @@ -19,17 +19,17 @@ "v2.0.0": { "composefile_git_ref": "v2.0.1", "docker_tag": "v2.0.0", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, "v1.0.0": { "composefile_git_ref": "v1.0.0", "docker_tag": "v1.0.0", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, "stable": { "composefile_git_ref": "v1.0.1", "docker_tag": "latest", - "mysql_tag": "5.7", + "mysql_tag": "8.2", }, }, } @@ -41,7 +41,7 @@ def test_quickstart_version_config(): expected = QuickstartExecutionPlan( docker_tag="v1.0.0", composefile_git_ref="v1.0.0", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected @@ -51,7 +51,7 @@ def test_quickstart_version_config_default(): expected = QuickstartExecutionPlan( docker_tag="v2.0.0", composefile_git_ref="v2.0.1", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected @@ -59,7 +59,7 @@ def test_quickstart_version_config_default(): def test_quickstart_version_config_stable(): execution_plan = example_version_mapper.get_quickstart_execution_plan("stable") expected = QuickstartExecutionPlan( - docker_tag="latest", composefile_git_ref="v1.0.1", mysql_tag="5.7" + docker_tag="latest", composefile_git_ref="v1.0.1", mysql_tag="8.2" ) assert execution_plan == expected @@ -68,13 +68,13 @@ def test_quickstart_forced_stable(): example_version_mapper.quickstart_version_map["default"] = QuickstartExecutionPlan( composefile_git_ref="v1.0.1", docker_tag="latest", - mysql_tag="5.7", + mysql_tag="8.2", ) execution_plan = example_version_mapper.get_quickstart_execution_plan(None) expected = QuickstartExecutionPlan( docker_tag="latest", composefile_git_ref="v1.0.1", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected @@ -92,7 +92,7 @@ def test_quickstart_forced_not_a_version_tag(): expected = QuickstartExecutionPlan( docker_tag="NOT A VERSION", composefile_git_ref="NOT A VERSION", - mysql_tag="5.7", + mysql_tag="8.2", ) assert execution_plan == expected From 9a0a53bbe44d0ca5ea420c1654f87c4f7c636c67 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 1 Apr 2024 15:54:37 -0500 Subject: [PATCH 05/17] fix(restoreIndices): batchSize vs limit (#10178) --- .../upgrade/restoreindices/SendMAEStep.java | 5 +- .../ReindexDataJobViaNodesCLLStep.java | 21 +++--- .../DatahubUpgradeNonBlockingTest.java | 64 +++++++++++++++++++ .../linkedin/metadata/entity/AspectDao.java | 3 +- .../metadata/entity/EntityServiceImpl.java | 48 +++++++------- .../entity/cassandra/CassandraAspectDao.java | 3 +- .../metadata/entity/ebean/EbeanAspectDao.java | 43 ++++++++++--- .../metadata/entity/EntityServiceTest.java | 14 ++-- .../kafka/MceConsumerApplicationTest.java | 3 +- .../elastic/OperationsController.java | 27 +++++--- .../com.linkedin.entity.aspects.restspec.json | 12 ++++ ...nkedin.operations.operations.restspec.json | 12 ++++ .../com.linkedin.entity.aspects.snapshot.json | 12 ++++ ...nkedin.operations.operations.snapshot.json | 12 ++++ .../resources/entity/AspectResource.java | 7 +- .../operations/OperationsResource.java | 10 +-- .../metadata/resources/operations/Utils.java | 24 +++++-- .../metadata/entity/EntityService.java | 3 +- .../restoreindices/RestoreIndicesArgs.java | 50 +++++++++------ 19 files changed, 275 insertions(+), 98 deletions(-) create mode 100644 datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java diff --git 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index aca27892d2e3a1..83bc96ad449d1b 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -48,7 +48,7 @@ public KafkaJob(UpgradeContext context, RestoreIndicesArgs args) { @Override public RestoreIndicesResult call() { - return _entityService.restoreIndices(args, context.report()::addLine); + return _entityService.streamRestoreIndices(args, context.report()::addLine).findFirst().get(); } } @@ -85,7 +85,10 @@ private List iterateFutures(List entityService; @@ -33,13 +32,17 @@ public Function executable() { return (context) -> { RestoreIndicesArgs args = new RestoreIndicesArgs() - .setAspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME) - .setUrnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%") - .setBatchSize(batchSize); - RestoreIndicesResult result = - entityService.restoreIndices(args, x -> context.report().addLine((String) x)); - context.report().addLine("Rows migrated: " + result.rowsMigrated); - context.report().addLine("Rows ignored: " + result.ignored); + .aspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME) + .urnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%") + .batchSize(batchSize); + + entityService + .streamRestoreIndices(args, x -> context.report().addLine((String) x)) + .forEach( + result -> { + context.report().addLine("Rows migrated: " + result.rowsMigrated); + context.report().addLine("Rows ignored: " + result.ignored); + }); BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, entityService); context.report().addLine("State updated: " + UPGRADE_ID_URN); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java new file mode 100644 index 00000000000000..e1257df9ad7484 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -0,0 +1,64 @@ +package com.linkedin.datahub.upgrade; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.testng.AssertJUnit.assertNotNull; + +import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; +import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; +import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import java.util.List; +import javax.inject.Named; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@ActiveProfiles("test") +@SpringBootTest( + classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class}, + properties = { + "BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED=true", + "kafka.schemaRegistry.type=INTERNAL", + "DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic", + 
"METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=test_mcl_versioned_topic" + }, + args = {"-u", "SystemUpdateNonBlocking"}) +public class DatahubUpgradeNonBlockingTest extends AbstractTestNGSpringContextTests { + + @Autowired(required = false) + @Named("systemUpdateNonBlocking") + private SystemUpdateNonBlocking systemUpdateNonBlocking; + + @Autowired + @Test + public void testSystemUpdateNonBlockingInit() { + assertNotNull(systemUpdateNonBlocking); + } + + @Test + public void testReindexDataJobViaNodesCLLPaging() { + EntityService mockService = mock(EntityService.class); + ReindexDataJobViaNodesCLL cllUpgrade = new ReindexDataJobViaNodesCLL(mockService, true, 10); + SystemUpdateNonBlocking upgrade = + new SystemUpdateNonBlocking(List.of(), List.of(cllUpgrade), null); + DefaultUpgradeManager manager = new DefaultUpgradeManager(); + manager.register(upgrade); + manager.execute("SystemUpdateNonBlocking", List.of()); + verify(mockService, times(1)) + .streamRestoreIndices( + eq( + new RestoreIndicesArgs() + .batchSize(10) + .limit(0) + .aspectName("dataJobInputOutput") + .urnLike("urn:li:dataJob:%")), + any()); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index b031377842176b..e836b69ef43051 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -5,7 +5,6 @@ import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.utils.metrics.MetricUtils; -import io.ebean.PagedList; import io.ebean.Transaction; import java.sql.Timestamp; import java.util.List; @@ -106,7 +105,7 @@ ListResult listUrns( Integer countAspect(@Nonnull final String aspectName, @Nullable String urnLike); @Nonnull - PagedList getPagedAspects(final RestoreIndicesArgs args); + Stream> streamAspectBatches(final RestoreIndicesArgs args); @Nonnull Stream streamAspects(String entityName, String aspectName); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 7f11170d12e726..754c5f272e2755 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -49,7 +49,6 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; -import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.DeleteItemImpl; @@ -76,7 +75,6 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.r2.RemoteInvocationException; import com.linkedin.util.Pair; -import io.ebean.PagedList; import io.ebean.Transaction; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; @@ -1177,38 +1175,38 @@ public Integer getCountAspect(@Nonnull String aspectName, @Nullable String urnLi @Nonnull @Override - public RestoreIndicesResult restoreIndices( + public Stream streamRestoreIndices( @Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger) { logger.accept(String.format("Args 
are %s", args)); logger.accept( String.format( - "Reading rows %s through %s from the aspects table started.", - args.start, args.start + args.batchSize)); - long startTime = System.currentTimeMillis(); - PagedList rows = aspectDao.getPagedAspects(args); - long timeSqlQueryMs = System.currentTimeMillis() - startTime; + "Reading rows %s through %s (0 == infinite) in batches of %s from the aspects table started.", + args.start, args.limit, args.batchSize)); - logger.accept( - String.format( - "Reading rows %s through %s from the aspects table completed.", - args.start, args.start + args.batchSize)); + long startTime = System.currentTimeMillis(); + return aspectDao + .streamAspectBatches(args) + .map( + batchStream -> { + long timeSqlQueryMs = System.currentTimeMillis() - startTime; - List systemAspects = - EntityUtils.toSystemAspectFromEbeanAspects( - rows != null ? rows.getList() : List.of(), this); + List systemAspects = + EntityUtils.toSystemAspectFromEbeanAspects( + batchStream.collect(Collectors.toList()), this); - RestoreIndicesResult result = restoreIndices(systemAspects, logger); + RestoreIndicesResult result = restoreIndices(systemAspects, logger); + result.timeSqlQueryMs = timeSqlQueryMs; - try { - TimeUnit.MILLISECONDS.sleep(args.batchDelayMs); - } catch (InterruptedException e) { - throw new RuntimeException( - "Thread interrupted while sleeping after successful batch migration."); - } - - result.timeSqlQueryMs = timeSqlQueryMs; - return result; + logger.accept("Batch completed."); + try { + TimeUnit.MILLISECONDS.sleep(args.batchDelayMs); + } catch (InterruptedException e) { + throw new RuntimeException( + "Thread interrupted while sleeping after successful batch migration."); + } + return result; + }); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index c1e76e7c678363..71b9b9ad86f726 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -34,7 +34,6 @@ import com.linkedin.metadata.query.ExtraInfo; import com.linkedin.metadata.query.ExtraInfoArray; import com.linkedin.metadata.query.ListResultMetadata; -import io.ebean.PagedList; import io.ebean.Transaction; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; @@ -492,7 +491,7 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) } @Nonnull - public PagedList getPagedAspects(final RestoreIndicesArgs args) { + public Stream> streamAspectBatches(final RestoreIndicesArgs args) { // Not implemented return null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 23d443c10b71fc..161218b6707dcb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -8,6 +8,7 @@ import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; +import com.google.common.collect.Iterators; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.AspectRetriever; @@ -43,10 +44,12 @@ import java.net.URISyntaxException; import 
java.sql.Timestamp; import java.time.Clock; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -58,6 +61,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.persistence.PersistenceException; @@ -495,7 +499,7 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) @Nonnull @Override - public PagedList getPagedAspects(final RestoreIndicesArgs args) { + public Stream> streamAspectBatches(final RestoreIndicesArgs args) { ExpressionList exp = _server .find(EbeanAspectV2.class) @@ -511,6 +515,15 @@ public PagedList getPagedAspects(final RestoreIndicesArgs args) { if (args.urnLike != null) { exp = exp.like(EbeanAspectV2.URN_COLUMN, args.urnLike); } + if (args.gePitEpochMs > 0) { + exp = + exp.ge( + EbeanAspectV2.CREATED_ON_COLUMN, + Timestamp.from(Instant.ofEpochMilli(args.gePitEpochMs))) + .le( + EbeanAspectV2.CREATED_ON_COLUMN, + Timestamp.from(Instant.ofEpochMilli(args.lePitEpochMs))); + } int start = args.start; if (args.urnBasedPagination) { @@ -531,13 +544,27 @@ public PagedList getPagedAspects(final RestoreIndicesArgs args) { } } - return exp.orderBy() - .asc(EbeanAspectV2.URN_COLUMN) - .orderBy() - .asc(EbeanAspectV2.ASPECT_COLUMN) - .setFirstRow(start) - .setMaxRows(args.batchSize) - .findPagedList(); + if (args.limit > 0) { + exp = exp.setMaxRows(args.limit); + } + + return partition( + exp.orderBy() + .asc(EbeanAspectV2.URN_COLUMN) + .orderBy() + .asc(EbeanAspectV2.ASPECT_COLUMN) + .setFirstRow(start) + .findStream(), + args.batchSize); + } + + private static Stream> partition(Stream source, int size) { + final Iterator it = source.iterator(); + final Iterator> partIt = + Iterators.transform(Iterators.partition(it, size), List::stream); + final Iterable> iterable = () -> partIt; + + return StreamSupport.stream(iterable.spliterator(), false); } @Override diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 25f9e4b28a32ab..24707a4a6f32bb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -1578,13 +1578,13 @@ public void testRestoreIndices() throws Exception { clearInvocations(_mockProducer); RestoreIndicesArgs args = new RestoreIndicesArgs(); - args.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - args.setBatchSize(1); - args.setStart(0); - args.setBatchDelayMs(1L); - args.setNumThreads(1); - args.setUrn(urnStr); - _entityServiceImpl.restoreIndices(args, obj -> {}); + args.aspectName(UPSTREAM_LINEAGE_ASPECT_NAME); + args.batchSize(1); + args.start(0); + args.batchDelayMs(1L); + args.numThreads(1); + args.urn(urnStr); + _entityServiceImpl.streamRestoreIndices(args, obj -> {}).collect(Collectors.toList()); ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java index bce8664689e2c5..84a4f4e839a083 100644 --- 
a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import io.datahubproject.metadata.jobs.common.health.kafka.KafkaHealthIndicator; +import java.util.stream.Stream; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.web.client.TestRestTemplate; @@ -30,7 +31,7 @@ public class MceConsumerApplicationTest extends AbstractTestNGSpringContextTests public void testRestliServletConfig() { RestoreIndicesResult mockResult = new RestoreIndicesResult(); mockResult.setRowsMigrated(100); - when(_mockEntityService.restoreIndices(any(), any())).thenReturn(mockResult); + when(_mockEntityService.streamRestoreIndices(any(), any())).thenReturn(Stream.of(mockResult)); String response = this.restTemplate.postForObject( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index f7c6b4ec071c4e..e371dfaf1d8fa8 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -250,13 +250,17 @@ public ResponseEntity explainSearchQuery( @Tag(name = "RestoreIndices") @GetMapping(path = "/restoreIndices", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Restore ElasticSearch indices from primary storage based on URNs.") - public ResponseEntity restoreIndices( + public ResponseEntity> restoreIndices( @RequestParam(required = false, name = "aspectName") @Nullable String aspectName, @RequestParam(required = false, name = "urn") @Nullable String urn, @RequestParam(required = false, name = "urnLike") @Nullable String urnLike, - @RequestParam(required = false, name = "batchSize", defaultValue = "100") @Nullable + @RequestParam(required = false, name = "batchSize", defaultValue = "500") @Nullable Integer batchSize, - @RequestParam(required = false, name = "start", defaultValue = "0") @Nullable Integer start) { + @RequestParam(required = false, name = "start", defaultValue = "0") @Nullable Integer start, + @RequestParam(required = false, name = "limit", defaultValue = "0") @Nullable Integer limit, + @RequestParam(required = false, name = "gePitEpochMs", defaultValue = "0") @Nullable + Long gePitEpochMs, + @RequestParam(required = false, name = "lePitEpochMs") @Nullable Long lePitEpochMs) { Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorized( @@ -266,16 +270,21 @@ public ResponseEntity restoreIndices( RestoreIndicesArgs args = new RestoreIndicesArgs() - .setAspectName(aspectName) - .setUrnLike(urnLike) - .setUrn( + .aspectName(aspectName) + .urnLike(urnLike) + .urn( Optional.ofNullable(urn) .map(urnStr -> UrnUtils.getUrn(urnStr).toString()) .orElse(null)) - .setStart(start) - .setBatchSize(batchSize); + .start(start) + .batchSize(batchSize) + .limit(limit) + .gePitEpochMs(gePitEpochMs) + .lePitEpochMs(lePitEpochMs); - return 
ResponseEntity.of(Optional.of(entityService.restoreIndices(args, log::info))); + return ResponseEntity.of( + Optional.of( + entityService.streamRestoreIndices(args, log::info).collect(Collectors.toList()))); } @Tag(name = "RestoreIndices") diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json index 917540aca8728c..32e7a0e58e5355 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json @@ -110,6 +110,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" } ], diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json index 0fb6a18a7974bd..ce5b2b27904ec8 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.operations.operations.restspec.json @@ -55,6 +55,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 2a96e9963bf01b..becdcdd0215fde 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -4136,6 +4136,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" } ], diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index d7199bed56d2ce..0573a342da4205 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -3782,6 +3782,18 @@ "name" : "batchSize", "type" : "int", "optional" : true + }, { + "name" : "limit", + "type" : "int", + "optional" : true + }, { + "name" : "gePitEpochMs", + "type" : "long", + "optional" : true + }, { + "name" : "lePitEpochMs", + "type" : "long", + "optional" : true } ], "returns" : "string" }, { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 2f1e27dbe2575b..21d688c7e6e1b6 100644 --- 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -297,7 +297,10 @@ public Task restoreIndices( @ActionParam(PARAM_URN) @Optional @Nullable String urn, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike, @ActionParam("start") @Optional @Nullable Integer start, - @ActionParam("batchSize") @Optional @Nullable Integer batchSize) { + @ActionParam("batchSize") @Optional @Nullable Integer batchSize, + @ActionParam("limit") @Optional @Nullable Integer limit, + @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, + @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { return RestliUtil.toTask( () -> { if (!isAPIAuthorized( @@ -308,7 +311,7 @@ public Task restoreIndices( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to update entities."); } return Utils.restoreIndices( - aspectName, urn, urnLike, start, batchSize, _authorizer, _entityService); + aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService); }, MetricRegistry.name(this.getClass(), "restoreIndices")); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java index 13d88f30dd032a..8b87923a6d4236 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java @@ -91,12 +91,12 @@ public Task restoreIndices( @ActionParam(PARAM_URN) @Optional @Nullable String urn, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike, @ActionParam("start") @Optional @Nullable Integer start, - @ActionParam("batchSize") @Optional @Nullable Integer batchSize) { + @ActionParam("batchSize") @Optional @Nullable Integer batchSize, + @ActionParam("limit") @Optional @Nullable Integer limit, + @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, + @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { return RestliUtil.toTask( - () -> { - return Utils.restoreIndices( - aspectName, urn, urnLike, start, batchSize, _authorizer, _entityService); - }, + () -> Utils.restoreIndices(aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService), MetricRegistry.name(this.getClass(), "restoreIndices")); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 5f999482cd859e..d4f04bf62fbd81 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -7,14 +7,15 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.metadata.authorization.Disjunctive; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; import 
com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; import java.util.HashMap; import java.util.Map; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -31,6 +32,9 @@ public static String restoreIndices( @Nullable String urnLike, @Nullable Integer start, @Nullable Integer batchSize, + @Nullable Integer limit, + @Nullable Long gePitEpochMs, + @Nullable Long lePitEpochMs, @Nonnull Authorizer authorizer, @Nonnull EntityService entityService) { @@ -49,14 +53,20 @@ public static String restoreIndices( } RestoreIndicesArgs args = new RestoreIndicesArgs() - .setAspectName(aspectName) - .setUrnLike(urnLike) - .setUrn(urn) - .setStart(start) - .setBatchSize(batchSize); + .aspectName(aspectName) + .urnLike(urnLike) + .urn(urn) + .start(start) + .batchSize(batchSize) + .limit(limit) + .gePitEpochMs(gePitEpochMs) + .lePitEpochMs(lePitEpochMs); Map result = new HashMap<>(); result.put("args", args); - result.put("result", entityService.restoreIndices(args, log::info)); + result.put("result", entityService + .streamRestoreIndices(args, log::info) + .map(RestoreIndicesResult::toString) + .collect(Collectors.joining("\n"))); return result.toString(); } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 9c44aefbed19db..33dffb4ed975cb 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -31,6 +31,7 @@ import java.util.Set; import java.util.concurrent.Future; import java.util.function.Consumer; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -244,7 +245,7 @@ String batchApplyRetention( Integer getCountAspect(@Nonnull String aspectName, @Nullable String urnLike); // TODO: Extract this to a different service, doesn't need to be here - RestoreIndicesResult restoreIndices( + Stream streamRestoreIndices( @Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger); // Restore indices from list using key lookups (no scans) diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java index e50b44b7f0eca3..b4da40871cdd48 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java @@ -1,13 +1,24 @@ package com.linkedin.metadata.entity.restoreindices; +import java.time.Instant; import lombok.Data; +import lombok.experimental.Accessors; @Data +@Accessors(fluent = true) public class RestoreIndicesArgs implements Cloneable { + public static final int DEFAULT_BATCH_SIZE = 500; + public static final int DEFAULT_NUM_THREADS = 1; + public static final int DEFAULT_BATCH_DELAY_MS = 1; + public static final long DEFAULT_GE_PIT_EPOCH_MS = 0; + public int start = 0; - public int batchSize = 10; - public int numThreads = 1; - public long batchDelayMs = 1; + public int batchSize 
= DEFAULT_BATCH_SIZE; + public int limit = 0; + public int numThreads = DEFAULT_NUM_THREADS; + public long batchDelayMs = DEFAULT_BATCH_DELAY_MS; + public long gePitEpochMs = DEFAULT_GE_PIT_EPOCH_MS; + public long lePitEpochMs; public String aspectName; public String urn; public String urnLike; @@ -26,37 +37,38 @@ public RestoreIndicesArgs clone() { } } - public RestoreIndicesArgs setAspectName(String aspectName) { - this.aspectName = aspectName; + public RestoreIndicesArgs start(Integer start) { + this.start = start != null ? start : 0; return this; } - public RestoreIndicesArgs setUrnLike(String urnLike) { - this.urnLike = urnLike; + public RestoreIndicesArgs batchSize(Integer batchSize) { + this.batchSize = batchSize != null ? batchSize : DEFAULT_BATCH_SIZE; return this; } - public RestoreIndicesArgs setUrn(String urn) { - this.urn = urn; + public RestoreIndicesArgs limit(Integer limit) { + this.limit = limit != null ? limit : 0; return this; } - public RestoreIndicesArgs setStart(Integer start) { - if (start != null) { - this.start = start; - } + public RestoreIndicesArgs numThreads(Integer numThreads) { + this.numThreads = numThreads != null ? numThreads : DEFAULT_NUM_THREADS; return this; } - public RestoreIndicesArgs setBatchSize(Integer batchSize) { - if (batchSize != null) { - this.batchSize = batchSize; - } + public RestoreIndicesArgs batchDelayMs(Long batchDelayMs) { + this.batchDelayMs = batchDelayMs != null ? batchDelayMs : DEFAULT_BATCH_DELAY_MS; + return this; + } + + public RestoreIndicesArgs gePitEpochMs(Long gePitEpochMs) { + this.gePitEpochMs = gePitEpochMs != null ? gePitEpochMs : DEFAULT_GE_PIT_EPOCH_MS; return this; } - public RestoreIndicesArgs setUrnBasedPagination(Boolean urnBasedPagination) { - this.urnBasedPagination = urnBasedPagination; + public RestoreIndicesArgs lePitEpochMs(Long lePitEpochMs) { + this.lePitEpochMs = lePitEpochMs != null ? 
lePitEpochMs : Instant.now().toEpochMilli(); return this; } } From bf52807e12f16facff9f88372f1fc598e87675d2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 2 Apr 2024 07:28:06 -0700 Subject: [PATCH 06/17] feat(ui): show classification in test connection (#10156) --- .../app/ingest/source/builder/RecipeForm/TestConnection/types.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts index 3395f0c67d8c8a..4f401e34d1a39c 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/TestConnection/types.ts @@ -12,6 +12,7 @@ export enum SourceCapability { TAGS = 'Extract Tags', SCHEMA_METADATA = 'Schema Metadata', CONTAINERS = 'Asset Containers', + CLASSIFICATION = 'Classification', } export interface ConnectionCapability { From db33c8646a74b271de3a5e4eef32ce1541251538 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 2 Apr 2024 07:28:43 -0700 Subject: [PATCH 07/17] fix(ingest): add classification dep for dynamodb (#10162) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 7d6ba719eb353d..33325b26d4e158 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -314,7 +314,7 @@ "dbt": {"requests"} | sqlglot_lib | aws_common, "dbt-cloud": {"requests"} | sqlglot_lib, "druid": sql_common | {"pydruid>=0.6.2"}, - "dynamodb": aws_common, + "dynamodb": aws_common | classification_lib, # Starting with 7.14.0 python client is checking if it is connected to elasticsearch client. If its not it throws # UnsupportedProductError # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0 From c9b9afc5307e1ae602675d67b37823a931a137f6 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 2 Apr 2024 07:29:27 -0700 Subject: [PATCH 08/17] feat(ingest/dbt): enable model performance and compiled code by default (#10164) --- docs/how/updating-datahub.md | 3 ++- .../src/datahub/ingestion/source/dbt/dbt_common.py | 8 ++------ .../src/datahub/ingestion/source/dbt/dbt_core.py | 6 +++--- metadata-ingestion/tests/integration/dbt/test_dbt.py | 9 +-------- metadata-ingestion/tests/unit/test_dbt_source.py | 2 +- 5 files changed, 9 insertions(+), 19 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 60504aaa7b80a2..8051777a5ee070 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -26,7 +26,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #10055 - Assertion entities generated by dbt are now associated with the dbt dataset entity, and not the entity in the data warehouse. - #10090 - For Redshift ingestion, `use_lineage_v2` is now enabled by default. - #10147 - For looker ingestion, the browse paths for looker Dashboard, Chart, View, Explore have been updated to align with Looker UI. This does not affect URNs or lineage but primarily affects (improves) browsing experience. -- +- #10164 - For dbt ingestion, `entities_enabled.model_performance` and `include_compiled_code` are now both enabled by default. Upgrading dbt ingestion will also require upgrading the backend to 0.13.1. 
+ ### Potential Downtime ### Deprecations diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 788a4f0b5d6163..4876e2b6fcff4a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -179,9 +179,7 @@ class DBTEntitiesEnabled(ConfigModel): description="Emit metadata for test results when set to Yes or Only", ) model_performance: EmitDirective = Field( - # TODO: This is currently disabled by default, but will be enabled by default once - # the models have stabilized. - EmitDirective.NO, + EmitDirective.YES, description="Emit model performance metadata when set to Yes or Only. " "Only supported with dbt core.", ) @@ -349,9 +347,7 @@ class DBTCommonConfig( _remove_use_compiled_code = pydantic_removed_field("use_compiled_code") include_compiled_code: bool = Field( - # TODO: Once the formattedViewLogic field model change is included in a server - # release, probably 0.13.1, we can flip the default to True. - default=False, + default=True, description="When enabled, includes the compiled code in the emitted metadata.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index d04fa59ecbb6fe..c885ee6525b086 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -53,9 +53,9 @@ class DBTCoreConfig(DBTCommonConfig): run_results_paths: List[str] = Field( default=[], description="Path to output of dbt test run as run_results files in JSON format. " - "If invoking dbt multiple times, you can provide paths to multiple run result files." - "See https://docs.getdbt.com/reference/artifacts/run-results-json. " - "If not specified, test execution results will not be populated in DataHub.", + "If not specified, test execution results and model performance metadata will not be populated in DataHub." + "If invoking dbt multiple times, you can provide paths to multiple run result files. " + "See https://docs.getdbt.com/reference/artifacts/run-results-json.", ) # Because we now also collect model performance metadata, the "test_results" field was renamed to "run_results". diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 953ff24f7ba577..5f7d65f5b23773 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -26,8 +26,6 @@ _default_dbt_source_args = { # Needed to avoid needing to access datahub server. "write_semantics": "OVERRIDE", - # Needed until this is made the default. - "include_compiled_code": True, } @@ -216,12 +214,7 @@ def set_paths( manifest_file="sample_dbt_manifest_2.json", sources_file="sample_dbt_sources_2.json", run_results_files=["sample_dbt_run_results_2.json"], - source_config_modifiers={ - "entities_enabled": { - # TODO: Remove this once it becomes the default. 
- "model_performance": "YES", - }, - }, + source_config_modifiers={}, ), ], ids=lambda dbt_test_config: dbt_test_config.run_id, diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 91a4e568d8200b..b0db18594f76d6 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -293,7 +293,7 @@ def test_dbt_entity_emission_configuration_helpers(): assert config.entities_enabled.can_emit_node_type("source") assert config.entities_enabled.can_emit_node_type("test") assert config.entities_enabled.can_emit_test_results - assert not config.entities_enabled.can_emit_model_performance + assert config.entities_enabled.can_emit_model_performance assert not config.entities_enabled.is_only_test_results() config_dict = { From 77c4629ccf18a65df3bbe4bf7352e7ae1b8e8b55 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 2 Apr 2024 09:36:44 -0500 Subject: [PATCH 09/17] refactor(docker): move to acryldata repo for all images (#9459) --- .../actions/docker-custom-build-and-push/action.yml | 2 +- .github/workflows/docker-unified.yml | 12 ++++++------ README.md | 2 +- build.gradle | 2 +- docker/README.md | 12 ++++++------ docker/docker-compose-with-cassandra.yml | 6 +++--- docker/docker-compose-without-neo4j.yml | 8 ++++---- docker/docker-compose.consumers-without-neo4j.yml | 4 ++-- docker/docker-compose.consumers.dev.yml | 4 ++-- docker/docker-compose.consumers.yml | 4 ++-- docker/docker-compose.dev.yml | 6 +++--- docker/docker-compose.yml | 8 ++++---- docker/ingestion/docker-compose.yml | 2 +- docker/profiles/docker-compose.frontend.yml | 4 ++-- docker/profiles/docker-compose.gms.yml | 12 ++++++------ docker/profiles/docker-compose.prerequisites.yml | 10 +++++----- docker/quickstart/docker-compose-m1.quickstart.yml | 8 ++++---- .../docker-compose-without-neo4j-m1.quickstart.yml | 8 ++++---- .../docker-compose-without-neo4j.quickstart.yml | 8 ++++---- ...er-compose.consumers-without-neo4j.quickstart.yml | 4 ++-- .../docker-compose.consumers.quickstart.yml | 4 ++-- docker/quickstart/docker-compose.quickstart.yml | 8 ++++---- docs/authentication/guides/add-users.md | 2 +- docs/authentication/guides/jaas.md | 4 ++-- .../guides/sso/configure-oidc-behind-proxy.md | 2 +- docs/deploy/aws.md | 4 ++-- docs/deploy/azure.md | 2 +- docs/docker/development.md | 8 ++++---- docs/how/extract-container-logs.md | 4 ++-- docs/troubleshooting/quickstart.md | 8 ++++---- .../src/datahub/ingestion/sink/datahub_rest.py | 2 +- .../src/datahub/telemetry/telemetry.py | 2 +- metadata-ingestion/src/datahub/upgrade/upgrade.py | 6 ++++-- .../linkedin/metadata/kafka/MclConsumerConfig.java | 2 +- .../linkedin/metadata/kafka/McpConsumerConfig.java | 2 +- .../main/java/com/datahub/gms/servlet/Config.java | 2 +- smoke-test/tests/read_only/test_services_up.py | 2 +- 37 files changed, 96 insertions(+), 94 deletions(-) diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index ca0796180cd573..3f8ea7a4c88ebd 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -20,7 +20,7 @@ inputs: required: false images: - # e.g. linkedin/datahub-gms + # e.g. 
acryldata/datahub-gms description: "List of Docker images to use as base name for tags" required: true build-args: diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 38ffa3484c0bff..5396e6f17cb974 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -17,12 +17,12 @@ concurrency: cancel-in-progress: true env: - DATAHUB_GMS_IMAGE: "linkedin/datahub-gms" - DATAHUB_FRONTEND_IMAGE: "linkedin/datahub-frontend-react" - DATAHUB_MAE_CONSUMER_IMAGE: "linkedin/datahub-mae-consumer" - DATAHUB_MCE_CONSUMER_IMAGE: "linkedin/datahub-mce-consumer" - DATAHUB_KAFKA_SETUP_IMAGE: "linkedin/datahub-kafka-setup" - DATAHUB_ELASTIC_SETUP_IMAGE: "linkedin/datahub-elasticsearch-setup" + DATAHUB_GMS_IMAGE: "acryldata/datahub-gms" + DATAHUB_FRONTEND_IMAGE: "acryldata/datahub-frontend-react" + DATAHUB_MAE_CONSUMER_IMAGE: "acryldata/datahub-mae-consumer" + DATAHUB_MCE_CONSUMER_IMAGE: "acryldata/datahub-mce-consumer" + DATAHUB_KAFKA_SETUP_IMAGE: "acryldata/datahub-kafka-setup" + DATAHUB_ELASTIC_SETUP_IMAGE: "acryldata/datahub-elasticsearch-setup" DATAHUB_MYSQL_SETUP_IMAGE: "acryldata/datahub-mysql-setup" DATAHUB_UPGRADE_IMAGE: "acryldata/datahub-upgrade" DATAHUB_INGESTION_BASE_IMAGE: "acryldata/datahub-ingestion-base" diff --git a/README.md b/README.md index 6b8fa520e432ef..dddb32da73f237 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ HOSTED_DOCS_ONLY--> [![Version](https://img.shields.io/github/v/release/datahub-project/datahub?include_prereleases)](https://github.com/datahub-project/datahub/releases/latest) [![PyPI version](https://badge.fury.io/py/acryl-datahub.svg)](https://badge.fury.io/py/acryl-datahub) [![build & test](https://github.com/datahub-project/datahub/workflows/build%20&%20test/badge.svg?branch=master&event=push)](https://github.com/datahub-project/datahub/actions?query=workflow%3A%22build+%26+test%22+branch%3Amaster+event%3Apush) -[![Docker Pulls](https://img.shields.io/docker/pulls/linkedin/datahub-gms.svg)](https://hub.docker.com/r/linkedin/datahub-gms) +[![Docker Pulls](https://img.shields.io/docker/pulls/acryldata/datahub-gms.svg)](https://hub.docker.com/r/acryldata/datahub-gms) [![Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://slack.datahubproject.io) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/datahub-project/datahub/blob/master/docs/CONTRIBUTING.md) [![GitHub commit activity](https://img.shields.io/github/commit-activity/m/datahub-project/datahub)](https://github.com/datahub-project/datahub/pulls?q=is%3Apr) diff --git a/build.gradle b/build.gradle index 0d9c0f5dc18b08..5cf43755fceffe 100644 --- a/build.gradle +++ b/build.gradle @@ -55,7 +55,7 @@ buildscript { ext.openLineageVersion = '1.5.0' ext.logbackClassicJava8 = '1.2.12' - ext.docker_registry = 'linkedin' + ext.docker_registry = 'acryldata' apply from: './repositories.gradle' buildscript.repositories.addAll(project.repositories) diff --git a/docker/README.md b/docker/README.md index 3510649707c65d..ad847dc70cf3c0 100644 --- a/docker/README.md +++ b/docker/README.md @@ -26,13 +26,13 @@ DataHub Docker Images: Do not use `latest` or `debug` tags for any of the image as those are not supported and present only due to legacy reasons. Please use `head` or tags specific for versions like `v0.8.40`. For production we recommend using version specific tags not `head`. 
* [acryldata/datahub-ingestion](https://hub.docker.com/r/acryldata/datahub-ingestion/) -* [linkedin/datahub-gms](https://hub.docker.com/repository/docker/linkedin/datahub-gms/) -* [linkedin/datahub-frontend-react](https://hub.docker.com/repository/docker/linkedin/datahub-frontend-react/) -* [linkedin/datahub-mae-consumer](https://hub.docker.com/repository/docker/linkedin/datahub-mae-consumer/) -* [linkedin/datahub-mce-consumer](https://hub.docker.com/repository/docker/linkedin/datahub-mce-consumer/) +* [acryldata/datahub-gms](https://hub.docker.com/repository/docker/acryldata/datahub-gms/) +* [acryldata/datahub-frontend-react](https://hub.docker.com/repository/docker/acryldata/datahub-frontend-react/) +* [acryldata/datahub-mae-consumer](https://hub.docker.com/repository/docker/acryldata/datahub-mae-consumer/) +* [acryldata/datahub-mce-consumer](https://hub.docker.com/repository/docker/acryldata/datahub-mce-consumer/) * [acryldata/datahub-upgrade](https://hub.docker.com/r/acryldata/datahub-upgrade/) -* [linkedin/datahub-kafka-setup](https://hub.docker.com/r/acryldata/datahub-kafka-setup/) -* [linkedin/datahub-elasticsearch-setup](https://hub.docker.com/r/linkedin/datahub-elasticsearch-setup/) +* [acryldata/datahub-kafka-setup](https://hub.docker.com/r/acryldata/datahub-kafka-setup/) +* [acryldata/datahub-elasticsearch-setup](https://hub.docker.com/r/acryldata/datahub-elasticsearch-setup/) * [acryldata/datahub-mysql-setup](https://hub.docker.com/r/acryldata/datahub-mysql-setup/) * [acryldata/datahub-postgres-setup](https://hub.docker.com/r/acryldata/datahub-postgres-setup/) * [acryldata/datahub-actions](https://hub.docker.com/r/acryldata/datahub-actions). Do not use `acryldata/acryl-datahub-actions` as that is deprecated and no longer used. diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index c99b6e97b4d808..d722b07b9a7af4 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -8,7 +8,7 @@ version: '3.9' services: datahub-frontend-react: hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - 9002:9002 build: @@ -32,7 +32,7 @@ services: condition: service_healthy datahub-gms: hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - 8080:8080 build: @@ -85,7 +85,7 @@ services: # This "container" is a workaround to pre-create search indices elasticsearch-setup: hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} build: context: ../ dockerfile: docker/elasticsearch-setup/Dockerfile diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 0ea61e4be7281a..eae36fb849fd5c 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -8,7 +8,7 @@ version: '3.9' services: datahub-frontend-react: hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - 
${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 build: @@ -33,7 +33,7 @@ services: condition: service_healthy datahub-gms: hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 build: @@ -73,7 +73,7 @@ services: # This "container" is a workaround to pre-create search indices elasticsearch-setup: hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} build: context: ../ dockerfile: docker/elasticsearch-setup/Dockerfile @@ -88,7 +88,7 @@ services: datahub_setup_job: true kafka-setup: hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} build: dockerfile: ./docker/kafka-setup/Dockerfile context: ../ diff --git a/docker/docker-compose.consumers-without-neo4j.yml b/docker/docker-compose.consumers-without-neo4j.yml index b1c492c4c7df94..f1aa6b30cede09 100644 --- a/docker/docker-compose.consumers-without-neo4j.yml +++ b/docker/docker-compose.consumers-without-neo4j.yml @@ -7,7 +7,7 @@ services: - MCE_CONSUMER_ENABLED=false datahub-mae-consumer: hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 build: @@ -19,7 +19,7 @@ services: - KAFKA_CONSUMER_HEALTH_CHECK_ENABLED=${KAFKA_CONSUMER_HEALTH_CHECK_ENABLED:-true} datahub-mce-consumer: hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 build: diff --git a/docker/docker-compose.consumers.dev.yml b/docker/docker-compose.consumers.dev.yml index 5c272a9cf9b8ae..00f7b52df151f3 100644 --- a/docker/docker-compose.consumers.dev.yml +++ b/docker/docker-compose.consumers.dev.yml @@ -1,7 +1,7 @@ version: '3.9' services: datahub-mae-consumer: - image: linkedin/datahub-mae-consumer:debug + image: acryldata/datahub-mae-consumer:debug build: context: ../ dockerfile: docker/datahub-mae-consumer/Dockerfile @@ -13,7 +13,7 @@ services: - ../metadata-jobs/mae-consumer-job/build/libs/:/datahub/datahub-mae-consumer/bin/ - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml datahub-mce-consumer: - image: linkedin/datahub-mce-consumer:debug + image: acryldata/datahub-mce-consumer:debug build: context: ../ dockerfile: docker/datahub-mce-consumer/Dockerfile diff --git a/docker/docker-compose.consumers.yml b/docker/docker-compose.consumers.yml index 977e29b9a4abca..74b9adaeb99485 100644 --- a/docker/docker-compose.consumers.yml +++ b/docker/docker-compose.consumers.yml @@ -7,7 +7,7 @@ services: - MCE_CONSUMER_ENABLED=false datahub-mae-consumer: hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 build: @@ -22,7 +22,7 @@ services: condition: service_healthy 
datahub-mce-consumer: hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 build: diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index c7a3c5098d940d..b6ac43a9eda434 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -11,7 +11,7 @@ version: '3.9' services: datahub-frontend-react: - image: linkedin/datahub-frontend-react:head + image: acryldata/datahub-frontend-react:head ports: - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002 - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 @@ -27,7 +27,7 @@ services: - ../datahub-frontend/build/stage/main:/datahub-frontend - ./monitoring/client-prometheus-config.yaml:/datahub-frontend/client-prometheus-config.yaml datahub-gms: - image: linkedin/datahub-gms:debug + image: acryldata/datahub-gms:debug ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -76,7 +76,7 @@ services: - ${HOME}/.datahub/plugins:/etc/datahub/plugins # Pre-creates the search indices using local mapping/settings.json elasticsearch-setup: - image: linkedin/datahub-elasticsearch-setup:head + image: acryldata/datahub-elasticsearch-setup:head build: context: elasticsearch-setup dockerfile: Dockerfile diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 79cd72a487a370..96f37496859a46 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -8,7 +8,7 @@ version: '3.9' services: datahub-frontend-react: hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 build: @@ -32,7 +32,7 @@ services: condition: service_healthy datahub-gms: hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} ports: @@ -75,7 +75,7 @@ services: # This "container" is a workaround to pre-create search indices elasticsearch-setup: hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} build: context: ../ dockerfile: docker/elasticsearch-setup/Dockerfile @@ -93,7 +93,7 @@ services: # explicitly wait for this container kafka-setup: hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} build: dockerfile: ./docker/kafka-setup/Dockerfile context: ../ diff --git a/docker/ingestion/docker-compose.yml b/docker/ingestion/docker-compose.yml index 2ba6872c0313a8..06d4e47aa4a404 100644 --- a/docker/ingestion/docker-compose.yml +++ b/docker/ingestion/docker-compose.yml @@ -5,7 +5,7 @@ services: build: context: ../../ dockerfile: docker/datahub-ingestion/Dockerfile - image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-head} + image: acryldata/datahub-ingestion:${DATAHUB_VERSION:-head} hostname: ingestion command: "ingest -c 
/sample_recipe.yml" volumes: diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index 4b2e7417fa61c1..345493ba516508 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -1,7 +1,7 @@ x-datahub-frontend-service: &datahub-frontend-service hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 env_file: @@ -15,7 +15,7 @@ x-datahub-frontend-service: &datahub-frontend-service x-datahub-frontend-service-dev: &datahub-frontend-service-dev <<: *datahub-frontend-service - image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-frontend-react}:debug + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-frontend-react}:debug ports: - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002 - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 961bd4464af957..e9baa65290e50f 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -90,7 +90,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -112,7 +112,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-gms}:debug + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:debug ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -137,7 +137,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 env_file: @@ -149,7 +149,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mae-consumer}:debug + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:debug environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] volumes: @@ -163,7 +163,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 env_file: @@ -175,7 
+175,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mce-consumer}:debug + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:debug environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] volumes: diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index 7b1f6b8c99c0eb..8de220093dda52 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -256,7 +256,7 @@ services: kafka-setup: &kafka-setup profiles: *profiles-quickstart hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:${DATAHUB_VERSION:-head} env_file: kafka-setup/env/docker.env environment: &kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-false} @@ -273,7 +273,7 @@ services: environment: <<: *kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-true} - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-kafka-setup}:debug + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-kafka-setup}:debug elasticsearch: profiles: *elasticsearch-profiles hostname: search @@ -297,7 +297,7 @@ services: volumes: - esdata:/usr/share/elasticsearch/data elasticsearch-setup-dev: &elasticsearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug profiles: *elasticsearch-profiles hostname: elasticsearch-setup env_file: elasticsearch-setup/env/docker.env @@ -335,7 +335,7 @@ services: <<: *elasticsearch-setup-dev profiles: *opensearch-profiles-quickstart hostname: opensearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} @@ -348,7 +348,7 @@ services: <<: *opensearch-setup profiles: *opensearch-profiles-dev hostname: opensearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-elasticsearch-setup}:debug environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 50f0c90c831755..d2ac2f151fcbbd 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -111,7 +111,7 @@ services: 
test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -185,7 +185,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -200,7 +200,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 9608b4383ab5d2..1ba467d7fb9289 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -106,7 +106,7 @@ services: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -178,7 +178,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -193,7 +193,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 176e5539aa491d..893af253095bf3 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -106,7 +106,7 @@ services: test: curl -sS 
--fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -178,7 +178,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -193,7 +193,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml index f61bb53d72ecc0..a4211acedcf102 100644 --- a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml @@ -20,7 +20,7 @@ services: - GRAPH_SERVICE_IMPL=elasticsearch - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 datahub-mce-consumer: @@ -52,7 +52,7 @@ services: - PE_CONSUMER_ENABLED=false - UI_INGESTION_ENABLED=false hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 version: '3.9' diff --git a/docker/quickstart/docker-compose.consumers.quickstart.yml b/docker/quickstart/docker-compose.consumers.quickstart.yml index 3ceb5d537ffd82..e7571e4baf8b4e 100644 --- a/docker/quickstart/docker-compose.consumers.quickstart.yml +++ b/docker/quickstart/docker-compose.consumers.quickstart.yml @@ -27,7 +27,7 @@ services: - GRAPH_SERVICE_IMPL=neo4j - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 datahub-mce-consumer: @@ -66,7 +66,7 @@ services: - PE_CONSUMER_ENABLED=false - UI_INGESTION_ENABLED=false hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-acryldata/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 version: '3.9' diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index e39695f52a4372..f3490ce502626d 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -64,7 +64,7 @@ services: - ELASTIC_CLIENT_HOST=elasticsearch - ELASTIC_CLIENT_PORT=9200 
hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-acryldata/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: @@ -111,7 +111,7 @@ services: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: @@ -185,7 +185,7 @@ services: - ELASTICSEARCH_PORT=9200 - ELASTICSEARCH_PROTOCOL=http hostname: elasticsearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-acryldata/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true kafka-setup: @@ -200,7 +200,7 @@ services: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-acryldata/datahub-kafka-setup}:${DATAHUB_VERSION:-head} labels: datahub_setup_job: true mysql: diff --git a/docs/authentication/guides/add-users.md b/docs/authentication/guides/add-users.md index d380cacd6665e4..86dac3ea328e53 100644 --- a/docs/authentication/guides/add-users.md +++ b/docs/authentication/guides/add-users.md @@ -134,7 +134,7 @@ For example, to mount a user.props file that is stored on my local filesystem at build: context: ../ dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} ..... 
# The new stuff volumes: diff --git a/docs/authentication/guides/jaas.md b/docs/authentication/guides/jaas.md index 6268d608f49260..42a87a781bd002 100644 --- a/docs/authentication/guides/jaas.md +++ b/docs/authentication/guides/jaas.md @@ -29,7 +29,7 @@ datahub-frontend-react: build: context: ../ dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} env_file: datahub-frontend/env/docker.env hostname: datahub-frontend-react container_name: datahub-frontend-react @@ -56,7 +56,7 @@ datahub-frontend-react: build: context: ../ dockerfile: docker/datahub-frontend/Dockerfile - image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} + image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} env_file: datahub-frontend/env/docker.env hostname: datahub-frontend-react container_name: datahub-frontend-react diff --git a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md index 684bf768f2bafa..c00f43228ea1e3 100644 --- a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md +++ b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md @@ -34,7 +34,7 @@ To build a custom image for your frontend, with the certificates built-in, you c Example Dockerfile: ```dockerfile -FROM linkedin/datahub-frontend-react: +FROM acryldata/datahub-frontend-react: COPY /truststore-directory /certificates ``` diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index d060eddd9acc88..355ed414670081 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -137,7 +137,7 @@ file used to deploy datahub). Change datahub-frontend values to the following. datahub-frontend: enabled: true image: - repository: linkedin/datahub-frontend-react + repository: acryldata/datahub-frontend-react tag: "latest" ingress: enabled: true @@ -305,7 +305,7 @@ a different way of creating time based indices. elasticsearchSetupJob: enabled: true image: - repository: linkedin/datahub-elasticsearch-setup + repository: acryldata/datahub-elasticsearch-setup tag: "***" extraEnvs: - name: USE_AWS_ELASTICSEARCH diff --git a/docs/deploy/azure.md b/docs/deploy/azure.md index b940b82827e947..6ddd5fc5ba1d69 100644 --- a/docs/deploy/azure.md +++ b/docs/deploy/azure.md @@ -165,7 +165,7 @@ In order to use the ingress controller to expose frontend pod, we need to update datahub-frontend: enabled: true image: - repository: linkedin/datahub-frontend-react + repository: acryldata/datahub-frontend-react # tag: "v0.10.0 # defaults to .global.datahub.version # Set up ingress to expose react front-end diff --git a/docs/docker/development.md b/docs/docker/development.md index 91a303744a03bd..35c708a4ac4907 100644 --- a/docs/docker/development.md +++ b/docs/docker/development.md @@ -30,12 +30,12 @@ containers with remote debugging ports enabled. 
Once the `debug` docker images are constructed you'll see images similar to the following: ```shell -linkedin/datahub-frontend-react debug e52fef698025 28 minutes ago 763MB -linkedin/datahub-kafka-setup debug 3375aaa2b12d 55 minutes ago 659MB -linkedin/datahub-gms debug ea2b0a8ea115 56 minutes ago 408MB +acryldata/datahub-frontend-react debug e52fef698025 28 minutes ago 763MB +acryldata/datahub-kafka-setup debug 3375aaa2b12d 55 minutes ago 659MB +acryldata/datahub-gms debug ea2b0a8ea115 56 minutes ago 408MB acryldata/datahub-upgrade debug 322377a7a21d 56 minutes ago 463MB acryldata/datahub-mysql-setup debug 17768edcc3e5 2 hours ago 58.2MB -linkedin/datahub-elasticsearch-setup debug 4d935be7c62c 2 hours ago 26.1MB +acryldata/datahub-elasticsearch-setup debug 4d935be7c62c 2 hours ago 26.1MB ``` At this point it is possible to view the DataHub UI at `http://localhost:9002` as you normally would with quickstart. diff --git a/docs/how/extract-container-logs.md b/docs/how/extract-container-logs.md index b5fbb4c83cc645..d702a0acc91230 100644 --- a/docs/how/extract-container-logs.md +++ b/docs/how/extract-container-logs.md @@ -15,8 +15,8 @@ To do so, you can view all containers that Docker knows about by running the fol ``` johnjoyce@Johns-MBP datahub-fork % docker container ls CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -6c4a280bc457 linkedin/datahub-frontend-react "datahub-frontend/bi…" 5 days ago Up 46 hours (healthy) 0.0.0.0:9002->9002/tcp datahub-frontend-react -122a2488ab63 linkedin/datahub-gms "/bin/sh -c /datahub…" 5 days ago Up 5 days (healthy) 0.0.0.0:8080->8080/tcp datahub-gms +6c4a280bc457 acryldata/datahub-frontend-react "datahub-frontend/bi…" 5 days ago Up 46 hours (healthy) 0.0.0.0:9002->9002/tcp datahub-frontend-react +122a2488ab63 acryldata/datahub-gms "/bin/sh -c /datahub…" 5 days ago Up 5 days (healthy) 0.0.0.0:8080->8080/tcp datahub-gms 7682dcc64afa confluentinc/cp-schema-registry:5.4.0 "/etc/confluent/dock…" 5 days ago Up 5 days 0.0.0.0:8081->8081/tcp schema-registry 3680fcaef3ed confluentinc/cp-kafka:5.4.0 "/etc/confluent/dock…" 5 days ago Up 5 days 0.0.0.0:9092->9092/tcp, 0.0.0.0:29092->29092/tcp broker 9d6730ddd4c4 neo4j:4.0.6 "/sbin/tini -g -- /d…" 5 days ago Up 5 days 0.0.0.0:7474->7474/tcp, 7473/tcp, 0.0.0.0:7687->7687/tcp neo4j diff --git a/docs/troubleshooting/quickstart.md b/docs/troubleshooting/quickstart.md index 0392ffc426a6cb..0dfe11179083c2 100644 --- a/docs/troubleshooting/quickstart.md +++ b/docs/troubleshooting/quickstart.md @@ -88,10 +88,10 @@ You can list all Docker containers in your local by running `docker container ls ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -979830a342ce linkedin/datahub-mce-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mce-consumer -3abfc72e205d linkedin/datahub-frontend-react:latest "datahub-frontend…" 10 hours ago Up 10 hours 0.0.0.0:9002->9002/tcp datahub-frontend -50b2308a8efd linkedin/datahub-mae-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mae-consumer -4d6b03d77113 linkedin/datahub-gms:latest "bash -c 'dockerize …" 10 hours ago Up 10 hours 0.0.0.0:8080->8080/tcp datahub-gms +979830a342ce acryldata/datahub-mce-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mce-consumer +3abfc72e205d acryldata/datahub-frontend-react:latest "datahub-frontend…" 10 hours ago Up 10 hours 0.0.0.0:9002->9002/tcp datahub-frontend +50b2308a8efd acryldata/datahub-mae-consumer:latest "bash -c 'while ping…" 10 hours ago Up 10 hours datahub-mae-consumer 
+4d6b03d77113 acryldata/datahub-gms:latest "bash -c 'dockerize …" 10 hours ago Up 10 hours 0.0.0.0:8080->8080/tcp datahub-gms c267c287a235 landoop/schema-registry-ui:latest "/run.sh" 10 hours ago Up 10 hours 0.0.0.0:8000->8000/tcp schema-registry-ui 4b38899cc29a confluentinc/cp-schema-registry:5.2.1 "/etc/confluent/dock…" 10 hours ago Up 10 hours 0.0.0.0:8081->8081/tcp schema-registry 37c29781a263 confluentinc/cp-kafka:5.2.1 "/etc/confluent/dock…" 10 hours ago Up 10 hours 0.0.0.0:9092->9092/tcp, 0.0.0.0:29092->29092/tcp broker diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index a37f6ad8d279ea..007b7487cb6a4e 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -104,7 +104,7 @@ def __post_init__(self) -> None: self.report.gms_version = ( gms_config.get("versions", {}) - .get("linkedin/datahub", {}) + .get("acryldata/datahub", {}) .get("version", "") ) self.report.max_threads = self.config.max_threads diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index a802125e76b4e1..08df9e80ecf290 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -335,7 +335,7 @@ def _server_props(self, server: Optional[DataHubGraph]) -> Dict[str, str]: "serverType", "missing" ), "server_version": server.server_config.get("versions", {}) - .get("linkedin/datahub", {}) + .get("acryldata/datahub", {}) .get("version", "missing"), "server_id": server.server_id or "missing", } diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index 075bfd29008f64..446f1a05b71a6e 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -139,10 +139,12 @@ async def get_server_version_stats( current_server_release_date = None if server_config: server_version_string = ( - server_config.get("versions", {}).get("linkedin/datahub", {}).get("version") + server_config.get("versions", {}) + .get("acryldata/datahub", {}) + .get("version") ) commit_hash = ( - server_config.get("versions", {}).get("linkedin/datahub", {}).get("commit") + server_config.get("versions", {}).get("acryldata/datahub", {}).get("commit") ) server_type = server_config.get("datahub", {}).get("serverType", "unknown") if server_type == "quickstart" and commit_hash: diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java index 686e2a816ffb56..280ca87d1cf048 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MclConsumerConfig.java @@ -39,7 +39,7 @@ public MclConsumerConfig(GitVersion gitVersion) throws JsonProcessingException { config.put("noCode", "true"); Map versionConfig = new HashMap<>(); - versionConfig.put("linkedin/datahub", gitVersion.toConfig()); + versionConfig.put("acryldata/datahub", gitVersion.toConfig()); config.put("versions", versionConfig); configJson = OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(config); } diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java 
b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java index b04ecc7761eb65..3db6dfa79516eb 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/McpConsumerConfig.java @@ -39,7 +39,7 @@ public McpConsumerConfig(GitVersion gitVersion) throws JsonProcessingException { config.put("noCode", "true"); Map versionConfig = new HashMap<>(); - versionConfig.put("linkedin/datahub", gitVersion.toConfig()); + versionConfig.put("acryldata/datahub", gitVersion.toConfig()); config.put("versions", versionConfig); configJson = OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(config); } diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java index cfa4c6425c1316..33015c4adbec5f 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/Config.java @@ -97,7 +97,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws IO GitVersion version = getGitVersion(ctx); Map versionConfig = new HashMap<>(); - versionConfig.put("linkedin/datahub", version.toConfig()); + versionConfig.put("acryldata/datahub", version.toConfig()); config.put("versions", versionConfig); ConfigurationProvider configProvider = getConfigProvider(ctx); diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py index 4e00f910ceb73b..1fd43f884323c4 100644 --- a/smoke-test/tests/read_only/test_services_up.py +++ b/smoke-test/tests/read_only/test_services_up.py @@ -25,7 +25,7 @@ def test_gms_config_accessible() -> None: assert gms_config is not None if DATAHUB_VERSION is not None: - assert gms_config["versions"]["linkedin/datahub"]["version"] == DATAHUB_VERSION + assert gms_config["versions"]["acryldata/datahub"]["version"] == DATAHUB_VERSION else: print("[WARN] TEST_DATAHUB_VERSION is not set") From 3671860c58ca8339f05c478d9a1cbb6860351b50 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:46:01 -0500 Subject: [PATCH 10/17] fix(github): fix docker publish (#10186) --- .github/workflows/docker-unified.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 5396e6f17cb974..5e1409003c4765 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -73,7 +73,7 @@ jobs: - name: Check whether publishing enabled id: publish env: - ENABLE_PUBLISH: ${{ secrets.DOCKER_PASSWORD != '' && secrets.ACRYL_DOCKER_PASSWORD != '' }} + ENABLE_PUBLISH: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }} run: | echo "Enable publish: ${{ env.ENABLE_PUBLISH }}" echo "publish=${{ env.ENABLE_PUBLISH }}" >> $GITHUB_OUTPUT @@ -127,8 +127,8 @@ jobs: images: | ${{ env.DATAHUB_GMS_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . 
file: ./docker/datahub-gms/Dockerfile @@ -191,8 +191,8 @@ jobs: images: | ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/datahub-mae-consumer/Dockerfile @@ -255,8 +255,8 @@ jobs: images: | ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/datahub-mce-consumer/Dockerfile @@ -385,8 +385,8 @@ jobs: images: | ${{ env.DATAHUB_FRONTEND_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/datahub-frontend/Dockerfile @@ -439,8 +439,8 @@ jobs: images: | ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . file: ./docker/kafka-setup/Dockerfile @@ -481,8 +481,8 @@ jobs: images: | ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }} tags: ${{ needs.setup.outputs.tag }} - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} publish: ${{ needs.setup.outputs.publish }} context: . 
file: ./docker/elasticsearch-setup/Dockerfile From a89e189e93db89616517ff2dc61bf2478fc592c8 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 2 Apr 2024 10:48:17 -0500 Subject: [PATCH 11/17] feat(lineage): mark nodes as explored (#10180) --- .../UrnSearchAcrossLineageResultsMapper.java | 4 +++- .../src/main/resources/search.graphql | 9 ++++++-- .../graph/elastic/ESGraphQueryDAO.java | 19 +++++++++++---- .../metadata/search/LineageSearchService.java | 1 + .../search/SearchGraphServiceTestBase.java | 23 +++++++++++++++++++ .../metadata/graph/LineageRelationship.pdl | 5 ++++ .../metadata/search/LineageSearchEntity.pdl | 5 ++++ ...com.linkedin.entity.entities.snapshot.json | 5 ++++ ...nkedin.lineage.relationships.snapshot.json | 5 ++++ 9 files changed, 69 insertions(+), 7 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java index b39b960bb75801..b85303909c0801 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java @@ -16,6 +16,7 @@ import com.linkedin.metadata.search.LineageSearchEntity; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.SearchResultMetadata; +import java.util.ArrayList; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -69,7 +70,8 @@ private SearchAcrossLineageResult mapResult( .map(p -> mapPath(context, p)) .collect(Collectors.toList())) .setDegree(searchEntity.getDegree()) - .setDegrees(searchEntity.getDegrees().stream().collect(Collectors.toList())) + .setDegrees(new ArrayList<>(searchEntity.getDegrees())) + .setExplored(Boolean.TRUE.equals(searchEntity.isExplored())) .build(); } diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 2b29994332d07a..13c1ff2e8a7648 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -644,7 +644,7 @@ type ScrollResults { } """ -Results returned by issueing a search across relationships query +Results returned by issuing a search across relationships query """ type SearchAcrossLineageResults { """ @@ -679,7 +679,7 @@ type SearchAcrossLineageResults { } """ -Results returned by issueing a search across relationships query using scroll API +Results returned by issuing a search across relationships query using scroll API """ type ScrollAcrossLineageResults { """ @@ -742,6 +742,11 @@ type SearchAcrossLineageResult { """ degrees: [Int!] + """ + Marks whether or not this entity was explored further for lineage + """ + explored: Boolean! 
+ } """ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 15a16833aeb7bc..ea8d8fea54633c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -345,6 +345,8 @@ private Stream processOneHopLineage( int i) { // Do one hop on the lineage graph + int numHops = i + 1; // Zero indexed for loop counter, one indexed count + int remainingHops = maxHops - numHops; List oneHopRelationships = getLineageRelationshipsInBatches( currentLevel, @@ -352,8 +354,8 @@ private Stream processOneHopLineage( graphFilters, visitedEntities, viaEntities, - i + 1, - maxHops - (i + 1), + numHops, + remainingHops, remainingTime, existingPaths, exploreMultiplePaths, @@ -387,8 +389,9 @@ private Stream processOneHopLineage( || platformMatches( lineageRelationship.getEntity(), ignoreAsHops.get(entityType))))) - .forEach( - lineageRelationship -> additionalCurrentLevel.add(lineageRelationship.getEntity())); + .map(LineageRelationship::getEntity) + .forEach(additionalCurrentLevel::add); + ; if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( @@ -417,6 +420,14 @@ private Stream processOneHopLineage( .sorted(Comparator.comparing(Urn::toString)) .limit(lineageFlags.getEntitiesExploredPerHopLimit()); } + if (remainingHops > 0) { + // If there are hops remaining, we expect to explore everything getting passed back to the + // loop, barring a timeout + List entitiesToExplore = intermediateStream.collect(Collectors.toList()); + entitiesToExplore.forEach(urn -> result.get(urn).setExplored(true)); + // reassign the stream after consuming it + intermediateStream = entitiesToExplore.stream(); + } } return intermediateStream; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 3ea117663c23da..bb316f6f2b41c3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -738,6 +738,7 @@ private LineageSearchEntity buildLineageSearchEntity( if (lineageRelationship.hasDegrees()) { entity.setDegrees(lineageRelationship.getDegrees()); } + entity.setExplored(Boolean.TRUE.equals(lineageRelationship.isExplored())); } return entity; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index b389f8228a98d6..85ca7ce7a1629d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -410,6 +410,29 @@ public void testTimestampLineage() throws Exception { Assert.assertEquals(Integer.valueOf(2), downstreamResult.getTotal()); } + @Test + public void testExplored() throws Exception { + + List edges = + Arrays.asList( + // One upstream edge + new Edge(dataset2Urn, dataset1Urn, downstreamOf, null, null, null, null, null), + // Two downstream + new Edge(dataset3Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + new Edge(dataset4Urn, dataset2Urn, downstreamOf, null, null, null, null, null), + // One 
with null values, should always be returned + new Edge(dataset5Urn, dataset2Urn, downstreamOf, null, null, null, null, null)); + + edges.forEach(getGraphService()::addEdge); + syncAfterWrite(); + + EntityLineageResult result = getUpstreamLineage(dataset2Urn, null, null, 10); + Assert.assertTrue(Boolean.TRUE.equals(result.getRelationships().get(0).isExplored())); + + EntityLineageResult result2 = getUpstreamLineage(dataset2Urn, null, null, 10, 0); + Assert.assertTrue(result2.getRelationships().get(0).isExplored() == null); + } + /** * Utility method to reduce repeated parameters for lineage tests * diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl index c25a1cee7db474..a169157955e67b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl @@ -67,4 +67,9 @@ record LineageRelationship { * Replaces the deprecated field "degree". **/ degrees: optional array[int] + + /** + * Marks this relationship as explored during the graph walk + */ + explored: optional boolean } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl index e99115893712d2..fdfc8b2d53291c 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl @@ -29,4 +29,9 @@ record LineageSearchEntity includes SearchEntity { * The degrees of separation (number of hops) between the source and this entity */ degrees: array[int] = [] + + /** + * Marks an entity as having been explored for as a part of the graph walk + */ + explored: optional boolean } \ No newline at end of file diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 011b9e419a0c0e..4915f06ffe5d2a 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6205,6 +6205,11 @@ }, "doc" : "The degrees of separation (number of hops) between the source and this entity ", "default" : [ ] + }, { + "name" : "explored", + "type" : "boolean", + "doc" : "Marks an entity as having been explored for as a part of the graph walk", + "optional" : true } ] } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json index 056ca0e4da2065..00b3c925d0e731 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json @@ -177,6 +177,11 @@ }, "doc" : "The different depths at which this entity is discovered in the lineage graph.\nMarked as optional to maintain backward compatibility, but is filled out by implementations. 
\nReplaces the deprecated field \"degree\".\n", "optional" : true + }, { + "name" : "explored", + "type" : "boolean", + "doc" : "Marks this relationship as explored during the graph walk", + "optional" : true } ] } }, From e0b20e159b3413dab62f301371502fa41630990f Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 2 Apr 2024 21:34:22 +0530 Subject: [PATCH 12/17] feat(ingest/gc): add index truncation logic (#10099) --- .../datahub/ingestion/source/gc/datahub_gc.py | 135 +++++++++++++++++- 1 file changed, 133 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py index bf21e293e6a2fd..de74470585e5ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py @@ -1,10 +1,13 @@ +import datetime +import logging +import re import time from dataclasses import dataclass -from typing import Iterable +from typing import Dict, Iterable from pydantic import Field -from datahub.configuration.common import ConfigModel +from datahub.configuration.common import ConfigModel, OperationalError from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -15,12 +18,30 @@ from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +logger = logging.getLogger(__name__) + class DataHubGcSourceConfig(ConfigModel): cleanup_expired_tokens: bool = Field( default=True, description="Whether to clean up expired tokens or not", ) + truncate_indices: bool = Field( + default=True, + description="Whether to truncate elasticsearch indices or not which can be safely truncated", + ) + truncate_index_older_than_days: int = Field( + default=30, + description="Indices older than this number of days will be truncated", + ) + truncation_watch_until: int = Field( + default=10000, + description="Wait for truncation of indices until this number of documents are left", + ) + truncation_sleep_between_seconds: int = Field( + default=30, + description="Sleep between truncation monitoring.", + ) @dataclass @@ -51,8 +72,118 @@ def get_workunits_internal( ) -> Iterable[MetadataWorkUnit]: if self.config.cleanup_expired_tokens: self.revoke_expired_tokens() + if self.config.truncate_indices: + self.truncate_indices() yield from [] + def truncate_indices(self) -> None: + self._truncate_timeseries_helper(aspect_name="operation", entity_type="dataset") + self._truncate_timeseries_helper( + aspect_name="datasetusagestatistics", entity_type="dataset" + ) + self._truncate_timeseries_helper( + aspect_name="chartUsageStatistics", entity_type="chart" + ) + self._truncate_timeseries_helper( + aspect_name="dashboardUsageStatistics", entity_type="dashboard" + ) + + def _truncate_timeseries_helper(self, aspect_name: str, entity_type: str) -> None: + self._truncate_timeseries_with_watch_optional( + aspect_name=aspect_name, entity_type=entity_type, watch=False + ) + self._truncate_timeseries_with_watch_optional( + aspect_name=aspect_name, entity_type=entity_type, watch=True + ) + + def _truncate_timeseries_with_watch_optional( + self, aspect_name: str, entity_type: str, watch: bool + ) -> None: + graph = self.graph + assert graph is not None + if watch: + to_delete = 1 + while to_delete > 0: + response = self.truncate_timeseries_util( + aspect=aspect_name, + dry_run=watch, + days_ago=self.config.truncate_index_older_than_days, + 
entity_type=entity_type, + ) + val = response.get("value", "") + if "This was a dry run" not in val or "out of" not in val: + return + prev_to_delete = to_delete + to_delete, total = re.findall(r"\d+", val)[:2] + to_delete = int(to_delete) + if to_delete <= 0: + logger.info("Nothing to delete.") + return + logger.info(f"to_delete {to_delete} / {total}") + if to_delete == prev_to_delete: + logger.info("Seems to be stuck. Ending the loop.") + break + elif to_delete < self.config.truncation_watch_until: + logger.info("Too small truncation. Not going to watch.") + return + else: + time.sleep(self.config.truncation_sleep_between_seconds) + else: + self.truncate_timeseries_util( + aspect=aspect_name, + dry_run=watch, + days_ago=self.config.truncate_index_older_than_days, + entity_type=entity_type, + ) + + def x_days_ago_millis(self, days: int) -> int: + x_days_ago_datetime = datetime.datetime.now( + datetime.timezone.utc + ) - datetime.timedelta(days=days) + return int(x_days_ago_datetime.timestamp() * 1000) + + def truncate_timeseries_util( + self, + aspect: str, + days_ago: int, + dry_run: bool = True, + entity_type: str = "dataset", + ) -> Dict: + graph = self.graph + assert graph is not None + + gms_url = graph._gms_server + if not dry_run: + logger.info( + f"Going to truncate timeseries for {aspect} for {gms_url} older than {days_ago} days" + ) + days_ago_millis = self.x_days_ago_millis(days_ago) + url = f"{gms_url}/operations?action=truncateTimeseriesAspect" + try: + response = graph._post_generic( + url=url, + payload_dict={ + "entityType": entity_type, + "aspect": aspect, + "endTimeMillis": days_ago_millis, + "dryRun": dry_run, + }, + ) + # logger.info(f"Response: {response}") + except OperationalError: + response = graph._post_generic( + url=url, + payload_dict={ + "entityType": entity_type, + "aspect": aspect, + "endTimeMillis": days_ago_millis, + "dryRun": dry_run, + "forceDeleteByQuery": True, + }, + ) + # logger.info(f"Response: {response}") + return response + def revoke_expired_tokens(self) -> None: total = 1 while total > 0: From 4bba834ffd6fb945d1437caec99b5b6707fb2e04 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 2 Apr 2024 12:12:58 -0500 Subject: [PATCH 13/17] fix(entity-service): fix findFirst when already present (#10187) --- .../java/com/linkedin/metadata/entity/EntityServiceImpl.java | 2 +- .../java/com/linkedin/metadata/entity/DeleteEntityService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 754c5f272e2755..ae3a1b63ba0eb4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -930,7 +930,7 @@ public IngestResult ingestProposal( AspectsBatchImpl.builder().mcps(List.of(proposal), auditStamp, this).build(), async) .stream() .findFirst() - .get(); + .orElse(null); } /** diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index 2cd1aadf7665d6..45fb85ffe42b11 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ 
b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -349,7 +349,7 @@ private void updateAspect( final IngestResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp, false); - if (!ingestProposalResult.isSqlCommitted()) { + if (ingestProposalResult != null && !ingestProposalResult.isSqlCommitted()) { log.error( "Failed to ingest aspect with references removed. Before {}, after: {}, please check MCP processor" + " logs for more information", From 2873736eace1dbc6bd8d19206b0d4c4e2f02a535 Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:05:47 +0530 Subject: [PATCH 14/17] fix(ingestion/salesforce): fixed the issue by escaping the markdown string (#10157) --- .../datahub/ingestion/source/salesforce.py | 7 +- .../account_fields_soql_response.json | 68 +++++++++++++++++++ .../salesforce/salesforce_mces_golden.json | 34 ++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 6d52646f85d0a7..35af541c9e5326 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -573,10 +573,15 @@ def _get_schema_field( fieldTags: List[str] = self.get_field_tags(fieldName, field) + description = self._get_field_description(field, customField) + + # escaping string starting with `#` + description = "\\" + description if description.startswith("#") else description + schemaField = SchemaFieldClass( fieldPath=fieldPath, type=SchemaFieldDataTypeClass(type=TypeClass()), # type:ignore - description=self._get_field_description(field, customField), + description=description, # nativeDataType is set to data type shown on salesforce user interface, # not the corresponding API data type names. 
nativeDataType=field["FieldDefinition"]["DataType"], diff --git a/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json b/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json index 947761b8c79a0f..138c5743235888 100644 --- a/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json +++ b/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json @@ -2508,6 +2508,74 @@ }, "RelationshipName": null, "IsNillable": true + }, + { + "attributes": { + "type": "EntityParticle", + "url": "/services/data/v54.0/tooling/sobjects/EntityParticle/Account.Unique_Account" + }, + "QualifiedApiName": "Unique_Account", + "DeveloperName": "Unique_Account", + "Label": "# Unique_Account", + "FieldDefinition": { + "attributes": { + "type": "FieldDefinition", + "url": "/services/data/v54.0/tooling/sobjects/FieldDefinition/Account.Unique_Account" + }, + "DataType": "Text(80)", + "LastModifiedDate": null, + "LastModifiedBy": null, + "IsIndexed": false, + "ComplianceGroup": null, + "SecurityClassification": null + }, + "DataType": "string", + "Precision": 0, + "Scale": 0, + "Length": 80, + "Digits": 0, + "IsUnique": false, + "IsCompound": false, + "IsComponent": false, + "ReferenceTo": { + "referenceTo": null + }, + "RelationshipName": null, + "IsNillable": true + }, + { + "attributes": { + "type": "EntityParticle", + "url": "/services/data/v54.0/tooling/sobjects/EntityParticle/Account.Unique_Number" + }, + "QualifiedApiName": "Unique_Number", + "DeveloperName": "Unique_Account", + "Label": "#Unique_Number", + "FieldDefinition": { + "attributes": { + "type": "FieldDefinition", + "url": "/services/data/v54.0/tooling/sobjects/FieldDefinition/Account.Unique_Number" + }, + "DataType": "Text(80)", + "LastModifiedDate": null, + "LastModifiedBy": null, + "IsIndexed": false, + "ComplianceGroup": null, + "SecurityClassification": null + }, + "DataType": "string", + "Precision": 0, + "Scale": 0, + "Length": 80, + "Digits": 0, + "IsUnique": false, + "IsCompound": false, + "IsComponent": false, + "ReferenceTo": { + "referenceTo": null + }, + "RelationshipName": null, + "IsNillable": true } ] } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 4e54f199eafed4..90df0997495f03 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -1350,6 +1350,40 @@ }, "isPartOfKey": false, "jsonProps": "{}" + }, + { + "fieldPath": "Unique_Account", + "nullable": true, + "description": "\\# Unique_Account", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "Text(80)", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "jsonProps": "{}" + }, + { + "fieldPath": "Unique_Number", + "nullable": true, + "description": "\\#Unique_Number", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "Text(80)", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "jsonProps": "{}" } ], "primaryKeys": [ From 5c06f7a245356759470391f770ffb62b4b738042 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 2 Apr 2024 22:13:05 +0200 Subject: [PATCH 15/17] fix(ingest/bigquery): Supporting lineage extraction in case the 
select query result's target table is set on job (#10191) Co-authored-by: Harshal Sheth --- .../datahub/ingestion/source/bigquery_v2/lineage.py | 11 ++++++++++- .../tests/unit/test_bigquery_lineage.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 764f4f24499249..0d205679a8bf3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -752,8 +752,17 @@ def _create_lineage_map( # Try the sql parser first. if self.config.lineage_use_sql_parser: + if e.statementType == "SELECT": + # We wrap select statements in a CTE to make them parseable as insert statement. + # This is a workaround for the sql parser to support the case where the user runs a query and inserts the result into a table.. + query = f"""create table `{destination_table.table_identifier.get_table_name()}` AS + ( + {e.query} + )""" + else: + query = e.query raw_lineage = sqlglot_lineage( - e.query, + query, schema_resolver=sql_parser_schema_resolver, default_db=e.project_id, ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/test_bigquery_lineage.py index 1edac3fde0a6c3..5d8c040b4123b5 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_lineage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_lineage.py @@ -28,7 +28,7 @@ def lineage_entries() -> List[QueryEvent]: SELECT first.a, second.b FROM `my_project.my_dataset.my_source_table1` first LEFT JOIN `my_project.my_dataset.my_source_table2` second ON first.id = second.id """, - statementType="SELECT", + statementType="INSERT", project_id="proj_12344", end_time=None, referencedTables=[ From 57de905c66b6992aefb2051708fa83898fa82cec Mon Sep 17 00:00:00 2001 From: trialiya <41265764+trialiya@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:33:58 +0300 Subject: [PATCH 16/17] fix(retention): fix time-based retention (#10118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Хазиев Ленар --- .../metadata/entity/cassandra/CassandraRetentionService.java | 2 +- .../linkedin/metadata/entity/ebean/EbeanRetentionService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index 91e31975298771..bc6ee6ddd5026f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -195,7 +195,7 @@ private void applyTimeBasedRetention( @Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull final TimeBasedRetention retention) { - Timestamp threshold = new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000); + Timestamp threshold = new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000L); SimpleStatement ss = deleteFrom(CassandraAspect.TABLE_NAME) .whereColumn(CassandraAspect.URN_COLUMN) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index 250a81d9c8edcf..77752153aad47e 100644 
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -156,7 +156,7 @@ private Expression getTimeBasedRetentionQuery(@Nonnull final TimeBasedRetention return new SimpleExpression( EbeanAspectV2.CREATED_ON_COLUMN, Op.LT, - new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000)); + new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000L)); } private void applyRetention( From 888a1de9fc85169ead4eb9ba207ce85b56abcbd4 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 2 Apr 2024 19:36:52 -0500 Subject: [PATCH 17/17] feat(lineage): give via and paths in entity lineage response (#10192) --- .../resolvers/load/EntityLineageResultResolver.java | 7 +++++++ .../datahub/graphql/types/mappers/MapperUtils.java | 9 +++++++++ .../mappers/UrnSearchAcrossLineageResultsMapper.java | 10 +--------- datahub-graphql-core/src/main/resources/entity.graphql | 4 ++++ datahub-graphql-core/src/main/resources/search.graphql | 5 +++++ .../metadata/graph/elastic/ESGraphQueryDAO.java | 8 +++++--- .../linkedin/metadata/search/LineageSearchService.java | 1 + .../linkedin/metadata/graph/LineageRelationship.pdl | 5 +++++ .../linkedin/metadata/search/LineageSearchEntity.pdl | 5 +++++ .../com.linkedin.entity.entities.snapshot.json | 5 +++++ .../com.linkedin.lineage.relationships.snapshot.json | 5 +++++ 11 files changed, 52 insertions(+), 12 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java index 8de18ec01e6dc2..e28ec3dbb870fa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java @@ -1,8 +1,10 @@ package com.linkedin.datahub.graphql.resolvers.load; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.datahub.graphql.types.mappers.MapperUtils.*; import com.datahub.authorization.AuthorizationConfiguration; +import com.linkedin.common.UrnArrayArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; @@ -156,6 +158,11 @@ private LineageRelationship mapEntityRelationship( result.setUpdatedActor(UrnToEntityMapper.map(context, updatedActor)); } result.setIsManual(lineageRelationship.hasIsManual() && lineageRelationship.isIsManual()); + if (lineageRelationship.getPaths() != null) { + UrnArrayArray paths = lineageRelationship.getPaths(); + result.setPaths( + paths.stream().map(path -> mapPath(context, path)).collect(Collectors.toList())); + } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java index 3cae0155a86db5..6bda333256a4c9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java @@ -3,9 +3,11 @@ import static com.linkedin.datahub.graphql.util.SearchInsightsUtil.*; import static com.linkedin.metadata.utils.SearchUtil.*; +import 
com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.AggregationMetadata; +import com.linkedin.datahub.graphql.generated.EntityPath; import com.linkedin.datahub.graphql.generated.FacetMetadata; import com.linkedin.datahub.graphql.generated.MatchedField; import com.linkedin.datahub.graphql.generated.SearchResult; @@ -104,4 +106,11 @@ public static SearchSuggestion mapSearchSuggestion( return new SearchSuggestion( suggestion.getText(), suggestion.getScore(), Math.toIntExact(suggestion.getFrequency())); } + + public static EntityPath mapPath(@Nullable final QueryContext context, UrnArray path) { + EntityPath entityPath = new EntityPath(); + entityPath.setPath( + path.stream().map(p -> UrnToEntityMapper.map(context, p)).collect(Collectors.toList())); + return entityPath; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java index b85303909c0801..ca363deb90c4de 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java @@ -3,11 +3,9 @@ import static com.linkedin.datahub.graphql.types.mappers.MapperUtils.*; import static com.linkedin.datahub.graphql.util.SearchInsightsUtil.*; -import com.linkedin.common.UrnArray; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.Entity; -import com.linkedin.datahub.graphql.generated.EntityPath; import com.linkedin.datahub.graphql.generated.FreshnessStats; import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResult; import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResults; @@ -72,13 +70,7 @@ private SearchAcrossLineageResult mapResult( .setDegree(searchEntity.getDegree()) .setDegrees(new ArrayList<>(searchEntity.getDegrees())) .setExplored(Boolean.TRUE.equals(searchEntity.isExplored())) + .setIgnoredAsHop(Boolean.TRUE.equals(searchEntity.isIgnoredAsHop())) .build(); } - - private EntityPath mapPath(@Nullable final QueryContext context, UrnArray path) { - EntityPath entityPath = new EntityPath(); - entityPath.setPath( - path.stream().map(p -> UrnToEntityMapper.map(context, p)).collect(Collectors.toList())); - return entityPath; - } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index b750d206261018..106148c425791c 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1331,6 +1331,10 @@ type LineageRelationship { """ isManual: Boolean + """ + The paths traversed for this relationship + """ + paths: [EntityPath] } """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 13c1ff2e8a7648..499ac3a0860d40 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -747,6 +747,11 @@ type SearchAcrossLineageResult { """ explored: Boolean! + """ + Whether this relationship was ignored as a hop + """ + ignoredAsHop: Boolean! 
+ } """ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index ea8d8fea54633c..bdcbf020ecf781 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -389,9 +389,11 @@ private Stream processOneHopLineage( || platformMatches( lineageRelationship.getEntity(), ignoreAsHops.get(entityType))))) - .map(LineageRelationship::getEntity) - .forEach(additionalCurrentLevel::add); - ; + .forEach( + lineageRelationship -> { + additionalCurrentLevel.add(lineageRelationship.getEntity()); + lineageRelationship.setIgnoredAsHop(true); + }); if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index bb316f6f2b41c3..94f56fec2acc93 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -739,6 +739,7 @@ private LineageSearchEntity buildLineageSearchEntity( entity.setDegrees(lineageRelationship.getDegrees()); } entity.setExplored(Boolean.TRUE.equals(lineageRelationship.isExplored())); + entity.setIgnoredAsHop(Boolean.TRUE.equals(lineageRelationship.isIgnoredAsHop())); } return entity; } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl index a169157955e67b..552dd7323b5517 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/graph/LineageRelationship.pdl @@ -72,4 +72,9 @@ record LineageRelationship { * Marks this relationship as explored during the graph walk */ explored: optional boolean + + /** + * Whether this relationship was ignored as a hop while performing the graph walk + */ + ignoredAsHop: optional boolean } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl index fdfc8b2d53291c..3fd8a48c6bf5ee 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/search/LineageSearchEntity.pdl @@ -34,4 +34,9 @@ record LineageSearchEntity includes SearchEntity { * Marks an entity as having been explored for as a part of the graph walk */ explored: optional boolean + + /** + * Whether this relationship was ignored as a hop while performing the graph walk + */ + ignoredAsHop: optional boolean } \ No newline at end of file diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 4915f06ffe5d2a..43845a5fbbf6a3 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6210,6 +6210,11 @@ "type" : "boolean", "doc" : "Marks an entity as having been explored for as a part of the graph 
walk", "optional" : true + }, { + "name" : "ignoredAsHop", + "type" : "boolean", + "doc" : "Whether this relationship was ignored as a hop while performing the graph walk", + "optional" : true } ] } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json index 00b3c925d0e731..3886faffadedbb 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.lineage.relationships.snapshot.json @@ -182,6 +182,11 @@ "type" : "boolean", "doc" : "Marks this relationship as explored during the graph walk", "optional" : true + }, { + "name" : "ignoredAsHop", + "type" : "boolean", + "doc" : "Whether this relationship was ignored as a hop while performing the graph walk", + "optional" : true } ] } },