From c3c4bef1ad746a57a1a6cff821a732fe8114f695 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 3 Jan 2024 22:59:39 +0530 Subject: [PATCH 01/16] ci(doc): tweak build rule to avoid docker build for docs (#9555) --- .github/workflows/docker-unified.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 454e7661402459..8afce059572c7d 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -4,12 +4,14 @@ on: branches: - master paths-ignore: + - "docs-website/**" - "docs/**" - "**.md" pull_request: branches: - "**" paths-ignore: + - "docs-website/**" - "docs/**" - "**.md" release: From c9613043c86e169a888d5ac60f0efdcd1551a2b0 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 3 Jan 2024 14:28:22 -0500 Subject: [PATCH 02/16] fix(ingest): improve kafka-connect test stability (#9519) --- .../tests/integration/kafka/docker-compose.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/tests/integration/kafka/docker-compose.yml b/metadata-ingestion/tests/integration/kafka/docker-compose.yml index 43f30cbe1e6651..0a4422e07515cf 100644 --- a/metadata-ingestion/tests/integration/kafka/docker-compose.yml +++ b/metadata-ingestion/tests/integration/kafka/docker-compose.yml @@ -1,5 +1,5 @@ --- -version: '3.8' +version: "3.8" services: zookeeper: image: confluentinc/cp-zookeeper:7.2.2 @@ -9,7 +9,8 @@ services: ports: - "52181" volumes: - - test_zkdata:/var/opt/zookeeper + - test_zkdata:/var/lib/zookeeper/data + - test_zklogs:/var/lib/zookeeper/log broker: image: confluentinc/cp-kafka:7.2.2 @@ -34,3 +35,4 @@ services: volumes: test_zkdata: + test_zklogs: From 83b904e379b0e9a13d22659e483c6d3d4c9b29ba Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 3 Jan 2024 14:28:32 -0500 Subject: [PATCH 03/16] fix(ingest/looker): add user stats to report (#9505) --- .../ingestion/source/looker/looker_common.py | 5 +++++ .../ingestion/source/looker/looker_config.py | 5 ----- .../ingestion/source/looker/looker_source.py | 13 +++++-------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 53533a8d27c9b5..94a56bb9281cb5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -1059,6 +1059,7 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport): dashboards_scanned_for_usage: int = 0 charts_scanned_for_usage: int = 0 charts_with_activity: LossySet[str] = dataclasses_field(default_factory=LossySet) + accessed_dashboards: int = 0 dashboards_with_activity: LossySet[str] = dataclasses_field( default_factory=LossySet ) @@ -1066,6 +1067,10 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport): _looker_explore_registry: Optional[LookerExploreRegistry] = None total_explores: int = 0 explores_scanned: int = 0 + + resolved_user_ids: int = 0 + email_ids_missing: int = 0 # resolved users with missing email addresses + _looker_api: Optional[LookerAPI] = None query_latency: Dict[str, datetime.timedelta] = dataclasses_field( default_factory=dict diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 514f22b4f21580..52a21e8f122597 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -160,11 +160,6 @@ class LookerDashboardSourceConfig( description="When enabled, extracts ownership from Looker directly. When disabled, ownership is left empty " "for dashboards and charts.", ) - actor: Optional[str] = Field( - None, - description="This config is deprecated in favor of `extract_owners`. Previously, was the actor to use in " - "ownership properties of ingested metadata.", - ) strip_user_ids_from_email: bool = Field( False, description="When enabled, converts Looker user emails of the form name@domain.com to urn:li:corpuser:name " diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 7e8fbfde120420..0cce267bf5579c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -129,9 +129,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase): source_config: LookerDashboardSourceConfig reporter: LookerDashboardSourceReport user_registry: LookerUserRegistry - accessed_dashboards: int = 0 - resolved_user_ids: int = 0 - email_ids_missing: int = 0 # resolved users with missing email addresses reachable_look_registry: Set[ str ] # Keep track of look-id which are reachable from Dashboard @@ -866,7 +863,7 @@ def _get_folder_path(self, folder: FolderBase, client: LookerAPI) -> str: def _get_looker_dashboard( self, dashboard: Dashboard, client: LookerAPI ) -> LookerDashboard: - self.accessed_dashboards += 1 + self.reporter.accessed_dashboards += 1 if dashboard.folder is None: logger.debug(f"{dashboard.id} has no folder") dashboard_folder_path = None @@ -928,9 +925,9 @@ def _get_looker_user(self, user_id: Optional[str]) -> Optional[LookerUser]: if user is not None and self.source_config.extract_owners: # Keep track of how many user ids we were able to resolve - self.resolved_user_ids += 1 + self.reporter.resolved_user_ids += 1 if user.email is None: - self.email_ids_missing += 1 + self.reporter.email_ids_missing += 1 return user @@ -1313,8 +1310,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if ( self.source_config.extract_owners - and self.resolved_user_ids > 0 - and self.email_ids_missing == self.resolved_user_ids + and self.reporter.resolved_user_ids > 0 + and self.reporter.email_ids_missing == self.reporter.resolved_user_ids ): # Looks like we tried to extract owners and could not find their email addresses. 
This is likely a permissions issue
            self.reporter.report_warning(

From 186b6f942d3fa7f0ce379add72cbcb57bccd4bb0 Mon Sep 17 00:00:00 2001
From: Shirshanka Das
Date: Wed, 3 Jan 2024 12:21:06 -0800
Subject: [PATCH 04/16] perf(lineage): Rewrite lineage query for Elastic graph
 store (#9552)

---
 .../graph/elastic/ESGraphQueryDAO.java        |  82 ++++---
 .../graph/search/ESGraphQueryDAOTest.java     |  94 ++++++-
 ...1.json => lineage_query_filters_full.json} |  98 ++++----
 ...eage_query_filters_full_empty_filters.json |  60 +++++
 ...e_query_filters_full_multiple_filters.json | 229 ++++++++++++++++++
 .../lineage_query_filters_limited.json        |  32 +++
 6 files changed, 508 insertions(+), 87 deletions(-)
 rename metadata-io/src/test/resources/elasticsearch/sample_filters/{lineage_query_filters_1.json => lineage_query_filters_full.json} (81%)
 create mode 100644 metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json
 create mode 100644 metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json
 create mode 100644 metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json

diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java
index 92960bc9222ab4..97cb186ce948ce 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java
@@ -336,17 +336,10 @@ private List<LineageRelationship> getLineageRelationships(
             Collectors.toMap(
                 Function.identity(),
                 entityType -> lineageRegistry.getLineageRelationships(entityType, direction)));
-    BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
-    // Get all relation types relevant to the set of urns to hop from
-    urnsPerEntityType.forEach(
-        (entityType, urns) ->
-            finalQuery.should(
-                getQueryForLineage(
-                    urns,
-                    edgesPerEntityType.getOrDefault(entityType, Collections.emptyList()),
-                    graphFilters,
-                    startTimeMillis,
-                    endTimeMillis)));
+
+    QueryBuilder finalQuery =
+        getLineageQuery(
+            urnsPerEntityType, edgesPerEntityType, graphFilters, startTimeMillis, endTimeMillis);
     SearchResponse response =
         executeSearchQuery(finalQuery, 0, graphQueryConfiguration.getMaxResult());
     Set<Urn> entityUrnSet = new HashSet<>(entityUrns);
@@ -361,18 +354,53 @@ private List<LineageRelationship> getLineageRelationships(
         entityUrnSet, response, validEdges, visitedEntities, numHops, existingPaths);
   }
 
-  // Get search query for given list of edges and source urns
   @VisibleForTesting
-  public static QueryBuilder getQueryForLineage(
-      @Nonnull List<Urn> urns,
-      @Nonnull List<EdgeInfo> lineageEdges,
+  public static QueryBuilder getLineageQuery(
+      @Nonnull Map<String, List<Urn>> urnsPerEntityType,
+      @Nonnull Map<String, List<EdgeInfo>> edgesPerEntityType,
       @Nonnull GraphFilters graphFilters,
       @Nullable Long startTimeMillis,
       @Nullable Long endTimeMillis) {
-    BoolQueryBuilder query = QueryBuilders.boolQuery();
-    if (lineageEdges.isEmpty()) {
-      return query;
+    BoolQueryBuilder entityTypeQueries = QueryBuilders.boolQuery();
+    // Get all relation types relevant to the set of urns to hop from
+    urnsPerEntityType.forEach(
+        (entityType, urns) -> {
+          if (edgesPerEntityType.containsKey(entityType)
+              && !edgesPerEntityType.get(entityType).isEmpty()) {
+            entityTypeQueries.should(
+                getLineageQueryForEntityType(
+                    urns, edgesPerEntityType.get(entityType), graphFilters));
+          }
+        });
+
+    BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
+
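+    // The per-entity-type edge queries above are grouped under a single bool query;
+    // the entity-type and time-window constraints that used to be repeated inside
+    // every should clause are applied once below, in filter context, so they do not
+    // contribute to scoring and can be cached by Elasticsearch.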
+    finalQuery.filter(entityTypeQueries);
+    finalQuery.filter(buildEntityTypesFilter(graphFilters.getAllowedEntityTypes(), SOURCE));
+    finalQuery.filter(buildEntityTypesFilter(graphFilters.getAllowedEntityTypes(), DESTINATION));
+
+    /*
+     * Optional - Add edge filtering based on time windows.
+     */
+    if (startTimeMillis != null && endTimeMillis != null) {
+      finalQuery.filter(TimeFilterUtils.getEdgeTimeFilterQuery(startTimeMillis, endTimeMillis));
+    } else {
+      log.debug(
+          String.format(
+              "Empty time filter range provided: start time %s, end time: %s. Skipping application of time filters",
+              startTimeMillis, endTimeMillis));
+    }
+
+    return finalQuery;
+  }
+
+  // Get search query for given list of edges and source urns
+  @VisibleForTesting
+  public static QueryBuilder getLineageQueryForEntityType(
+      @Nonnull List<Urn> urns,
+      @Nonnull List<EdgeInfo> lineageEdges,
+      @Nonnull GraphFilters graphFilters) {
+    BoolQueryBuilder query = QueryBuilders.boolQuery();
 
     Map<RelationshipDirection, List<EdgeInfo>> edgesByDirection =
         lineageEdges.stream().collect(Collectors.groupingBy(EdgeInfo::getDirection));
@@ -388,18 +416,6 @@ public static QueryBuilder getQueryForLineage(
       query.should(getIncomingEdgeQuery(urns, incomingEdges, graphFilters));
     }
 
-    /*
-     * Optional - Add edge filtering based on time windows.
-     */
-    if (startTimeMillis != null && endTimeMillis != null) {
-      query.must(TimeFilterUtils.getEdgeTimeFilterQuery(startTimeMillis, endTimeMillis));
-    } else {
-      log.debug(
-          String.format(
-              "Empty time filter range provided: start time %s, end time: %s. Skipping application of time filters",
-              startTimeMillis, endTimeMillis));
-    }
-
     return query;
   }
 
@@ -601,9 +617,6 @@ private static BoolQueryBuilder getOutGoingEdgeQuery(
     BoolQueryBuilder outgoingEdgeQuery = QueryBuilders.boolQuery();
     outgoingEdgeQuery.must(buildUrnFilters(urns, SOURCE));
     outgoingEdgeQuery.must(buildEdgeFilters(outgoingEdges));
-    outgoingEdgeQuery.must(buildEntityTypesFilter(graphFilters.getAllowedEntityTypes(), SOURCE));
-    outgoingEdgeQuery.must(
-        buildEntityTypesFilter(graphFilters.getAllowedEntityTypes(), DESTINATION));
     return outgoingEdgeQuery;
   }
 
@@ -612,9 +625,6 @@ private static BoolQueryBuilder getIncomingEdgeQuery(
     BoolQueryBuilder incomingEdgeQuery = QueryBuilders.boolQuery();
     incomingEdgeQuery.must(buildUrnFilters(urns, DESTINATION));
     incomingEdgeQuery.must(buildEdgeFilters(incomingEdges));
-    incomingEdgeQuery.must(buildEntityTypesFilter(graphFilters.getAllowedEntityTypes(), SOURCE));
-    incomingEdgeQuery.must(
-        buildEntityTypesFilter(graphFilters.getAllowedEntityTypes(), DESTINATION));
     return incomingEdgeQuery;
   }

diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java
index 9fc9490bfd7ef1..5b7f880e6d83ab 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java
@@ -23,16 +23,40 @@
 
 public class ESGraphQueryDAOTest {
 
-  private static final String TEST_QUERY_FILE =
-      "elasticsearch/sample_filters/lineage_query_filters_1.json";
+  private static final String TEST_QUERY_FILE_LIMITED =
+      "elasticsearch/sample_filters/lineage_query_filters_limited.json";
+  private static final String TEST_QUERY_FILE_FULL =
+      "elasticsearch/sample_filters/lineage_query_filters_full.json";
+  private static final String TEST_QUERY_FILE_FULL_EMPTY_FILTERS =
+      "elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json";
+  private static final String TEST_QUERY_FILE_FULL_MULTIPLE_FILTERS =
+      "elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json";
 
   @Test
   private static void testGetQueryForLineageFullArguments() throws Exception {
-    URL url = Resources.getResource(TEST_QUERY_FILE);
-    String expectedQuery = Resources.toString(url, StandardCharsets.UTF_8);
-
-    List<Urn> urns = new ArrayList<>();
+    URL urlLimited = Resources.getResource(TEST_QUERY_FILE_LIMITED);
+    String expectedQueryLimited = Resources.toString(urlLimited, StandardCharsets.UTF_8);
+    URL urlFull = Resources.getResource(TEST_QUERY_FILE_FULL);
+    String expectedQueryFull = Resources.toString(urlFull, StandardCharsets.UTF_8);
+    URL urlFullEmptyFilters = Resources.getResource(TEST_QUERY_FILE_FULL_EMPTY_FILTERS);
+    String expectedQueryFullEmptyFilters =
+        Resources.toString(urlFullEmptyFilters, StandardCharsets.UTF_8);
+    URL urlFullMultipleFilters = Resources.getResource(TEST_QUERY_FILE_FULL_MULTIPLE_FILTERS);
+    String expectedQueryFullMultipleFilters =
+        Resources.toString(urlFullMultipleFilters, StandardCharsets.UTF_8);
+
+    List<Urn> urns = List.of(Urn.createFromString("urn:li:dataset:test-urn"));
+    List<Urn> urnsMultiple1 =
+        ImmutableList.of(
+            UrnUtils.getUrn("urn:li:dataset:test-urn"),
+            UrnUtils.getUrn("urn:li:dataset:test-urn2"),
+            UrnUtils.getUrn("urn:li:dataset:test-urn3"));
+    List<Urn> urnsMultiple2 =
+        ImmutableList.of(
+            UrnUtils.getUrn("urn:li:chart:test-urn"),
+            UrnUtils.getUrn("urn:li:chart:test-urn2"),
+            UrnUtils.getUrn("urn:li:chart:test-urn3"));
     List<LineageRegistry.EdgeInfo> edgeInfos =
         new ArrayList<>(
             ImmutableList.of(
                 new LineageRegistry.EdgeInfo(
                     "DownstreamOf", RelationshipDirection.INCOMING, Constants.DATASET_ENTITY_NAME)));
+    List<LineageRegistry.EdgeInfo> edgeInfosMultiple1 =
+        ImmutableList.of(
+            new LineageRegistry.EdgeInfo(
+                "DownstreamOf", RelationshipDirection.OUTGOING, Constants.DATASET_ENTITY_NAME),
+            new LineageRegistry.EdgeInfo(
+                "Consumes", RelationshipDirection.OUTGOING, Constants.DATASET_ENTITY_NAME));
+    List<LineageRegistry.EdgeInfo> edgeInfosMultiple2 =
+        ImmutableList.of(
+            new LineageRegistry.EdgeInfo(
+                "DownstreamOf", RelationshipDirection.OUTGOING, Constants.DATA_JOB_ENTITY_NAME),
+            new LineageRegistry.EdgeInfo(
+                "Consumes", RelationshipDirection.OUTGOING, Constants.DATA_JOB_ENTITY_NAME));
+    String entityType = "testEntityType";
+    Map<String, List<Urn>> urnsPerEntityType = Map.of(entityType, urns);
+    Map<String, List<Urn>> urnsPerEntityTypeMultiple =
+        Map.of(
+            Constants.DATASET_ENTITY_NAME,
+            urnsMultiple1,
+            Constants.CHART_ENTITY_NAME,
+            urnsMultiple2);
+    Map<String, List<LineageRegistry.EdgeInfo>> edgesPerEntityType = Map.of(entityType, edgeInfos);
+    Map<String, List<LineageRegistry.EdgeInfo>> edgesPerEntityTypeMultiple =
+        Map.of(
+            Constants.DATASET_ENTITY_NAME, edgeInfosMultiple1,
+            Constants.DATA_JOB_ENTITY_NAME, edgeInfosMultiple2);
     GraphFilters graphFilters = new GraphFilters(ImmutableList.of(Constants.DATASET_ENTITY_NAME));
+    GraphFilters graphFiltersMultiple =
+        new GraphFilters(
+            ImmutableList.of(
+                Constants.DATASET_ENTITY_NAME,
+                Constants.DASHBOARD_ENTITY_NAME,
+                Constants.DATA_JOB_ENTITY_NAME));
     Long startTime = 0L;
     Long endTime = 1L;
-    QueryBuilder builder =
-        ESGraphQueryDAO.getQueryForLineage(urns, edgeInfos, graphFilters, startTime, endTime);
-
-    Assert.assertEquals(builder.toString(), expectedQuery);
+    QueryBuilder limitedBuilder =
+        ESGraphQueryDAO.getLineageQueryForEntityType(urns, edgeInfos, graphFilters);
+
+    QueryBuilder fullBuilder =
+        ESGraphQueryDAO.getLineageQuery(
+            urnsPerEntityType, edgesPerEntityType, graphFilters, startTime, endTime);
+
+    QueryBuilder fullBuilderEmptyFilters =
+
ESGraphQueryDAO.getLineageQuery( + urnsPerEntityType, edgesPerEntityType, GraphFilters.emptyGraphFilters, null, null); + + QueryBuilder fullBuilderMultipleFilters = + ESGraphQueryDAO.getLineageQuery( + urnsPerEntityTypeMultiple, + edgesPerEntityTypeMultiple, + graphFiltersMultiple, + startTime, + endTime); + + Assert.assertEquals(limitedBuilder.toString(), expectedQueryLimited); + Assert.assertEquals(fullBuilder.toString(), expectedQueryFull); + Assert.assertEquals(fullBuilderEmptyFilters.toString(), expectedQueryFullEmptyFilters); + Assert.assertEquals(fullBuilderMultipleFilters.toString(), expectedQueryFullMultipleFilters); } @Test diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_1.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json similarity index 81% rename from metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_1.json rename to metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json index eb84638f0ccd08..0a1cee08414a9d 100644 --- a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_1.json +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full.json @@ -1,6 +1,62 @@ { "bool" : { - "must" : [ + "filter" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "must" : [ + { + "terms" : { + "destination.urn" : [ + "urn:li:dataset:test-urn" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "relationshipType" : [ + "DownstreamOf" + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "terms" : { + "source.entityType" : [ + "dataset" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "destination.entityType" : [ + "dataset" + ], + "boost" : 1.0 + } + }, { "bool" : { "should" : [ @@ -160,46 +216,6 @@ } } ], - "should" : [ - { - "bool" : { - "must" : [ - { - "terms" : { - "destination.urn" : [ ], - "boost" : 1.0 - } - }, - { - "terms" : { - "relationshipType" : [ - "DownstreamOf" - ], - "boost" : 1.0 - } - }, - { - "terms" : { - "source.entityType" : [ - "dataset" - ], - "boost" : 1.0 - } - }, - { - "terms" : { - "destination.entityType" : [ - "dataset" - ], - "boost" : 1.0 - } - } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 - } - } - ], "adjust_pure_negative" : true, "boost" : 1.0 } diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json new file mode 100644 index 00000000000000..ab2841d6602d82 --- /dev/null +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json @@ -0,0 +1,60 @@ +{ + "bool" : { + "filter" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "must" : [ + { + "terms" : { + "destination.urn" : [ + "urn:li:dataset:test-urn" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "relationshipType" : [ + "DownstreamOf" + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "terms" : { + "source.entityType" : [ ], + "boost" : 1.0 + } + }, 
+ { + "terms" : { + "destination.entityType" : [ ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json new file mode 100644 index 00000000000000..39f595e0e8dd2d --- /dev/null +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json @@ -0,0 +1,229 @@ +{ + "bool" : { + "filter" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "must" : [ + { + "terms" : { + "source.urn" : [ + "urn:li:dataset:test-urn", + "urn:li:dataset:test-urn2", + "urn:li:dataset:test-urn3" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "relationshipType" : [ + "DownstreamOf", + "Consumes" + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "terms" : { + "source.entityType" : [ + "dataset", + "dashboard", + "dataJob" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "destination.entityType" : [ + "dataset", + "dashboard", + "dataJob" + ], + "boost" : 1.0 + } + }, + { + "bool" : { + "should" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "createdOn", + "boost" : 1.0 + } + }, + { + "range" : { + "createdOn" : { + "from" : 0, + "to" : 1, + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "updatedOn", + "boost" : 1.0 + } + }, + { + "range" : { + "updatedOn" : { + "from" : 0, + "to" : 1, + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "must" : [ + { + "bool" : { + "should" : [ + { + "bool" : { + "must_not" : [ + { + "exists" : { + "field" : "createdOn", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "must" : [ + { + "term" : { + "createdOn" : { + "value" : 0, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "should" : [ + { + "bool" : { + "must_not" : [ + { + "exists" : { + "field" : "updatedOn", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "must" : [ + { + "term" : { + "updatedOn" : { + "value" : 0, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "term" : { + "properties.source" : { + "value" : "UI", + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json new file mode 
100644 index 00000000000000..95d468ec3dac8e --- /dev/null +++ b/metadata-io/src/test/resources/elasticsearch/sample_filters/lineage_query_filters_limited.json @@ -0,0 +1,32 @@ +{ + "bool" : { + "should" : [ + { + "bool" : { + "must" : [ + { + "terms" : { + "destination.urn" : [ + "urn:li:dataset:test-urn" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "relationshipType" : [ + "DownstreamOf" + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file From f06b5c782099ace00116fd33dda73af5a48e4184 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 3 Jan 2024 15:30:11 -0500 Subject: [PATCH 05/16] feat(ingest): improve config loading helpers (#9477) --- .../datahub/configuration/config_loader.py | 48 ++++++++------ .../datahub/ingestion/run/pipeline_config.py | 3 +- .../src/datahub/secret/__init__.py | 0 .../datahub/secret/datahub_secret_store.py | 66 +++++++++++++++++++ .../datahub/secret/datahub_secrets_client.py | 45 +++++++++++++ .../src/datahub/secret/secret_common.py | 59 +++++++++++++++++ .../src/datahub/secret/secret_store.py | 43 ++++++++++++ 7 files changed, 244 insertions(+), 20 deletions(-) create mode 100644 metadata-ingestion/src/datahub/secret/__init__.py create mode 100644 metadata-ingestion/src/datahub/secret/datahub_secret_store.py create mode 100644 metadata-ingestion/src/datahub/secret/datahub_secrets_client.py create mode 100644 metadata-ingestion/src/datahub/secret/secret_common.py create mode 100644 metadata-ingestion/src/datahub/secret/secret_store.py diff --git a/metadata-ingestion/src/datahub/configuration/config_loader.py b/metadata-ingestion/src/datahub/configuration/config_loader.py index 2f41af6f7286e6..4266bac0c79abd 100644 --- a/metadata-ingestion/src/datahub/configuration/config_loader.py +++ b/metadata-ingestion/src/datahub/configuration/config_loader.py @@ -1,56 +1,59 @@ import io +import os import pathlib import re import sys import tempfile import unittest.mock -from typing import Any, Dict, Set, Union +from typing import Any, Dict, Mapping, Optional, Set, Union from urllib import parse import requests -from expandvars import UnboundVariable, expandvars +from expandvars import UnboundVariable, expand from datahub.configuration.common import ConfigurationError, ConfigurationMechanism from datahub.configuration.json_loader import JsonConfigurationMechanism from datahub.configuration.toml import TomlConfigurationMechanism from datahub.configuration.yaml import YamlConfigurationMechanism +Environ = Mapping[str, str] -def _resolve_element(element: str) -> str: + +def _resolve_element(element: str, environ: Environ) -> str: if re.search(r"(\$\{).+(\})", element): - return expandvars(element, nounset=True) + return expand(element, nounset=True, environ=environ) elif element.startswith("$"): try: - return expandvars(element, nounset=True) + return expand(element, nounset=True, environ=environ) except UnboundVariable: return element else: return element -def _resolve_list(ele_list: list) -> list: +def _resolve_list(ele_list: list, environ: Environ) -> list: new_v: list = [] for ele in ele_list: if isinstance(ele, str): - new_v.append(_resolve_element(ele)) + new_v.append(_resolve_element(ele, environ=environ)) elif isinstance(ele, list): - new_v.append(_resolve_list(ele)) + new_v.append(_resolve_list(ele, environ=environ)) elif isinstance(ele, dict): - new_v.append(resolve_env_variables(ele)) + new_v.append(resolve_env_variables(ele, 
environ=environ)) else: new_v.append(ele) return new_v -def resolve_env_variables(config: dict) -> dict: +def resolve_env_variables(config: dict, environ: Environ) -> dict: new_dict: Dict[Any, Any] = {} for k, v in config.items(): if isinstance(v, dict): - new_dict[k] = resolve_env_variables(v) + new_dict[k] = resolve_env_variables(v, environ=environ) elif isinstance(v, list): - new_dict[k] = _resolve_list(v) + new_dict[k] = _resolve_list(v, environ=environ) elif isinstance(v, str): - new_dict[k] = _resolve_element(v) + new_dict[k] = _resolve_element(v, environ=environ) else: new_dict[k] = v return new_dict @@ -60,13 +63,20 @@ def list_referenced_env_variables(config: dict) -> Set[str]: # This is a bit of a hack, but expandvars does a bunch of escaping # and other logic that we don't want to duplicate here. - with unittest.mock.patch("expandvars.getenv") as mock_getenv: - mock_getenv.return_value = "mocked_value" + vars = set() + + def mock_get_env(key: str, default: Optional[str] = None) -> str: + vars.add(key) + if default is not None: + return default + return "mocked_value" + + mock = unittest.mock.MagicMock() + mock.get.side_effect = mock_get_env - resolve_env_variables(config) + resolve_env_variables(config, environ=mock) - calls = mock_getenv.mock_calls - return set([call[1][0] for call in calls]) + return vars WRITE_TO_FILE_DIRECTIVE_PREFIX = "__DATAHUB_TO_FILE_" @@ -147,7 +157,7 @@ def load_config_file( config = raw_config.copy() if resolve_env_vars: - config = resolve_env_variables(config) + config = resolve_env_variables(config, environ=os.environ) if process_directives: config = _process_directives(config) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index f22f94c9e93514..c0f6add6df0063 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -1,5 +1,6 @@ import datetime import logging +import os import uuid from typing import Any, Dict, List, Optional @@ -112,7 +113,7 @@ def default_sink_is_datahub_rest(cls, values: Dict[str, Any]) -> Any: } # resolve env variables if present default_sink_config = config_loader.resolve_env_variables( - default_sink_config + default_sink_config, environ=os.environ ) values["sink"] = default_sink_config diff --git a/metadata-ingestion/src/datahub/secret/__init__.py b/metadata-ingestion/src/datahub/secret/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/secret/datahub_secret_store.py b/metadata-ingestion/src/datahub/secret/datahub_secret_store.py new file mode 100644 index 00000000000000..8301ff2d9dc1a6 --- /dev/null +++ b/metadata-ingestion/src/datahub/secret/datahub_secret_store.py @@ -0,0 +1,66 @@ +import logging +from typing import Any, Dict, List, Optional, Union + +from pydantic import BaseModel, validator + +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.secret.datahub_secrets_client import DataHubSecretsClient +from datahub.secret.secret_store import SecretStore + +logger = logging.getLogger(__name__) + + +class DataHubSecretStoreConfig(BaseModel): + graph_client: Optional[DataHubGraph] = None + graph_client_config: Optional[DatahubClientConfig] = None + + class Config: + arbitrary_types_allowed = True + + @validator("graph_client") + def check_graph_connection(cls, v: DataHubGraph) -> DataHubGraph: + if v is not None: + v.test_connection() + 
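+            # test_connection() raises if DataHub is unreachable, so an invalid
+            # graph client fails at config-parse time rather than mid-run.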
return v + + +# An implementation of SecretStore that fetches secrets from DataHub +class DataHubSecretStore(SecretStore): + # Client for fetching secrets from DataHub GraphQL API + client: DataHubSecretsClient + + def __init__(self, config: DataHubSecretStoreConfig): + # Attempt to establish an outbound connection to DataHub and create a client. + if config.graph_client is not None: + self.client = DataHubSecretsClient(graph=config.graph_client) + elif config.graph_client_config is not None: + graph = DataHubGraph(config.graph_client_config) + self.client = DataHubSecretsClient(graph) + else: + raise Exception( + "Invalid configuration provided: unable to construct DataHub Graph Client." + ) + + def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]: + # Fetch the secret from DataHub, using the credentials provided in the configuration. + # Use the GraphQL API. + try: + return self.client.get_secret_values(secret_names) + except Exception: + # Failed to resolve secrets, return empty. + logger.exception( + f"Caught exception while attempting to fetch secrets from DataHub. Secret names: {secret_names}" + ) + return {} + + def get_secret_value(self, secret_name: str) -> Union[str, None]: + secret_value_dict = self.get_secret_values([secret_name]) + return secret_value_dict.get(secret_name) + + def get_id(self) -> str: + return "datahub" + + @classmethod + def create(cls, config: Any) -> "DataHubSecretStore": + config = DataHubSecretStoreConfig.parse_obj(config) + return cls(config) diff --git a/metadata-ingestion/src/datahub/secret/datahub_secrets_client.py b/metadata-ingestion/src/datahub/secret/datahub_secrets_client.py new file mode 100644 index 00000000000000..c60aeff5db2f3f --- /dev/null +++ b/metadata-ingestion/src/datahub/secret/datahub_secrets_client.py @@ -0,0 +1,45 @@ +from typing import Dict, List, Optional + +from datahub.ingestion.graph.client import DataHubGraph + + +class DataHubSecretsClient: + """Class used to fetch secrets from DataHub.""" + + graph: DataHubGraph + + def __init__(self, graph: DataHubGraph): + self.graph = graph + + def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]: + if len(secret_names) == 0: + return {} + + request_json = { + "query": """query getSecretValues($input: GetSecretValuesInput!) {\n + getSecretValues(input: $input) {\n + name\n + value\n + }\n + }""", + "variables": {"input": {"secrets": secret_names}}, + } + # TODO: Use graph.execute_graphql() instead. 
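+        # Note: the graph client's underlying session is reused below so the
+        # request carries the same auth headers as other DataHub API calls.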
+
+        # Fetch secrets using the GraphQL API.
+        response = self.graph._session.post(
+            f"{self.graph.config.server}/api/graphql", json=request_json
+        )
+        response.raise_for_status()
+
+        # Verify response
+        res_data = response.json()
+        if "errors" in res_data:
+            raise Exception("Failed to retrieve secrets from DataHub.")
+
+        # Convert list of name, value secret pairs into a dict and return
+        secret_value_list = res_data["data"]["getSecretValues"]
+        secret_value_dict = dict()
+        for secret_value in secret_value_list:
+            secret_value_dict[secret_value["name"]] = secret_value["value"]
+        return secret_value_dict
diff --git a/metadata-ingestion/src/datahub/secret/secret_common.py b/metadata-ingestion/src/datahub/secret/secret_common.py
new file mode 100644
index 00000000000000..2f7a584d875380
--- /dev/null
+++ b/metadata-ingestion/src/datahub/secret/secret_common.py
@@ -0,0 +1,59 @@
+import json
+import logging
+from typing import List
+
+from datahub.configuration.config_loader import (
+    list_referenced_env_variables,
+    resolve_env_variables,
+)
+from datahub.secret.secret_store import SecretStore
+
+logger = logging.getLogger(__name__)
+
+
+def resolve_secrets(secret_names: List[str], secret_stores: List[SecretStore]) -> dict:
+    # Attempt to resolve each secret by checking every configured secret store.
+    final_secret_values = dict({})
+
+    for secret_store in secret_stores:
+        try:
+            # Retrieve secret values from the store.
+            secret_values_dict = secret_store.get_secret_values(secret_names)
+            # Overlay secret values from each store, if not None.
+            for secret_name, secret_value in secret_values_dict.items():
+                if secret_value is not None:
+                    # HACK: We previously, incorrectly replaced newline characters with
+                    # a r'\n' string. This was a lossy conversion, since we can no longer
+                    # distinguish between a newline character and the literal '\n' in
+                    # the secret value. For now, we assume that all r'\n' strings are
+                    # actually newline characters. This will break if a secret value
+                    # genuinely contains the string r'\n'.
+                    # Once this PR https://github.com/datahub-project/datahub/pull/9484
+                    # has baked for a while, we should be able to remove this hack.
+                    # TODO: This logic should live in the DataHub secret client/store,
+                    # not the general secret resolution logic.
+                    secret_value = secret_value.replace(r"\n", "\n")
+
+                    final_secret_values[secret_name] = secret_value
+        except Exception:
+            logger.exception(
+                f"Failed to fetch secret values from secret store with id {secret_store.get_id()}"
+            )
+    return final_secret_values
+
+
+def resolve_recipe(recipe: str, secret_stores: List[SecretStore]) -> dict:
+    json_recipe_raw = json.loads(recipe)
+
+    # 1. Extract all secrets that need to be resolved.
+    secrets_to_resolve = list_referenced_env_variables(json_recipe_raw)
+
+    # 2. Resolve secret values
+    secret_values_dict = resolve_secrets(list(secrets_to_resolve), secret_stores)
+
+    # 3. Substitute secrets into recipe file
+    json_recipe_resolved = resolve_env_variables(
+        json_recipe_raw, environ=secret_values_dict
+    )
+
+    return json_recipe_resolved
diff --git a/metadata-ingestion/src/datahub/secret/secret_store.py b/metadata-ingestion/src/datahub/secret/secret_store.py
new file mode 100644
index 00000000000000..d6d61d8c3c9241
--- /dev/null
+++ b/metadata-ingestion/src/datahub/secret/secret_store.py
@@ -0,0 +1,43 @@
+from abc import abstractmethod
+from typing import Dict, List, Optional
+
+from datahub.configuration.common import ConfigModel
+
+
+class SecretStoreConfig(ConfigModel):
+    type: str
+    config: Dict
+
+
+class SecretStore:
+    """
+    Abstract base class for a Secret Store, or a class that resolves "secret" values by name.
+    """
+
+    @classmethod
+    @abstractmethod
+    def create(cls, configs: dict) -> "SecretStore":
+        pass
+
+    @abstractmethod
+    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
+        """
+        Attempt to fetch a group of secrets, returning a Dictionary mapping each secret
+        name to its value, or None if one cannot be resolved by the store.
+        """
+
+    def get_secret_value(self, secret_name: str) -> Optional[str]:
+        secret_value_dict = self.get_secret_values([secret_name])
+        return secret_value_dict.get(secret_name)
+
+    @abstractmethod
+    def get_id(self) -> str:
+        """
+        Get a unique name or id associated with the Secret Store.
+        """
+
+    @abstractmethod
+    def close(self) -> None:
+        """
+        Release any resources held by the Secret Store.
+        """
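The pieces above compose: a recipe's `${VAR}` references are listed, resolved against one or more stores, and substituted back in. A minimal sketch using only the APIs added in this patch — `FixedSecretStore` and the token value are illustrative, not part of the change:

```python
# Sketch: resolving ${SECRET} references in a recipe via a custom SecretStore.
# FixedSecretStore is a stand-in written for this example; the patch's
# DataHubSecretStore would be used the same way.
from typing import Dict, List, Optional

from datahub.secret.secret_common import resolve_recipe
from datahub.secret.secret_store import SecretStore


class FixedSecretStore(SecretStore):
    """Serves secrets from an in-memory dict; handy for tests."""

    def __init__(self, secrets: Dict[str, str]) -> None:
        self._secrets = secrets

    @classmethod
    def create(cls, configs: dict) -> "FixedSecretStore":
        return cls(configs)

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
        return {name: self._secrets.get(name) for name in secret_names}

    def get_id(self) -> str:
        return "fixed"

    def close(self) -> None:
        pass


recipe = '{"sink": {"type": "datahub-rest", "config": {"token": "${DATAHUB_TOKEN}"}}}'
store = FixedSecretStore({"DATAHUB_TOKEN": "example-token"})

# Only ${DATAHUB_TOKEN} is looked up; the fully-resolved dict is returned.
print(resolve_recipe(recipe, [store]))
```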
From 822d0eb014080fef030cdee84731878787c38c61 Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Wed, 3 Jan 2024 15:11:07 -0600
Subject: [PATCH 06/16] feat(patch): add dashboardInfo and chartInfo support
 for patch (#9536)

---
 .../registry/SnapshotEntityRegistry.java      |   4 +
 .../template/AspectTemplateEngine.java        |   4 +-
 .../template/chart/ChartInfoTemplate.java     |  82 ++++
 .../dashboard/DashboardInfoTemplate.java      | 105 +++++
 .../datajob/DataJobInputOutputTemplate.java   |   2 -
 .../registry/patch/ChartInfoTemplateTest.java |  41 ++
 .../patch/DashboardInfoTemplateTest.java      |  41 ++
 .../UpstreamLineageTemplateTest.java          |   2 +-
 .../src/datahub/specific/chart.py             | 316 ++++++++++++++
 .../src/datahub/specific/dashboard.py         | 410 ++++++++++++++++++
 .../src/datahub/specific/datajob.py           |  12 +-
 .../src/datahub/specific/dataproduct.py       |  10 +-
 .../src/datahub/specific/dataset.py           |   8 +-
 .../src/datahub/specific/ownership.py         |   2 +-
 .../golden_dataproduct_out_upsert.json        |   2 +-
 .../unit/patch/complex_dataset_patch.json     |   2 +-
 .../tests/unit/patch/test_patch_builder.py    |  47 +-
 .../patch/chart/ChartInfoPatchBuilder.java    |  41 ++
 .../client/patch/common/PatchUtil.java        |  84 ++++
 .../dashboard/DashboardInfoPatchBuilder.java  | 103 +++++
 .../DataJobInputOutputPatchBuilder.java       |  73 +---
 .../java/datahub/client/patch/PatchTest.java  |  89 ++++
 22 files changed, 1385 insertions(+), 95 deletions(-)
 create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/chart/ChartInfoTemplate.java
 create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/dashboard/DashboardInfoTemplate.java
 create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/ChartInfoTemplateTest.java
 create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/DashboardInfoTemplateTest.java
 rename entity-registry/src/test/java/com/linkedin/metadata/models/registry/{ => patch}/UpstreamLineageTemplateTest.java (99%)
 create mode 100644 metadata-ingestion/src/datahub/specific/chart.py
 create mode 100644 metadata-ingestion/src/datahub/specific/dashboard.py
 create mode 100644 metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/chart/ChartInfoPatchBuilder.java
 create mode 100644 metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/common/PatchUtil.java
 create mode 100644 metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/dashboard/DashboardInfoPatchBuilder.java

diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
index cfc2c0901ce0da..bb0113abc9ed6c 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java
@@ -12,9 +12,11 @@
 import com.linkedin.metadata.models.EventSpec;
 import com.linkedin.metadata.models.registry.template.AspectTemplateEngine;
 import com.linkedin.metadata.models.registry.template.Template;
+import com.linkedin.metadata.models.registry.template.chart.ChartInfoTemplate;
 import com.linkedin.metadata.models.registry.template.common.GlobalTagsTemplate;
 import com.linkedin.metadata.models.registry.template.common.GlossaryTermsTemplate;
 import com.linkedin.metadata.models.registry.template.common.OwnershipTemplate;
+import com.linkedin.metadata.models.registry.template.dashboard.DashboardInfoTemplate;
 import com.linkedin.metadata.models.registry.template.dataflow.DataFlowInfoTemplate;
 import com.linkedin.metadata.models.registry.template.datajob.DataJobInfoTemplate;
 import com.linkedin.metadata.models.registry.template.datajob.DataJobInputOutputTemplate;
@@ -79,6 +81,8 @@ private AspectTemplateEngine populateTemplateEngine(Map<String, AspectSpec> aspectSpecs) {
     aspectSpecTemplateMap.put(DATA_JOB_INFO_ASPECT_NAME, new DataJobInfoTemplate());
     aspectSpecTemplateMap.put(
         DATA_PRODUCT_PROPERTIES_ASPECT_NAME, new DataProductPropertiesTemplate());
+    aspectSpecTemplateMap.put(CHART_INFO_ASPECT_NAME, new ChartInfoTemplate());
+    aspectSpecTemplateMap.put(DASHBOARD_INFO_ASPECT_NAME, new DashboardInfoTemplate());
     aspectSpecTemplateMap.put(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, new DataJobInputOutputTemplate());
     return new AspectTemplateEngine(aspectSpecTemplateMap);
   }

diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/AspectTemplateEngine.java
index 95849a94bae29c..029eb688c5291a 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/AspectTemplateEngine.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/AspectTemplateEngine.java
@@ -32,7 +32,9 @@ public class AspectTemplateEngine {
           DATA_FLOW_INFO_ASPECT_NAME,
           DATA_JOB_INFO_ASPECT_NAME,
-          DATA_PRODUCT_PROPERTIES_ASPECT_NAME)
+          DATA_PRODUCT_PROPERTIES_ASPECT_NAME,
+          DATA_JOB_INPUT_OUTPUT_ASPECT_NAME,
+          CHART_INFO_ASPECT_NAME,
+          DASHBOARD_INFO_ASPECT_NAME)
           .collect(Collectors.toSet());
 
   private final Map<String, Template<?>> _aspectTemplateMap;

diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/chart/ChartInfoTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/chart/ChartInfoTemplate.java
new file mode 100644
index 00000000000000..654f923e7322d0
--- /dev/null
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/chart/ChartInfoTemplate.java
@@ -0,0 +1,82 @@
+package com.linkedin.metadata.models.registry.template.chart;
+
+import static com.linkedin.metadata.Constants.*;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.linkedin.chart.ChartDataSourceTypeArray;
+import com.linkedin.chart.ChartInfo;
+import com.linkedin.common.AuditStamp;
+import com.linkedin.common.ChangeAuditStamps;
+import com.linkedin.common.EdgeArray;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.metadata.models.registry.template.ArrayMergingTemplate;
+import java.util.Collections;
+import javax.annotation.Nonnull;
+
+public class ChartInfoTemplate implements ArrayMergingTemplate<ChartInfo> {
+
+  private static final String INPUT_EDGES_FIELD_NAME = "inputEdges";
+  private static final String INPUTS_FIELD_NAME = "inputs";
+  private static final String DESTINATION_URN_FIELD_NAME = "destinationUrn";
+
+  @Override
+  public ChartInfo getSubtype(RecordTemplate recordTemplate) throws ClassCastException {
+    if (recordTemplate instanceof ChartInfo) {
+      return (ChartInfo) recordTemplate;
+    }
+    throw new ClassCastException("Unable to cast RecordTemplate to ChartInfo");
+  }
+
+  @Override
+  public Class<ChartInfo> getTemplateType() {
+    return ChartInfo.class;
+  }
+
+  @Nonnull
+  @Override
+  public ChartInfo getDefault() {
+    ChartInfo chartInfo = new ChartInfo();
+    chartInfo.setDescription("");
+    chartInfo.setTitle("");
+    ChangeAuditStamps changeAuditStamps = new ChangeAuditStamps();
+    AuditStamp auditStamp =
+        new AuditStamp()
+            .setActor(UrnUtils.getUrn(SYSTEM_ACTOR))
+            .setTime(System.currentTimeMillis());
+    changeAuditStamps.setCreated(auditStamp).setLastModified(auditStamp);
+    chartInfo.setLastModified(changeAuditStamps);
+    chartInfo.setInputEdges(new EdgeArray());
+
+    // Deprecated fields
+    chartInfo.setInputs(new ChartDataSourceTypeArray());
+
+    return chartInfo;
+  }
+
+  @Nonnull
+  @Override
+  public JsonNode transformFields(JsonNode baseNode) {
+    JsonNode transformedNode =
+        arrayFieldToMap(
+            baseNode,
+            INPUT_EDGES_FIELD_NAME,
+            Collections.singletonList(DESTINATION_URN_FIELD_NAME));
+
+    transformedNode = arrayFieldToMap(transformedNode, INPUTS_FIELD_NAME, Collections.emptyList());
+
+    return transformedNode;
+  }
+
+  @Nonnull
+  @Override
+  public JsonNode rebaseFields(JsonNode patched) {
+    JsonNode rebasedNode =
+        transformedMapToArray(
+            patched, INPUT_EDGES_FIELD_NAME, Collections.singletonList(DESTINATION_URN_FIELD_NAME));
+
+    rebasedNode = transformedMapToArray(rebasedNode, INPUTS_FIELD_NAME, Collections.emptyList());
+
+    return rebasedNode;
+  }
+}
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/dashboard/DashboardInfoTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/dashboard/DashboardInfoTemplate.java
new file mode 100644
index 00000000000000..eae04b5285adf4
--- /dev/null
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/dashboard/DashboardInfoTemplate.java
@@ -0,0 +1,105 @@
+package com.linkedin.metadata.models.registry.template.dashboard;
+
+import static com.linkedin.metadata.Constants.*;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.linkedin.common.AuditStamp;
+import com.linkedin.common.ChangeAuditStamps;
+import com.linkedin.common.ChartUrnArray;
+import com.linkedin.common.EdgeArray;
+import com.linkedin.common.UrnArray;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.dashboard.DashboardInfo;
+import com.linkedin.data.template.RecordTemplate;
+import com.linkedin.metadata.models.registry.template.ArrayMergingTemplate;
+import java.util.Collections;
+import javax.annotation.Nonnull;
+
+public class DashboardInfoTemplate implements ArrayMergingTemplate<DashboardInfo> {
+
+  private static final String CHART_EDGES_FIELD_NAME = "chartEdges";
+  private static final String DATASET_EDGES_FIELD_NAME = "datasetEdges";
+  private static final String DATASETS_FIELD_NAME = "datasets";
+  private static final String CHARTS_FIELD_NAME = "charts";
+  private static final String DESTINATION_URN_FIELD_NAME = "destinationUrn";
+
+  @Override
+  public DashboardInfo getSubtype(RecordTemplate recordTemplate) throws ClassCastException {
+    if (recordTemplate instanceof DashboardInfo) {
+      return (DashboardInfo) recordTemplate;
+    }
+    throw new ClassCastException("Unable to cast RecordTemplate to DashboardInfo");
+  }
+
+  @Override
+  public Class<DashboardInfo> getTemplateType() {
+    return DashboardInfo.class;
+  }
+
+  @Nonnull
+  @Override
+  public DashboardInfo getDefault() {
+    DashboardInfo dashboardInfo = new DashboardInfo();
+    dashboardInfo.setTitle("");
+    dashboardInfo.setDescription("");
+    ChangeAuditStamps changeAuditStamps = new ChangeAuditStamps();
+    AuditStamp auditStamp =
+        new AuditStamp()
+            .setActor(UrnUtils.getUrn(SYSTEM_ACTOR))
+            .setTime(System.currentTimeMillis());
+    changeAuditStamps.setCreated(auditStamp).setLastModified(auditStamp);
+    dashboardInfo.setLastModified(changeAuditStamps);
+    dashboardInfo.setChartEdges(new EdgeArray());
+    dashboardInfo.setDatasetEdges(new EdgeArray());
+
+    // Deprecated fields
+    dashboardInfo.setDatasets(new UrnArray());
+    dashboardInfo.setCharts(new ChartUrnArray());
+
+    return dashboardInfo;
+  }
+
+  @Nonnull
+  @Override
+  public JsonNode transformFields(JsonNode baseNode) {
+    JsonNode transformedNode =
+        arrayFieldToMap(
+            baseNode,
+            CHART_EDGES_FIELD_NAME,
+            Collections.singletonList(DESTINATION_URN_FIELD_NAME));
+
+    transformedNode =
+        arrayFieldToMap(
+            transformedNode,
+            DATASET_EDGES_FIELD_NAME,
+            Collections.singletonList(DESTINATION_URN_FIELD_NAME));
+
+    transformedNode =
+        arrayFieldToMap(transformedNode, DATASETS_FIELD_NAME, Collections.emptyList());
+
+    transformedNode = arrayFieldToMap(transformedNode, CHARTS_FIELD_NAME, Collections.emptyList());
+
+    return transformedNode;
+  }
+
+  @Nonnull
+  @Override
+  public JsonNode rebaseFields(JsonNode patched) {
+    JsonNode rebasedNode =
+        transformedMapToArray(
+            patched,
+            DATASET_EDGES_FIELD_NAME,
+            Collections.singletonList(DESTINATION_URN_FIELD_NAME));
+
+    rebasedNode =
+        transformedMapToArray(
+            rebasedNode,
+            CHART_EDGES_FIELD_NAME,
+            Collections.singletonList(DESTINATION_URN_FIELD_NAME));
+
+    rebasedNode = transformedMapToArray(rebasedNode, DATASETS_FIELD_NAME, Collections.emptyList());
+    rebasedNode = transformedMapToArray(rebasedNode, CHARTS_FIELD_NAME, Collections.emptyList());
+
+    return rebasedNode;
+  }
+}
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/datajob/DataJobInputOutputTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/datajob/DataJobInputOutputTemplate.java
index 889297734e9777..6761892b1b31b4 100644
--- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/datajob/DataJobInputOutputTemplate.java
+++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/template/datajob/DataJobInputOutputTemplate.java
@@ -23,8 +23,6 @@ public class DataJobInputOutputTemplate implements ArrayMergingTemplate<DataJobInputOutput>

diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/ChartInfoTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/ChartInfoTemplateTest.java
new file mode 100644
--- /dev/null
+++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/ChartInfoTemplateTest.java
@@ -0,0 +1,41 @@
+package com.linkedin.metadata.models.registry.patch;
+
+import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.github.fge.jackson.jsonpointer.JsonPointer;
+import com.github.fge.jsonpatch.AddOperation;
+import com.github.fge.jsonpatch.JsonPatch;
+import com.github.fge.jsonpatch.JsonPatchOperation;
+import com.linkedin.chart.ChartInfo;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.metadata.models.registry.template.chart.ChartInfoTemplate;
+import java.util.ArrayList;
+import java.util.List;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class ChartInfoTemplateTest {
+
+  @Test
+  public void testChartInfoTemplate() throws Exception {
+    ChartInfoTemplate chartInfoTemplate = new ChartInfoTemplate();
+    ChartInfo chartInfo = chartInfoTemplate.getDefault();
+    List<JsonPatchOperation> patchOperations = new ArrayList<>();
+    ObjectNode edgeNode = instance.objectNode();
+    edgeNode.put(
+        "destinationUrn", "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)");
+    JsonPatchOperation operation =
+        new AddOperation(
+            new JsonPointer(
+                "/inputEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"),
+            edgeNode);
+    patchOperations.add(operation);
+    JsonPatch patch = new JsonPatch(patchOperations);
+    ChartInfo result = chartInfoTemplate.applyPatch(chartInfo, patch);
+
+    Assert.assertEquals(
+        UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"),
+        result.getInputEdges().get(0).getDestinationUrn());
+  }
+}
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/DashboardInfoTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/DashboardInfoTemplateTest.java
new file mode 100644
index 00000000000000..962ff1d40d873c
--- /dev/null
+++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/DashboardInfoTemplateTest.java
@@ -0,0 +1,41 @@
+package com.linkedin.metadata.models.registry.patch;
+
+import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.github.fge.jackson.jsonpointer.JsonPointer;
+import com.github.fge.jsonpatch.AddOperation;
+import com.github.fge.jsonpatch.JsonPatch;
+import com.github.fge.jsonpatch.JsonPatchOperation;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.dashboard.DashboardInfo;
+import com.linkedin.metadata.models.registry.template.dashboard.DashboardInfoTemplate;
+import java.util.ArrayList;
+import java.util.List;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class DashboardInfoTemplateTest {
+
+  @Test
+  public void testDashboardInfoTemplate() throws Exception {
+    DashboardInfoTemplate dashboardInfoTemplate = new DashboardInfoTemplate();
+    DashboardInfo dashboardInfo = dashboardInfoTemplate.getDefault();
+    List<JsonPatchOperation> patchOperations = new ArrayList<>();
+    ObjectNode edgeNode = instance.objectNode();
+    edgeNode.put(
+        "destinationUrn", "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)");
+    JsonPatchOperation operation =
+        new AddOperation(
+            new JsonPointer(
+                "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"),
+            edgeNode);
+    patchOperations.add(operation);
+    JsonPatch patch = new JsonPatch(patchOperations);
+    DashboardInfo result = dashboardInfoTemplate.applyPatch(dashboardInfo, patch);
+
+    Assert.assertEquals(
+        UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"),
+        result.getDatasetEdges().get(0).getDestinationUrn());
+  }
+}
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/UpstreamLineageTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/UpstreamLineageTemplateTest.java
similarity index 99%
rename from entity-registry/src/test/java/com/linkedin/metadata/models/registry/UpstreamLineageTemplateTest.java
rename to entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/UpstreamLineageTemplateTest.java
index 07982a87be56cb..8f410ae8da0857 100644
--- a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/UpstreamLineageTemplateTest.java
+++
b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/patch/UpstreamLineageTemplateTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.models.registry; +package com.linkedin.metadata.models.registry.patch; import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; diff --git a/metadata-ingestion/src/datahub/specific/chart.py b/metadata-ingestion/src/datahub/specific/chart.py new file mode 100644 index 00000000000000..5dc394e8ebe0fb --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/chart.py @@ -0,0 +1,316 @@ +import time +from typing import Dict, List, Optional, TypeVar, Union +from urllib.parse import quote + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.metadata.schema_classes import ( + AuditStampClass, + ChartInfoClass as ChartInfo, + EdgeClass as Edge, + GlobalTagsClass as GlobalTags, + GlossaryTermAssociationClass as Term, + GlossaryTermsClass as GlossaryTerms, + KafkaAuditHeaderClass, + OwnerClass as Owner, + OwnershipTypeClass, + SystemMetadataClass, + TagAssociationClass as Tag, +) +from datahub.specific.custom_properties import CustomPropertiesPatchHelper +from datahub.specific.ownership import OwnershipPatchHelper +from datahub.utilities.urns.tag_urn import TagUrn +from datahub.utilities.urns.urn import Urn + +T = TypeVar("T", bound=MetadataPatchProposal) + + +class ChartPatchBuilder(MetadataPatchProposal): + def __init__( + self, + urn: str, + system_metadata: Optional[SystemMetadataClass] = None, + audit_header: Optional[KafkaAuditHeaderClass] = None, + ) -> None: + """ + Initializes a ChartPatchBuilder instance. + + Args: + urn: The URN of the chart + system_metadata: The system metadata of the chart (optional). + audit_header: The Kafka audit header of the chart (optional). + """ + super().__init__( + urn, "chart", system_metadata=system_metadata, audit_header=audit_header + ) + self.custom_properties_patch_helper = CustomPropertiesPatchHelper( + self, ChartInfo.ASPECT_NAME + ) + self.ownership_patch_helper = OwnershipPatchHelper(self) + + def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass: + """ + Creates an AuditStampClass instance with the current timestamp and other default values. + + Args: + message: The message associated with the audit stamp (optional). + + Returns: + An instance of AuditStampClass. + """ + return AuditStampClass( + time=int(time.time() * 1000.0), + actor="urn:li:corpuser:datahub", + message=message, + ) + + def _ensure_urn_type( + self, entity_type: str, edges: List[Edge], context: str + ) -> None: + """ + Ensures that the destination URNs in the given edges have the specified entity type. + + Args: + entity_type: The entity type to check against. + edges: A list of Edge objects. + context: The context or description of the operation. + + Raises: + ValueError: If any of the destination URNs is not of the specified entity type. + """ + for e in edges: + urn = Urn.create_from_string(e.destinationUrn) + if not urn.get_type() == entity_type: + raise ValueError( + f"{context}: {e.destinationUrn} is not of type {entity_type}" + ) + + def add_owner(self, owner: Owner) -> "ChartPatchBuilder": + """ + Adds an owner to the ChartPatchBuilder. + + Args: + owner: The Owner object to add. + + Returns: + The ChartPatchBuilder instance. 
+ """
+ self.ownership_patch_helper.add_owner(owner)
+ return self
+
+ def remove_owner(
+ self, owner: str, owner_type: Optional[OwnershipTypeClass] = None
+ ) -> "ChartPatchBuilder":
+ """
+ Removes an owner from the ChartPatchBuilder.
+
+ Args:
+ owner: The owner to remove.
+ owner_type: The ownership type of the owner (optional).
+
+ Returns:
+ The ChartPatchBuilder instance.
+
+ Notes:
+ `owner_type` is optional.
+ """
+ self.ownership_patch_helper.remove_owner(owner, owner_type)
+ return self
+
+ def set_owners(self, owners: List[Owner]) -> "ChartPatchBuilder":
+ """
+ Sets the owners of the ChartPatchBuilder.
+
+ Args:
+ owners: A list of Owner objects.
+
+ Returns:
+ The ChartPatchBuilder instance.
+ """
+ self.ownership_patch_helper.set_owners(owners)
+ return self
+
+ def add_input_edge(self, input: Union[Edge, Urn, str]) -> "ChartPatchBuilder":
+ """
+ Adds an input to the ChartPatchBuilder.
+
+ Args:
+ input: The input, which can be an Edge object, Urn object, or a string.
+
+ Returns:
+ The ChartPatchBuilder instance.
+
+ Notes:
+ If `input` is an Edge object, it is used directly. If `input` is a Urn object or string,
+ it is converted to an Edge object and added with default audit stamps.
+ """
+ if isinstance(input, Edge):
+ input_urn: str = input.destinationUrn
+ input_edge: Edge = input
+ elif isinstance(input, (Urn, str)):
+ input_urn = str(input)
+
+ input_edge = Edge(
+ destinationUrn=input_urn,
+ created=self._mint_auditstamp(),
+ lastModified=self._mint_auditstamp(),
+ )
+
+ self._ensure_urn_type("dataset", [input_edge], "add_input_edge")
+ self._add_patch(
+ ChartInfo.ASPECT_NAME,
+ "add",
+ path=f"/inputEdges/{quote(input_urn, safe='')}",
+ value=input_edge,
+ )
+ return self
+
+ def remove_input_edge(self, input: Union[str, Urn]) -> "ChartPatchBuilder":
+ """
+ Removes an input from the ChartPatchBuilder.
+
+ Args:
+ input: The input to remove, specified as a string or Urn object.
+
+ Returns:
+ The ChartPatchBuilder instance.
+ """
+ self._add_patch(
+ ChartInfo.ASPECT_NAME,
+ "remove",
+ path=f"/inputEdges/{input}",
+ value={},
+ )
+ return self
+
+ def set_input_edges(self, inputs: List[Edge]) -> "ChartPatchBuilder":
+ """
+ Sets the input edges for the ChartPatchBuilder.
+
+ Args:
+ inputs: A list of Edge objects representing the input edges.
+
+ Returns:
+ The ChartPatchBuilder instance.
+
+ Notes:
+ This method replaces all existing inputs with the given inputs.
+ """
+ self._add_patch(
+ ChartInfo.ASPECT_NAME,
+ "add",
+ path="/inputEdges",
+ value=inputs,
+ )
+ return self
+
+ def add_tag(self, tag: Tag) -> "ChartPatchBuilder":
+ """
+ Adds a tag to the ChartPatchBuilder.
+
+ Args:
+ tag: The Tag object representing the tag to be added.
+
+ Returns:
+ The ChartPatchBuilder instance.
+ """
+ self._add_patch(
+ GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag
+ )
+ return self
+
+ def remove_tag(self, tag: Union[str, Urn]) -> "ChartPatchBuilder":
+ """
+ Removes a tag from the ChartPatchBuilder.
+
+ Args:
+ tag: The tag to remove, specified as a string or Urn object.
+
+ Returns:
+ The ChartPatchBuilder instance.
+ """
+ if isinstance(tag, str) and not tag.startswith("urn:li:tag:"):
+ tag = TagUrn.create_from_id(tag)
+ self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={})
+ return self
+
+ def add_term(self, term: Term) -> "ChartPatchBuilder":
+ """
+ Adds a glossary term to the ChartPatchBuilder.
+
+ Args:
+ term: The Term object representing the glossary term to be added.
+
+ Returns:
+ The ChartPatchBuilder instance.
+ """ + self._add_patch( + GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term + ) + return self + + def remove_term(self, term: Union[str, Urn]) -> "ChartPatchBuilder": + """ + Removes a glossary term from the ChartPatchBuilder. + + Args: + term: The term to remove, specified as a string or Urn object. + + Returns: + The ChartPatchBuilder instance. + """ + if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): + term = "urn:li:glossaryTerm:" + term + self._add_patch( + GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} + ) + return self + + def set_custom_properties( + self, custom_properties: Dict[str, str] + ) -> "ChartPatchBuilder": + """ + Sets the custom properties for the ChartPatchBuilder. + + Args: + custom_properties: A dictionary containing the custom properties to be set. + + Returns: + The ChartPatchBuilder instance. + + Notes: + This method replaces all existing custom properties with the given dictionary. + """ + self._add_patch( + ChartInfo.ASPECT_NAME, + "add", + path="/customProperties", + value=custom_properties, + ) + return self + + def add_custom_property(self, key: str, value: str) -> "ChartPatchBuilder": + """ + Adds a custom property to the ChartPatchBuilder. + + Args: + key: The key of the custom property. + value: The value of the custom property. + + Returns: + The ChartPatchBuilder instance. + """ + self.custom_properties_patch_helper.add_property(key, value) + return self + + def remove_custom_property(self, key: str) -> "ChartPatchBuilder": + """ + Removes a custom property from the ChartPatchBuilder. + + Args: + key: The key of the custom property to remove. + + Returns: + The ChartPatchBuilder instance. + """ + self.custom_properties_patch_helper.remove_property(key) + return self diff --git a/metadata-ingestion/src/datahub/specific/dashboard.py b/metadata-ingestion/src/datahub/specific/dashboard.py new file mode 100644 index 00000000000000..855dcc5685cea4 --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/dashboard.py @@ -0,0 +1,410 @@ +import time +from typing import Dict, List, Optional, TypeVar, Union +from urllib.parse import quote + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.metadata.schema_classes import ( + AuditStampClass, + DashboardInfoClass as DashboardInfo, + EdgeClass as Edge, + GlobalTagsClass as GlobalTags, + GlossaryTermAssociationClass as Term, + GlossaryTermsClass as GlossaryTerms, + KafkaAuditHeaderClass, + OwnerClass as Owner, + OwnershipTypeClass, + SystemMetadataClass, + TagAssociationClass as Tag, +) +from datahub.specific.custom_properties import CustomPropertiesPatchHelper +from datahub.specific.ownership import OwnershipPatchHelper +from datahub.utilities.urns.tag_urn import TagUrn +from datahub.utilities.urns.urn import Urn + +T = TypeVar("T", bound=MetadataPatchProposal) + + +class DashboardPatchBuilder(MetadataPatchProposal): + def __init__( + self, + urn: str, + system_metadata: Optional[SystemMetadataClass] = None, + audit_header: Optional[KafkaAuditHeaderClass] = None, + ) -> None: + """ + Initializes a DashboardPatchBuilder instance. + + Args: + urn: The URN of the dashboard + system_metadata: The system metadata of the dashboard (optional). + audit_header: The Kafka audit header of the dashboard (optional). 
+ """
+ super().__init__(
+ urn, "dashboard", system_metadata=system_metadata, audit_header=audit_header
+ )
+ self.custom_properties_patch_helper = CustomPropertiesPatchHelper(
+ self, DashboardInfo.ASPECT_NAME
+ )
+ self.ownership_patch_helper = OwnershipPatchHelper(self)
+
+ def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
+ """
+ Creates an AuditStampClass instance with the current timestamp and other default values.
+
+ Args:
+ message: The message associated with the audit stamp (optional).
+
+ Returns:
+ An instance of AuditStampClass.
+ """
+ return AuditStampClass(
+ time=int(time.time() * 1000.0),
+ actor="urn:li:corpuser:datahub",
+ message=message,
+ )
+
+ def _ensure_urn_type(
+ self, entity_type: str, edges: List[Edge], context: str
+ ) -> None:
+ """
+ Ensures that the destination URNs in the given edges have the specified entity type.
+
+ Args:
+ entity_type: The entity type to check against.
+ edges: A list of Edge objects.
+ context: The context or description of the operation.
+
+ Raises:
+ ValueError: If any of the destination URNs is not of the specified entity type.
+ """
+ for e in edges:
+ urn = Urn.create_from_string(e.destinationUrn)
+ if not urn.get_type() == entity_type:
+ raise ValueError(
+ f"{context}: {e.destinationUrn} is not of type {entity_type}"
+ )
+
+ def add_owner(self, owner: Owner) -> "DashboardPatchBuilder":
+ """
+ Adds an owner to the DashboardPatchBuilder.
+
+ Args:
+ owner: The Owner object to add.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+ """
+ self.ownership_patch_helper.add_owner(owner)
+ return self
+
+ def remove_owner(
+ self, owner: str, owner_type: Optional[OwnershipTypeClass] = None
+ ) -> "DashboardPatchBuilder":
+ """
+ Removes an owner from the DashboardPatchBuilder.
+
+ Args:
+ owner: The owner to remove.
+ owner_type: The ownership type of the owner (optional).
+
+ Returns:
+ The DashboardPatchBuilder instance.
+
+ Notes:
+ `owner_type` is optional.
+ """
+ self.ownership_patch_helper.remove_owner(owner, owner_type)
+ return self
+
+ def set_owners(self, owners: List[Owner]) -> "DashboardPatchBuilder":
+ """
+ Sets the owners of the DashboardPatchBuilder.
+
+ Args:
+ owners: A list of Owner objects.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+ """
+ self.ownership_patch_helper.set_owners(owners)
+ return self
+
+ def add_dataset_edge(
+ self, dataset: Union[Edge, Urn, str]
+ ) -> "DashboardPatchBuilder":
+ """
+ Adds a dataset to the DashboardPatchBuilder.
+
+ Args:
+ dataset: The dataset, which can be an Edge object, Urn object, or a string.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+
+ Raises:
+ ValueError: If the dataset is not a Dataset urn.
+
+ Notes:
+ If `dataset` is an Edge object, it is used directly. If `dataset` is a Urn object or string,
+ it is converted to an Edge object and added with default audit stamps.
+ """
+ if isinstance(dataset, Edge):
+ dataset_urn: str = dataset.destinationUrn
+ dataset_edge: Edge = dataset
+ elif isinstance(dataset, (Urn, str)):
+ dataset_urn = str(dataset)
+ if not dataset_urn.startswith("urn:li:dataset:"):
+ raise ValueError(f"Input {dataset} is not a Dataset urn")
+
+ dataset_edge = Edge(
+ destinationUrn=dataset_urn,
+ created=self._mint_auditstamp(),
+ lastModified=self._mint_auditstamp(),
+ )
+
+ self._ensure_urn_type("dataset", [dataset_edge], "add_dataset")
+ self._add_patch(
+ DashboardInfo.ASPECT_NAME,
+ "add",
+ path=f"/datasetEdges/{quote(dataset_urn, safe='')}",
+ value=dataset_edge,
+ )
+ return self
+
+ def remove_dataset_edge(self, dataset: Union[str, Urn]) -> "DashboardPatchBuilder":
+ """
+ Removes a dataset edge from the DashboardPatchBuilder.
+
+ Args:
+ dataset: The dataset to remove, specified as a string or Urn object.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+ """
+ self._add_patch(
+ DashboardInfo.ASPECT_NAME,
+ "remove",
+ path=f"/datasetEdges/{dataset}",
+ value={},
+ )
+ return self
+
+ def set_dataset_edges(self, datasets: List[Edge]) -> "DashboardPatchBuilder":
+ """
+ Sets the dataset edges for the DashboardPatchBuilder.
+
+ Args:
+ datasets: A list of Edge objects representing the dataset edges.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+
+ Raises:
+ ValueError: If any of the input edges are not of type 'dataset'.
+
+ Notes:
+ This method replaces all existing datasets with the given datasets.
+ """
+ self._ensure_urn_type("dataset", datasets, "dataset edges")
+ self._add_patch(
+ DashboardInfo.ASPECT_NAME,
+ "add",
+ path="/datasetEdges",
+ value=datasets,
+ )
+ return self
+
+ def add_chart_edge(self, chart: Union[Edge, Urn, str]) -> "DashboardPatchBuilder":
+ """
+ Adds a chart edge to the DashboardPatchBuilder.
+
+ Args:
+ chart: The chart, which can be an Edge object, Urn object, or a string.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+
+ Raises:
+ ValueError: If the edge is not a Chart urn.
+
+ Notes:
+ If `chart` is an Edge object, it is used directly. If `chart` is a Urn object or string,
+ it is converted to an Edge object and added with default audit stamps.
+ """
+ if isinstance(chart, Edge):
+ chart_urn: str = chart.destinationUrn
+ chart_edge: Edge = chart
+ elif isinstance(chart, (Urn, str)):
+ chart_urn = str(chart)
+ if not chart_urn.startswith("urn:li:chart:"):
+ raise ValueError(f"Input {chart} is not a Chart urn")
+
+ chart_edge = Edge(
+ destinationUrn=chart_urn,
+ created=self._mint_auditstamp(),
+ lastModified=self._mint_auditstamp(),
+ )
+
+ self._ensure_urn_type("chart", [chart_edge], "add_chart_edge")
+ self._add_patch(
+ DashboardInfo.ASPECT_NAME,
+ "add",
+ path=f"/chartEdges/{quote(chart_urn, safe='')}",
+ value=chart_edge,
+ )
+ return self
+
+ def remove_chart_edge(self, chart: Union[str, Urn]) -> "DashboardPatchBuilder":
+ """
+ Removes a chart edge from the DashboardPatchBuilder.
+
+ Args:
+ chart: The chart to remove, specified as a string or Urn object.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+ """
+ self._add_patch(
+ DashboardInfo.ASPECT_NAME,
+ "remove",
+ path=f"/chartEdges/{chart}",
+ value={},
+ )
+ return self
+
+ def set_chart_edges(self, charts: List[Edge]) -> "DashboardPatchBuilder":
+ """
+ Sets the chart edges for the DashboardPatchBuilder.
+
+ Args:
+ charts: A list of Edge objects representing the chart edges.
+
+ Returns:
+ The DashboardPatchBuilder instance.
+
+ Raises:
+ ValueError: If any of the edges are not of type 'chart'.
+ + Notes: + This method replaces all existing charts with the given charts. + """ + self._ensure_urn_type("chart", charts, "set_charts") + self._add_patch( + DashboardInfo.ASPECT_NAME, + "add", + path="/chartEdges", + value=charts, + ) + return self + + def add_tag(self, tag: Tag) -> "DashboardPatchBuilder": + """ + Adds a tag to the DashboardPatchBuilder. + + Args: + tag: The Tag object representing the tag to be added. + + Returns: + The DashboardPatchBuilder instance. + """ + self._add_patch( + GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag + ) + return self + + def remove_tag(self, tag: Union[str, Urn]) -> "DashboardPatchBuilder": + """ + Removes a tag from the DashboardPatchBuilder. + + Args: + tag: The tag to remove, specified as a string or Urn object. + + Returns: + The DashboardPatchBuilder instance. + """ + if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): + tag = TagUrn.create_from_id(tag) + self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={}) + return self + + def add_term(self, term: Term) -> "DashboardPatchBuilder": + """ + Adds a glossary term to the DashboardPatchBuilder. + + Args: + term: The Term object representing the glossary term to be added. + + Returns: + The DashboardPatchBuilder instance. + """ + self._add_patch( + GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term + ) + return self + + def remove_term(self, term: Union[str, Urn]) -> "DashboardPatchBuilder": + """ + Removes a glossary term from the DashboardPatchBuilder. + + Args: + term: The term to remove, specified as a string or Urn object. + + Returns: + The DashboardPatchBuilder instance. + """ + if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): + term = "urn:li:glossaryTerm:" + term + self._add_patch( + GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} + ) + return self + + def set_custom_properties( + self, custom_properties: Dict[str, str] + ) -> "DashboardPatchBuilder": + """ + Sets the custom properties for the DashboardPatchBuilder. + + Args: + custom_properties: A dictionary containing the custom properties to be set. + + Returns: + The DashboardPatchBuilder instance. + + Notes: + This method replaces all existing custom properties with the given dictionary. + """ + self._add_patch( + DashboardInfo.ASPECT_NAME, + "add", + path="/customProperties", + value=custom_properties, + ) + return self + + def add_custom_property(self, key: str, value: str) -> "DashboardPatchBuilder": + """ + Adds a custom property to the DashboardPatchBuilder. + + Args: + key: The key of the custom property. + value: The value of the custom property. + + Returns: + The DashboardPatchBuilder instance. + """ + self.custom_properties_patch_helper.add_property(key, value) + return self + + def remove_custom_property(self, key: str) -> "DashboardPatchBuilder": + """ + Removes a custom property from the DashboardPatchBuilder. + + Args: + key: The key of the custom property to remove. + + Returns: + The DashboardPatchBuilder instance. 
+ """ + self.custom_properties_patch_helper.remove_property(key) + return self diff --git a/metadata-ingestion/src/datahub/specific/datajob.py b/metadata-ingestion/src/datahub/specific/datajob.py index 7ebaee6b918c12..0338a1320c15b8 100644 --- a/metadata-ingestion/src/datahub/specific/datajob.py +++ b/metadata-ingestion/src/datahub/specific/datajob.py @@ -207,7 +207,7 @@ def set_input_datajobs(self, inputs: List[Edge]) -> "DataJobPatchBuilder": self._ensure_urn_type("dataJob", inputs, "input datajobs") self._add_patch( DataJobInputOutput.ASPECT_NAME, - "replace", + "add", path="/inputDatajobEdges", value=inputs, ) @@ -290,7 +290,7 @@ def set_input_datasets(self, inputs: List[Edge]) -> "DataJobPatchBuilder": self._ensure_urn_type("dataset", inputs, "set_input_datasets") self._add_patch( DataJobInputOutput.ASPECT_NAME, - "replace", + "add", path="/inputDatasetEdges", value=inputs, ) @@ -375,7 +375,7 @@ def set_output_datasets(self, outputs: List[Edge]) -> "DataJobPatchBuilder": self._ensure_urn_type("dataset", outputs, "set_output_datasets") self._add_patch( DataJobInputOutput.ASPECT_NAME, - "replace", + "add", path="/outputDatasetEdges", value=outputs, ) @@ -463,7 +463,7 @@ def set_input_dataset_fields(self, inputs: List[Edge]) -> "DataJobPatchBuilder": self._ensure_urn_type("schemaField", inputs, "set_input_dataset_fields") self._add_patch( DataJobInputOutput.ASPECT_NAME, - "replace", + "add", path="/inputDatasetFields", value=inputs, ) @@ -551,7 +551,7 @@ def set_output_dataset_fields(self, outputs: List[Edge]) -> "DataJobPatchBuilder self._ensure_urn_type("schemaField", outputs, "set_output_dataset_fields") self._add_patch( DataJobInputOutput.ASPECT_NAME, - "replace", + "add", path="/outputDatasetFields", value=outputs, ) @@ -636,7 +636,7 @@ def set_custom_properties( """ self._add_patch( DataJobInfo.ASPECT_NAME, - "replace", + "add", path="/customProperties", value=custom_properties, ) diff --git a/metadata-ingestion/src/datahub/specific/dataproduct.py b/metadata-ingestion/src/datahub/specific/dataproduct.py index bb49ac47b3ef86..2c174e0c9a6cbb 100644 --- a/metadata-ingestion/src/datahub/specific/dataproduct.py +++ b/metadata-ingestion/src/datahub/specific/dataproduct.py @@ -85,7 +85,7 @@ def remove_term(self, term: Union[str, Urn]) -> "DataProductPatchBuilder": def set_name(self, name: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, - "replace", + "add", path="/name", value=name, ) @@ -94,7 +94,7 @@ def set_name(self, name: str) -> "DataProductPatchBuilder": def set_description(self, description: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, - "replace", + "add", path="/description", value=description, ) @@ -105,7 +105,7 @@ def set_custom_properties( ) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, - "replace", + "add", path="/customProperties", value=custom_properties, ) @@ -124,7 +124,7 @@ def set_assets( ) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, - "replace", + "add", path="/assets", value=assets, ) @@ -151,7 +151,7 @@ def remove_asset(self, asset_urn: str) -> "DataProductPatchBuilder": def set_external_url(self, external_url: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, - "replace", + "add", path="/externalUrl", value=external_url, ) diff --git a/metadata-ingestion/src/datahub/specific/dataset.py b/metadata-ingestion/src/datahub/specific/dataset.py index 294a80572669b8..62ee4fc57b61b6 
100644 --- a/metadata-ingestion/src/datahub/specific/dataset.py +++ b/metadata-ingestion/src/datahub/specific/dataset.py @@ -143,7 +143,7 @@ def remove_upstream_lineage( def set_upstream_lineages(self, upstreams: List[Upstream]) -> "DatasetPatchBuilder": self._add_patch( - UpstreamLineage.ASPECT_NAME, "replace", path="/upstreams", value=upstreams + UpstreamLineage.ASPECT_NAME, "add", path="/upstreams", value=upstreams ) return self @@ -297,7 +297,7 @@ def set_description( DatasetProperties.ASPECT_NAME if not editable else EditableDatasetProperties.ASPECT_NAME, - "replace", + "add", path="/description", value=description, ) @@ -308,7 +308,7 @@ def set_custom_properties( ) -> "DatasetPatchBuilder": self._add_patch( DatasetProperties.ASPECT_NAME, - "replace", + "add", path="/customProperties", value=custom_properties, ) @@ -326,7 +326,7 @@ def set_display_name(self, display_name: str) -> "DatasetPatchBuilder": if display_name is not None: self._add_patch( DatasetProperties.ASPECT_NAME, - "replace", + "add", path="/name", value=display_name, ) diff --git a/metadata-ingestion/src/datahub/specific/ownership.py b/metadata-ingestion/src/datahub/specific/ownership.py index 334b45a67437f2..c2a3874a3a33f3 100644 --- a/metadata-ingestion/src/datahub/specific/ownership.py +++ b/metadata-ingestion/src/datahub/specific/ownership.py @@ -43,6 +43,6 @@ def remove_owner( def set_owners(self, owners: List[OwnerClass]) -> "OwnershipPatchHelper": self._parent._add_patch( - OwnershipClass.ASPECT_NAME, "replace", path="/owners", value=owners + OwnershipClass.ASPECT_NAME, "add", path="/owners", value=owners ) return self diff --git a/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_upsert.json b/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_upsert.json index 97c2330f58bc75..66bc2ce0c2a0c0 100644 --- a/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_upsert.json +++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_upsert.json @@ -5,7 +5,7 @@ "changeType": "PATCH", "aspectName": "dataProductProperties", "aspect": { - "value": "[{\"op\": \"replace\", \"path\": \"/name\", \"value\": \"Pet of the Week Campaign\"}, {\"op\": \"replace\", \"path\": \"/assets\", \"value\": [{\"destinationUrn\": \"urn:li:container:DATABASE\", \"created\": {\"time\": 1681455600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"yaml\"}}, {\"destinationUrn\": \"urn:li:container:SCHEMA\", \"created\": {\"time\": 1681455600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"yaml\"}}, {\"destinationUrn\": \"urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_all_feature_dtypes)\", \"created\": {\"time\": 1681455600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"yaml\"}}]}, {\"op\": \"replace\", \"path\": \"/customProperties\", \"value\": {\"version\": \"2.0\", \"classification\": \"pii\"}}, {\"op\": \"replace\", \"path\": \"/externalUrl\", \"value\": \"https://github.com/datahub-project/datahub\"}]", + "value": "[{\"op\": \"add\", \"path\": \"/name\", \"value\": \"Pet of the Week Campaign\"}, {\"op\": \"add\", \"path\": \"/assets\", \"value\": [{\"destinationUrn\": \"urn:li:container:DATABASE\", \"created\": {\"time\": 1681455600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"yaml\"}}, {\"destinationUrn\": \"urn:li:container:SCHEMA\", \"created\": {\"time\": 1681455600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"yaml\"}}, {\"destinationUrn\": 
\"urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_all_feature_dtypes)\", \"created\": {\"time\": 1681455600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"yaml\"}}]}, {\"op\": \"add\", \"path\": \"/customProperties\", \"value\": {\"version\": \"2.0\", \"classification\": \"pii\"}}, {\"op\": \"add\", \"path\": \"/externalUrl\", \"value\": \"https://github.com/datahub-project/datahub\"}]", "contentType": "application/json-patch+json" } }, diff --git a/metadata-ingestion/tests/unit/patch/complex_dataset_patch.json b/metadata-ingestion/tests/unit/patch/complex_dataset_patch.json index ed5a7723ac2bf1..bcc619a09401e2 100644 --- a/metadata-ingestion/tests/unit/patch/complex_dataset_patch.json +++ b/metadata-ingestion/tests/unit/patch/complex_dataset_patch.json @@ -7,7 +7,7 @@ "aspect": { "json": [ { - "op": "replace", + "op": "add", "path": "/description", "value": "test description" }, diff --git a/metadata-ingestion/tests/unit/patch/test_patch_builder.py b/metadata-ingestion/tests/unit/patch/test_patch_builder.py index f05c4978f8644e..e68f948be8aa0f 100644 --- a/metadata-ingestion/tests/unit/patch/test_patch_builder.py +++ b/metadata-ingestion/tests/unit/patch/test_patch_builder.py @@ -3,7 +3,12 @@ import pytest -from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn +from datahub.emitter.mce_builder import ( + make_chart_urn, + make_dashboard_urn, + make_dataset_urn, + make_tag_urn, +) from datahub.ingestion.sink.file import write_metadata_file from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, @@ -15,6 +20,8 @@ TagAssociationClass, UpstreamClass, ) +from datahub.specific.chart import ChartPatchBuilder +from datahub.specific.dashboard import DashboardPatchBuilder from datahub.specific.dataset import DatasetPatchBuilder @@ -80,3 +87,41 @@ def test_complex_dataset_patch( pytestconfig.rootpath / "tests/unit/patch/complex_dataset_patch.json" ).read_text() ) + + +def test_basic_chart_patch_builder(): + patcher = ChartPatchBuilder( + make_chart_urn(platform="hive", name="fct_users_created") + ).add_tag(TagAssociationClass(tag=make_tag_urn("test_tag"))) + + assert patcher.build() == [ + MetadataChangeProposalClass( + entityType="chart", + entityUrn="urn:li:chart:(hive,fct_users_created)", + changeType="PATCH", + aspectName="globalTags", + aspect=GenericAspectClass( + value=b'[{"op": "add", "path": "/tags/urn:li:tag:test_tag", "value": {"tag": "urn:li:tag:test_tag"}}]', + contentType="application/json-patch+json", + ), + ), + ] + + +def test_basic_dashboard_patch_builder(): + patcher = DashboardPatchBuilder( + make_dashboard_urn(platform="hive", name="fct_users_created") + ).add_tag(TagAssociationClass(tag=make_tag_urn("test_tag"))) + + assert patcher.build() == [ + MetadataChangeProposalClass( + entityType="dashboard", + entityUrn="urn:li:dashboard:(hive,fct_users_created)", + changeType="PATCH", + aspectName="globalTags", + aspect=GenericAspectClass( + value=b'[{"op": "add", "path": "/tags/urn:li:tag:test_tag", "value": {"tag": "urn:li:tag:test_tag"}}]', + contentType="application/json-patch+json", + ), + ), + ] diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/chart/ChartInfoPatchBuilder.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/chart/ChartInfoPatchBuilder.java new file mode 100644 index 00000000000000..0655d2b3eb8eb2 --- /dev/null +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/chart/ChartInfoPatchBuilder.java @@ -0,0 
+1,41 @@ +package datahub.client.patch.chart; + +import static com.linkedin.metadata.Constants.*; +import static datahub.client.patch.common.PatchUtil.*; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.common.urn.Urn; +import datahub.client.patch.AbstractMultiFieldPatchBuilder; +import datahub.client.patch.PatchOperationType; +import javax.annotation.Nonnull; +import org.apache.commons.lang3.tuple.ImmutableTriple; + +public class ChartInfoPatchBuilder extends AbstractMultiFieldPatchBuilder { + private static final String INPUT_EDGES_PATH_START = "/inputEdges/"; + + // Simplified with just Urn + public ChartInfoPatchBuilder addInputEdge(@Nonnull Urn urn) { + ObjectNode value = createEdgeValue(urn); + + pathValues.add( + ImmutableTriple.of(PatchOperationType.ADD.getValue(), INPUT_EDGES_PATH_START + urn, value)); + return this; + } + + public ChartInfoPatchBuilder removeInputEdge(@Nonnull Urn urn) { + pathValues.add( + ImmutableTriple.of( + PatchOperationType.REMOVE.getValue(), INPUT_EDGES_PATH_START + urn, null)); + return this; + } + + @Override + protected String getAspectName() { + return CHART_INFO_ASPECT_NAME; + } + + @Override + protected String getEntityType() { + return CHART_ENTITY_NAME; + } +} diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/common/PatchUtil.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/common/PatchUtil.java new file mode 100644 index 00000000000000..69db36c6e038cf --- /dev/null +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/common/PatchUtil.java @@ -0,0 +1,84 @@ +package datahub.client.patch.common; + +import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; +import static com.linkedin.metadata.Constants.*; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.common.Edge; +import com.linkedin.common.urn.Urn; +import javax.annotation.Nonnull; + +public class PatchUtil { + private PatchUtil() {} + + private static final String TIME_KEY = "time"; + private static final String ACTOR_KEY = "actor"; + private static final String IMPERSONATOR_KEY = "impersonator"; + private static final String MESSAGE_KEY = "message"; + private static final String LAST_MODIFIED_KEY = "lastModified"; + private static final String CREATED_KEY = "created"; + private static final String DESTINATION_URN_KEY = "destinationUrn"; + private static final String SOURCE_URN_KEY = "sourceUrn"; + + private static final String PROPERTIES_KEY = "properties"; + + public static ObjectNode createEdgeValue(@Nonnull Edge edge) { + ObjectNode value = instance.objectNode(); + + ObjectNode created = instance.objectNode(); + if (edge.getCreated() == null) { + created.put(TIME_KEY, System.currentTimeMillis()).put(ACTOR_KEY, UNKNOWN_ACTOR); + } else { + created + .put(TIME_KEY, edge.getCreated().getTime()) + .put(ACTOR_KEY, edge.getCreated().getActor().toString()); + if (edge.getCreated().getImpersonator() != null) { + created.put(IMPERSONATOR_KEY, edge.getCreated().getImpersonator().toString()); + } + if (edge.getCreated().getMessage() != null) { + created.put(MESSAGE_KEY, edge.getCreated().getMessage()); + } + } + value.set(CREATED_KEY, created); + + ObjectNode lastModified = instance.objectNode(); + if (edge.getLastModified() == null) { + lastModified.put(TIME_KEY, System.currentTimeMillis()).put(ACTOR_KEY, UNKNOWN_ACTOR); + } else { + lastModified + .put(TIME_KEY, edge.getLastModified().getTime()) + .put(ACTOR_KEY, 
edge.getLastModified().getActor().toString()); + if (edge.getLastModified().getImpersonator() != null) { + lastModified.put(IMPERSONATOR_KEY, edge.getLastModified().getImpersonator().toString()); + } + if (edge.getLastModified().getMessage() != null) { + lastModified.put(MESSAGE_KEY, edge.getLastModified().getMessage()); + } + } + value.set(LAST_MODIFIED_KEY, lastModified); + + if (edge.getProperties() != null) { + ObjectNode propertiesNode = instance.objectNode(); + edge.getProperties().forEach((k, v) -> propertiesNode.set(k, instance.textNode(v))); + value.set(PROPERTIES_KEY, propertiesNode); + } + + value.put(DESTINATION_URN_KEY, edge.getDestinationUrn().toString()); + if (edge.getSourceUrn() != null) { + value.put(SOURCE_URN_KEY, edge.getSourceUrn().toString()); + } + + return value; + } + + public static ObjectNode createEdgeValue(@Nonnull Urn urn) { + ObjectNode value = instance.objectNode(); + ObjectNode auditStamp = instance.objectNode(); + auditStamp.put(TIME_KEY, System.currentTimeMillis()).put(ACTOR_KEY, UNKNOWN_ACTOR); + + value.put(DESTINATION_URN_KEY, urn.toString()).set(LAST_MODIFIED_KEY, auditStamp); + value.set(CREATED_KEY, auditStamp); + + return value; + } +} diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/dashboard/DashboardInfoPatchBuilder.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/dashboard/DashboardInfoPatchBuilder.java new file mode 100644 index 00000000000000..cadde582f1c64a --- /dev/null +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/dashboard/DashboardInfoPatchBuilder.java @@ -0,0 +1,103 @@ +package datahub.client.patch.dashboard; + +import static com.linkedin.metadata.Constants.*; +import static datahub.client.patch.common.PatchUtil.*; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.common.Edge; +import com.linkedin.common.urn.ChartUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import datahub.client.patch.AbstractMultiFieldPatchBuilder; +import datahub.client.patch.PatchOperationType; +import javax.annotation.Nonnull; +import org.apache.commons.lang3.tuple.ImmutableTriple; + +public class DashboardInfoPatchBuilder + extends AbstractMultiFieldPatchBuilder { + private static final String CHART_EDGES_PATH_START = "/chartEdges/"; + private static final String DATASET_EDGES_PATH_START = "/datasetEdges/"; + + // Simplified with just Urn + public DashboardInfoPatchBuilder addChartEdge(@Nonnull ChartUrn urn) { + ObjectNode value = createEdgeValue(urn); + + pathValues.add( + ImmutableTriple.of(PatchOperationType.ADD.getValue(), CHART_EDGES_PATH_START + urn, value)); + return this; + } + + public DashboardInfoPatchBuilder removeChartEdge(@Nonnull ChartUrn urn) { + pathValues.add( + ImmutableTriple.of( + PatchOperationType.REMOVE.getValue(), CHART_EDGES_PATH_START + urn, null)); + return this; + } + + public DashboardInfoPatchBuilder addDatasetEdge(@Nonnull DatasetUrn urn) { + ObjectNode value = createEdgeValue(urn); + + pathValues.add( + ImmutableTriple.of( + PatchOperationType.ADD.getValue(), DATASET_EDGES_PATH_START + urn, value)); + return this; + } + + public DashboardInfoPatchBuilder removeDatasetEdge(@Nonnull DatasetUrn urn) { + pathValues.add( + ImmutableTriple.of( + PatchOperationType.REMOVE.getValue(), DATASET_EDGES_PATH_START + urn, null)); + return this; + } + + // Full Edge modification + public DashboardInfoPatchBuilder addEdge(@Nonnull Edge edge) { + ObjectNode value = 
createEdgeValue(edge); + String path = getEdgePath(edge); + + pathValues.add(ImmutableTriple.of(PatchOperationType.ADD.getValue(), path, value)); + return this; + } + + public DashboardInfoPatchBuilder removeEdge(@Nonnull Edge edge) { + String path = getEdgePath(edge); + + pathValues.add(ImmutableTriple.of(PatchOperationType.REMOVE.getValue(), path, null)); + return this; + } + + /** + * Determines Edge path based on supplied Urn, if not a valid entity type throws + * IllegalArgumentException + * + * @param edge + * @return + * @throws IllegalArgumentException if destinationUrn is an invalid entity type + */ + private String getEdgePath(@Nonnull Edge edge) { + Urn destinationUrn = edge.getDestinationUrn(); + + if (DATASET_ENTITY_NAME.equals(destinationUrn.getEntityType())) { + return DATASET_EDGES_PATH_START + destinationUrn; + } + + if (CHART_ENTITY_NAME.equals(destinationUrn.getEntityType())) { + return CHART_EDGES_PATH_START + destinationUrn; + } + + // TODO: Output Data Jobs not supported by aspect, add here if this changes + + throw new IllegalArgumentException( + String.format("Unsupported entity type: %s", destinationUrn.getEntityType())); + } + + @Override + protected String getAspectName() { + return DASHBOARD_INFO_ASPECT_NAME; + } + + @Override + protected String getEntityType() { + return DASHBOARD_ENTITY_NAME; + } +} diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/datajob/DataJobInputOutputPatchBuilder.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/datajob/DataJobInputOutputPatchBuilder.java index 0fb0454533fc06..fc250daffe916d 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/datajob/DataJobInputOutputPatchBuilder.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/patch/datajob/DataJobInputOutputPatchBuilder.java @@ -2,6 +2,7 @@ import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; import static com.linkedin.metadata.Constants.*; +import static datahub.client.patch.common.PatchUtil.*; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.TextNode; @@ -20,21 +21,9 @@ public class DataJobInputOutputPatchBuilder private static final String INPUT_DATA_JOB_EDGES_PATH_START = "/inputDatajobEdges/"; private static final String INPUT_DATASET_EDGES_PATH_START = "/inputDatasetEdges/"; private static final String OUTPUT_DATASET_EDGES_PATH_START = "/outputDatasetEdges/"; - - private static final String DESTINATION_URN_KEY = "destinationUrn"; - private static final String SOURCE_URN_KEY = "sourceUrn"; - private static final String LAST_MODIFIED_KEY = "lastModified"; - private static final String CREATED_KEY = "created"; - private static final String PROPERTIES_KEY = "properties"; - private static final String INPUT_DATASET_FIELDS_PATH_START = "/inputDatasetFields/"; private static final String OUTPUT_DATASET_FIELDS_PATH_START = "/outputDatasetFields/"; - private static final String TIME_KEY = "time"; - private static final String ACTOR_KEY = "actor"; - private static final String IMPERSONATOR_KEY = "impersonator"; - private static final String MESSAGE_KEY = "message"; - // Simplified with just Urn public DataJobInputOutputPatchBuilder addInputDatajobEdge(@Nonnull DataJobUrn dataJobUrn) { ObjectNode value = createEdgeValue(dataJobUrn); @@ -144,66 +133,6 @@ public DataJobInputOutputPatchBuilder removeEdge( return this; } - private ObjectNode createEdgeValue(@Nonnull Urn urn) { - ObjectNode 
value = instance.objectNode(); - ObjectNode auditStamp = instance.objectNode(); - auditStamp.put(TIME_KEY, System.currentTimeMillis()).put(ACTOR_KEY, UNKNOWN_ACTOR); - - value.put(DESTINATION_URN_KEY, urn.toString()).set(LAST_MODIFIED_KEY, auditStamp); - value.set(CREATED_KEY, auditStamp); - - return value; - } - - private ObjectNode createEdgeValue(@Nonnull Edge edge) { - ObjectNode value = instance.objectNode(); - - ObjectNode created = instance.objectNode(); - if (edge.getCreated() == null) { - created.put(TIME_KEY, System.currentTimeMillis()).put(ACTOR_KEY, UNKNOWN_ACTOR); - } else { - created - .put(TIME_KEY, edge.getCreated().getTime()) - .put(ACTOR_KEY, edge.getCreated().getActor().toString()); - if (edge.getCreated().getImpersonator() != null) { - created.put(IMPERSONATOR_KEY, edge.getCreated().getImpersonator().toString()); - } - if (edge.getCreated().getMessage() != null) { - created.put(MESSAGE_KEY, edge.getCreated().getMessage()); - } - } - value.set(CREATED_KEY, created); - - ObjectNode lastModified = instance.objectNode(); - if (edge.getLastModified() == null) { - lastModified.put(TIME_KEY, System.currentTimeMillis()).put(ACTOR_KEY, UNKNOWN_ACTOR); - } else { - lastModified - .put(TIME_KEY, edge.getLastModified().getTime()) - .put(ACTOR_KEY, edge.getLastModified().getActor().toString()); - if (edge.getLastModified().getImpersonator() != null) { - lastModified.put(IMPERSONATOR_KEY, edge.getLastModified().getImpersonator().toString()); - } - if (edge.getLastModified().getMessage() != null) { - lastModified.put(MESSAGE_KEY, edge.getLastModified().getMessage()); - } - } - value.set(LAST_MODIFIED_KEY, lastModified); - - if (edge.getProperties() != null) { - ObjectNode propertiesNode = instance.objectNode(); - edge.getProperties().forEach((k, v) -> propertiesNode.set(k, instance.textNode(v))); - value.set(PROPERTIES_KEY, propertiesNode); - } - - value.put(DESTINATION_URN_KEY, edge.getDestinationUrn().toString()); - if (edge.getSourceUrn() != null) { - value.put(SOURCE_URN_KEY, edge.getSourceUrn().toString()); - } - - return value; - } - /** * Determines Edge path based on supplied Urn, if not a valid entity type throws * IllegalArgumentException diff --git a/metadata-integration/java/datahub-client/src/test/java/datahub/client/patch/PatchTest.java b/metadata-integration/java/datahub-client/src/test/java/datahub/client/patch/PatchTest.java index 563742990f5468..5bd10245899e41 100644 --- a/metadata-integration/java/datahub-client/src/test/java/datahub/client/patch/PatchTest.java +++ b/metadata-integration/java/datahub-client/src/test/java/datahub/client/patch/PatchTest.java @@ -8,6 +8,7 @@ import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.OwnershipType; import com.linkedin.common.TagAssociation; +import com.linkedin.common.urn.ChartUrn; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.DataJobUrn; import com.linkedin.common.urn.DataPlatformUrn; @@ -22,7 +23,9 @@ import datahub.client.MetadataWriteResponse; import datahub.client.file.FileEmitter; import datahub.client.file.FileEmitterConfig; +import datahub.client.patch.chart.ChartInfoPatchBuilder; import datahub.client.patch.common.OwnershipPatchBuilder; +import datahub.client.patch.dashboard.DashboardInfoPatchBuilder; import datahub.client.patch.dataflow.DataFlowInfoPatchBuilder; import datahub.client.patch.datajob.DataJobInfoPatchBuilder; import datahub.client.patch.datajob.DataJobInputOutputPatchBuilder; @@ -551,4 +554,90 @@ public void testLocalDataJobInputAddEdge() { 
System.out.println(Arrays.asList(e.getStackTrace()));
}
}
+
+ @Test
+ @Ignore
+ public void testLocalChartInfoAdd() {
+ RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().build());
+ try {
+ MetadataChangeProposal chartInfoPatch =
+ new ChartInfoPatchBuilder()
+ .urn(UrnUtils.getUrn("urn:li:chart:(dashboardTool,chartId)"))
+ .addInputEdge(
+ DatasetUrn.createFromString(
+ "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleHiveDataset,PROD)"))
+ .build();
+ Future<MetadataWriteResponse> response = restEmitter.emit(chartInfoPatch);
+
+ System.out.println(response.get().getResponseContent());
+
+ } catch (URISyntaxException | IOException | ExecutionException | InterruptedException e) {
+ System.out.println(Arrays.asList(e.getStackTrace()));
+ }
+ }
+
+ @Test
+ @Ignore
+ public void testLocalChartInfoRemove() {
+ RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().build());
+ try {
+ MetadataChangeProposal chartInfoPatch =
+ new ChartInfoPatchBuilder()
+ .urn(UrnUtils.getUrn("urn:li:chart:(dashboardTool,chartId)"))
+ .removeInputEdge(
+ DatasetUrn.createFromString(
+ "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleHiveDataset,PROD)"))
+ .build();
+ Future<MetadataWriteResponse> response = restEmitter.emit(chartInfoPatch);
+
+ System.out.println(response.get().getResponseContent());
+
+ } catch (URISyntaxException | IOException | ExecutionException | InterruptedException e) {
+ System.out.println(Arrays.asList(e.getStackTrace()));
+ }
+ }
+
+ @Test
+ @Ignore
+ public void testLocalDashboardInfoAdd() {
+ RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().build());
+ try {
+ MetadataChangeProposal dashboardInfoPatch =
+ new DashboardInfoPatchBuilder()
+ .urn(UrnUtils.getUrn("urn:li:dashboard:(dashboardTool,dashboardId)"))
+ .addDatasetEdge(
+ DatasetUrn.createFromString(
+ "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleHiveDataset,PROD)"))
+ .addChartEdge(ChartUrn.createFromString("urn:li:chart:(dashboardTool,chartId)"))
+ .build();
+ Future<MetadataWriteResponse> response = restEmitter.emit(dashboardInfoPatch);
+
+ System.out.println(response.get().getResponseContent());
+
+ } catch (URISyntaxException | IOException | ExecutionException | InterruptedException e) {
+ System.out.println(Arrays.asList(e.getStackTrace()));
+ }
+ }
+
+ @Test
+ @Ignore
+ public void testLocalDashboardInfoRemove() {
+ RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().build());
+ try {
+ MetadataChangeProposal dashboardInfoPatch =
+ new DashboardInfoPatchBuilder()
+ .urn(UrnUtils.getUrn("urn:li:dashboard:(dashboardTool,dashboardId)"))
+ .removeDatasetEdge(
+ DatasetUrn.createFromString(
+ "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleHiveDataset,PROD)"))
+ .removeChartEdge(ChartUrn.createFromString("urn:li:chart:(dashboardTool,chartId)"))
+ .build();
+ Future<MetadataWriteResponse> response = restEmitter.emit(dashboardInfoPatch);
+
+ System.out.println(response.get().getResponseContent());
+
+ } catch (URISyntaxException | IOException | ExecutionException | InterruptedException e) {
+ System.out.println(Arrays.asList(e.getStackTrace()));
+ }
+ }
}

From 296e41dfed325116c2a5661c32ae27790b28aafd Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Wed, 3 Jan 2024 15:58:50 -0600
Subject: [PATCH 07/16] feat(docker): docker compose profiles updates (#9514)

Co-authored-by: Harshal Sheth
---
 docker/build.gradle | 7 ++++++-
 docker/profiles/README.md | 2 +-
 docker/profiles/docker-compose.actions.yml | 2 +-
 docker/profiles/docker-compose.frontend.yml | 4 ++--
docker/profiles/docker-compose.gms.yml | 16 ++++++++-------- .../profiles/docker-compose.prerequisites.yml | 18 +++++++++--------- docs/developers.md | 2 +- docs/how/updating-datahub.md | 3 ++- 8 files changed, 30 insertions(+), 24 deletions(-) diff --git a/docker/build.gradle b/docker/build.gradle index 190202620c382c..189c4959e04429 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -31,6 +31,11 @@ ext { pg_quickstart_modules = quickstart_modules - [':docker:mysql-setup'] + [':docker:postgres-setup'] } +tasks.register('minDockerCompose2.20', Exec) { + executable 'bash' + args '-c', 'echo -e "$(docker compose version --short)\n2.20"|sort --version-sort --check=quiet --reverse' +} + tasks.register('quickstart') {} tasks.register('quickstartSlim') {} tasks.register('quickstartDebug') {} @@ -118,9 +123,9 @@ tasks.getByName('quickstartDebugComposeUp').dependsOn( ) tasks.withType(ComposeUp).configureEach { shouldRunAfter('quickstartNuke') + dependsOn tasks.named("minDockerCompose2.20") } - task debugReload(type: Exec) { def cmd = ['docker compose -p datahub --profile debug'] + compose_args + ['restart'] + debug_reloadable commandLine 'bash', '-c', cmd.join(" ") diff --git a/docker/profiles/README.md b/docker/profiles/README.md index df09f15cd85cee..fb3c9e3c84a7a2 100644 --- a/docker/profiles/README.md +++ b/docker/profiles/README.md @@ -5,7 +5,7 @@ for quickstart use-cases as well as development use-cases. These configurations infrastructure configurations that DataHub can operate on. Requirements: -* Use the profiles requires a modern version of docker. +* Using profiles requires docker compose >= 2.20. * If using the debug/development profiles, you will need to have built the `debug` docker images locally. See the Development Profiles section for more details. 
```bash diff --git a/docker/profiles/docker-compose.actions.yml b/docker/profiles/docker-compose.actions.yml index a509a6a67d2705..676a72bae32018 100644 --- a/docker/profiles/docker-compose.actions.yml +++ b/docker/profiles/docker-compose.actions.yml @@ -1,7 +1,7 @@ x-datahub-actions-service: &datahub-actions-service hostname: actions - image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head} + image: ${DATAHUB_ACTIONS_IMAGE:-${DATAHUB_ACTIONS_REPO:-acryldata}/datahub-actions}:${ACTIONS_VERSION:-head} env_file: datahub-actions/env/docker.env environment: ACTIONS_EXTRA_PACKAGES: ${ACTIONS_EXTRA_PACKAGES:-} diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index 80cb4e7b4b596d..6e1bbc0be70f5c 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -1,7 +1,7 @@ x-datahub-frontend-service: &datahub-frontend-service hostname: datahub-frontend-react - image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-frontend-react}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 env_file: datahub-frontend/env/docker.env @@ -12,7 +12,7 @@ x-datahub-frontend-service: &datahub-frontend-service x-datahub-frontend-service-dev: &datahub-frontend-service-dev <<: *datahub-frontend-service - image: linkedin/datahub-frontend-react:debug + image: ${DATAHUB_FRONTEND_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-frontend-react}:debug ports: - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002 - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 01602c8b906b91..93072a76d4041b 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -54,7 +54,7 @@ x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env ################################# x-datahub-system-update-service: &datahub-system-update-service hostname: datahub-system-update - image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-head} command: - -u - SystemUpdate @@ -67,7 +67,7 @@ x-datahub-system-update-service: &datahub-system-update-service x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:debug + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:debug ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -85,7 +85,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-gms}:${DATAHUB_VERSION:-head} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: datahub-gms/env/docker.env @@ -102,7 +102,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:debug + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-gms}:debug ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -128,7 +128,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} ports: - 9091:9091 env_file: datahub-mae-consumer/env/docker.env @@ -137,7 +137,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:debug + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mae-consumer}:debug environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] volumes: @@ -151,7 +151,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} ports: - 9090:9090 env_file: datahub-mce-consumer/env/docker.env @@ -160,7 +160,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:debug + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-mce-consumer}:debug environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] volumes: diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index d90d4a252f9935..232239c6c70d08 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -128,7 +128,7 @@ services: container_name: mysql-setup profiles: *mysql-profiles-quickstart hostname: mysql-setup - image: ${DATAHUB_MYSQL_SETUP_IMAGE:-acryldata/datahub-mysql-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MYSQL_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mysql-setup}:${DATAHUB_VERSION:-head} env_file: mysql-setup/env/docker.env depends_on: mysql: @@ -139,7 +139,7 @@ services: <<: *mysql-setup container_name: mysql-setup-dev profiles: *mysql-profiles-dev - image: ${DATAHUB_MYSQL_SETUP_IMAGE:-acryldata/datahub-mysql-setup}:debug + image: ${DATAHUB_MYSQL_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mysql-setup}:debug postgres: container_name: postgres profiles: *postgres-profiles @@ -162,7 +162,7 @@ services: container_name: postgres-setup profiles: *postgres-profiles-quickstart hostname: postgres-setup - image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-acryldata/datahub-postgres-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-postgres-setup}:${DATAHUB_VERSION:-head} env_file: postgres-setup/env/docker.env depends_on: postgres: @@ -173,7 +173,7 @@ services: <<: *postgres-setup container_name: postgres-setup-dev profiles: *postgres-profiles-dev - image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-acryldata/datahub-postgres-setup}:debug + image: 
${DATAHUB_POSTGRES_SETUP_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-postgres-setup}:debug cassandra: container_name: cassandra profiles: *cassandra-profiles @@ -267,7 +267,7 @@ services: container_name: kafka-setup profiles: *profiles-quickstart hostname: kafka-setup - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-kafka-setup}:${DATAHUB_VERSION:-head} env_file: kafka-setup/env/docker.env environment: &kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-false} @@ -285,7 +285,7 @@ services: environment: <<: *kafka-setup-env DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-true} - image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:debug + image: ${DATAHUB_KAFKA_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-kafka-setup}:debug elasticsearch: container_name: elasticsearch profiles: *elasticsearch-profiles @@ -311,7 +311,7 @@ services: - esdata:/usr/share/elasticsearch/data elasticsearch-setup-dev: &elasticsearch-setup-dev container_name: elasticsearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:debug profiles: *elasticsearch-profiles hostname: elasticsearch-setup env_file: elasticsearch-setup/env/docker.env @@ -351,7 +351,7 @@ services: container_name: opensearch-setup profiles: *opensearch-profiles-quickstart hostname: opensearch-setup - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head} environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} @@ -365,7 +365,7 @@ services: container_name: opensearch-setup-dev profiles: *opensearch-profiles-dev hostname: opensearch-setup-dev - image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:debug + image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-${DATAHUB_REPO:-linkedin}/datahub-elasticsearch-setup}:debug environment: <<: *search-datastore-environment USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true} diff --git a/docs/developers.md b/docs/developers.md index fe007a56ddc68f..4e31aceeb43821 100644 --- a/docs/developers.md +++ b/docs/developers.md @@ -9,7 +9,7 @@ title: "Local Development" - [Java 17 JDK](https://openjdk.org/projects/jdk/17/) - [Python 3.10](https://www.python.org/downloads/release/python-3100/) - [Docker](https://www.docker.com/) -- [Docker Compose](https://docs.docker.com/compose/) +- [Docker Compose >=2.20](https://docs.docker.com/compose/) - Docker engine with at least 8GB of memory to run tests. ::: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 61ad2d623d72a4..fb082bea7d1517 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -8,7 +8,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - Updating MySQL version for quickstarts to 8.2, may cause quickstart issues for existing instances. 
 - Neo4j 5.x, may require migration from 4.x
-- Build now requires JDK17 (Runtime Java 11)
+- Build requires JDK17 (Runtime Java 11)
+- Build requires Docker Compose >= 2.20

 ### Potential Downtime

From 424057862790b520e6d6e7d9d0a04f52aa46e500 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 3 Jan 2024 17:16:16 -0500
Subject: [PATCH 08/16] feat(ui): switch to vite and vitest (#9451)

---
 .github/workflows/metadata-io.yml             |    3 -
 .github/workflows/spark-smoke-test.yml        |    3 +-
 build.gradle                                  |    4 +
 datahub-frontend/build.gradle                 |   18 -
 datahub-frontend/conf/routes                  |   11 +-
 datahub-web-react/.env                        |    4 +-
 datahub-web-react/.eslintrc.js                |    3 +-
 datahub-web-react/build.gradle                |   66 +-
 datahub-web-react/craco.config.js             |   75 -
 datahub-web-react/datahub-frontend.graphql    |  389 -
 datahub-web-react/{public => }/index.html     |   14 +-
 datahub-web-react/package.json                |   59 +-
 .../public/{ => assets}/favicon.ico           |  Bin
 .../public/{ => assets}/logo.png              |  Bin
 datahub-web-react/public/manifest.json        |    2 +-
 datahub-web-react/src/App.less                |    5 +-
 datahub-web-react/src/App.test.tsx            |   15 +-
 datahub-web-react/src/App.tsx                 |   35 +-
 datahub-web-react/src/Mocks.tsx               |   12 +
 datahub-web-react/src/app/Routes.tsx          |    4 +-
 .../src/app/analytics/analytics.ts            |    2 +-
 .../src/app/domain/DomainIcon.tsx             |    2 +-
 .../src/app/entity/dataJob/tabs/RunsTab.tsx   |    2 +-
 .../entity/dataset/profile/OperationsTab.tsx  |    2 +-
 .../dataset/profile/__tests__/Schema.test.tsx |   70 +-
 .../__tests__/SchemaDescriptionField.test.tsx |    4 +-
 .../__tests__/PlatformContent.test.tsx        |    6 +-
 .../embed/UpstreamHealth/FailingEntity.tsx    |    2 +-
 .../embed/UpstreamHealth/UpstreamHealth.tsx   |    2 +-
 .../__tests__/DocumentationTab.test.tsx       |    8 +-
 .../editor/__tests__/Editor.test.tsx          |    2 +-
 .../Entity/__tests__/DataJobFlowTab.test.tsx  |    6 +-
 .../entity/user/__tests__/UserHeader.test.tsx |   11 -
 .../ingest/source/builder/RecipeBuilder.tsx   |    8 +-
 .../source/builder/RecipeForm/FormField.tsx   |    8 +-
 .../source/builder/RecipeForm/RecipeForm.tsx  |    8 +-
 .../RecipeForm/SecretField/SecretField.tsx    |   16 +-
 .../TestConnection/TestConnectionModal.tsx    |    2 +-
 .../app/ingest/source/builder/YamlEditor.tsx  |    3 +-
 .../lineage/__tests__/LineageEdges.test.tsx   |   22 +-
 .../__tests__/LineageEntityView.test.tsx      |    2 +-
 .../lineage/__tests__/LineageTree.test.tsx    |   12 +-
 .../policy/_tests_/policyUtils.test.tsx       |  175 +-
 .../src/app/preview/DefaultPreviewCard.tsx    |    4 +-
 .../__tests__/Recommendations.test.tsx        |    1 +
 .../src/app/search/ToggleSidebarButton.tsx    |    4 +-
 .../__tests__/FilterRendererRegistry.test.tsx |    6 +-
 .../src/app/search/filters/utils.tsx          |    2 +-
 .../src/app/search/sidebar/EntityLink.tsx     |    2 +-
 .../app/search/sorting/SearchSortSelect.tsx   |    2 +-
 datahub-web-react/src/conf/Global.ts          |    1 -
 .../src/conf/theme/global-variables.less      |   26 +-
 .../src/graphql-mock/createServer.ts          |   12 -
 datahub-web-react/src/graphql-mock/server.ts  |   84 -
 datahub-web-react/src/index.tsx               |    3 +-
 datahub-web-react/src/react-app-env.d.ts      |    1 -
 datahub-web-react/src/setupProxy.js           |   37 -
 datahub-web-react/src/setupTests.ts           |   21 +-
 .../utils/test-utils/TestPageContainer.tsx    |    2 +-
 datahub-web-react/src/vite-env.d.ts           |    2 +
 datahub-web-react/tsconfig.json               |    5 +-
 datahub-web-react/vite.config.ts              |  100 +
 datahub-web-react/yarn.lock                   | 8860 +++-------------
 smoke-test/tests/cypress/package-lock.json    | 2031 ----
 .../tests/read_only/test_services_up.py       |    2 +-
 65 files changed, 1905 insertions(+), 10400 deletions(-)
 delete mode 100644 datahub-web-react/craco.config.js
 delete mode 100644 datahub-web-react/datahub-frontend.graphql
 rename datahub-web-react/{public => }/index.html (66%)
 rename datahub-web-react/public/{ => assets}/favicon.ico (100%)
 rename datahub-web-react/public/{ => assets}/logo.png (100%)
 delete mode 100644 datahub-web-react/src/graphql-mock/createServer.ts
 delete mode 100644 datahub-web-react/src/graphql-mock/server.ts
 delete mode 100644 datahub-web-react/src/react-app-env.d.ts
 delete mode 100644 datahub-web-react/src/setupProxy.js
 create mode 100644 datahub-web-react/src/vite-env.d.ts
 create mode 100644 datahub-web-react/vite.config.ts
 delete mode 100644 smoke-test/tests/cypress/package-lock.json

diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml
index 96229642244b67..c964352c3e129a 100644
--- a/.github/workflows/metadata-io.yml
+++ b/.github/workflows/metadata-io.yml
@@ -40,9 +40,6 @@ jobs:
           python-version: "3.10"
           cache: "pip"
       - name: Gradle build (and test)
-        # there is some race condition in gradle build, which makes gradle never terminate in ~30% of the runs
-        # running build first without datahub-web-react:yarnBuild and then with it is 100% stable
-        # datahub-frontend:unzipAssets depends on datahub-web-react:yarnBuild but gradle does not know about it
         run: |
           ./gradlew :metadata-io:test
       - uses: actions/upload-artifact@v3
diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml
index 94692bd3c2336a..bd99905a513d67 100644
--- a/.github/workflows/spark-smoke-test.yml
+++ b/.github/workflows/spark-smoke-test.yml
@@ -51,8 +51,7 @@ jobs:
           -x :datahub-web-react:yarnLint \
           -x :datahub-web-react:yarnGenerate \
           -x :datahub-web-react:yarnInstall \
-          -x :datahub-web-react:yarnQuickBuild \
-          -x :datahub-web-react:copyAssets \
+          -x :datahub-web-react:yarnBuild \
           -x :datahub-web-react:distZip \
           -x :datahub-web-react:jar
       - uses: actions/upload-artifact@v3
diff --git a/build.gradle b/build.gradle
index bb01a15a7db8d6..4680598165d285 100644
--- a/build.gradle
+++ b/build.gradle
@@ -325,6 +325,10 @@ subprojects {
   }

   plugins.withType(JavaPlugin).configureEach {
+    if (project.name == 'datahub-web-react') {
+      return
+    }
+
     dependencies {
       implementation externalDependency.annotationApi
       constraints {
diff --git a/datahub-frontend/build.gradle b/datahub-frontend/build.gradle
index 437c72e6394ea6..1174c5c5cfd5d1 100644
--- a/datahub-frontend/build.gradle
+++ b/datahub-frontend/build.gradle
@@ -1,5 +1,4 @@
 plugins {
-  id "io.github.kobylynskyi.graphql.codegen" version "4.1.1"
   id 'scala'
   id 'com.palantir.docker'
   id 'org.gradle.playframework'
@@ -39,23 +38,6 @@ artifacts {
   archives myTar
 }

-graphqlCodegen {
-  // For options: https://github.com/kobylynskyi/graphql-java-codegen/blob/master/docs/codegen-options.md
-  graphqlSchemaPaths = ["$projectDir/conf/datahub-frontend.graphql".toString()]
-  outputDir = new File("$projectDir/app/graphql")
-  packageName = "generated"
-  generateApis = true
-  modelValidationAnnotation = ""
-  customTypesMapping = [
-    Long: "Long",
-  ]
-}
-
-tasks.withType(Checkstyle) {
-  exclude "**/generated/**"
-}
-
-
 /*
 PLAY UPGRADE NOTE
 Generates the distribution jars under the expected names. The playFramework plugin only accepts certain name values
diff --git a/datahub-frontend/conf/routes b/datahub-frontend/conf/routes
index 3102c26497fedd..6b53a2789e7ccc 100644
--- a/datahub-frontend/conf/routes
+++ b/datahub-frontend/conf/routes
@@ -36,11 +36,14 @@ PUT /openapi/*path c
 HEAD          /openapi/*path                 controllers.Application.proxy(path: String, request: Request)
 PATCH         /openapi/*path                 controllers.Application.proxy(path: String, request: Request)

-# Map static resources from the /public folder to the /assets URL path
-GET           /assets/*file                  controllers.Assets.at(path="/public", file)
-
 # Analytics route
 POST          /track                         controllers.TrackingController.track(request: Request)

-# Wildcard route accepts any routes and delegates to serveAsset which in turn serves the React Bundle
+# Known React asset routes
+GET           /assets/*file                  controllers.Assets.at(path="/public/assets", file)
+GET           /node_modules/*file            controllers.Assets.at(path="/public/node_modules", file)
+GET           /manifest.json                 controllers.Assets.at(path="/public", file="manifest.json")
+GET           /robots.txt                    controllers.Assets.at(path="/public", file="robots.txt")
+
+# Wildcard route accepts any routes and delegates to serveAsset which in turn serves the React Bundle's index.html
 GET           /*path                         controllers.Application.index(path)
diff --git a/datahub-web-react/.env b/datahub-web-react/.env
index e5529bbdaa56da..7c02340752104b 100644
--- a/datahub-web-react/.env
+++ b/datahub-web-react/.env
@@ -1,5 +1,3 @@
-PUBLIC_URL=/assets
 REACT_APP_THEME_CONFIG=theme_light.config.json
 SKIP_PREFLIGHT_CHECK=true
-BUILD_PATH=build/yarn
-REACT_APP_PROXY_TARGET=http://localhost:9002
\ No newline at end of file
+REACT_APP_PROXY_TARGET=http://localhost:9002
diff --git a/datahub-web-react/.eslintrc.js b/datahub-web-react/.eslintrc.js
index 2806942dd10531..e48dfdb23a4e79 100644
--- a/datahub-web-react/.eslintrc.js
+++ b/datahub-web-react/.eslintrc.js
@@ -5,7 +5,7 @@ module.exports = {
         'airbnb-typescript',
         'airbnb/hooks',
         'plugin:@typescript-eslint/recommended',
-        'plugin:jest/recommended',
+        'plugin:vitest/recommended',
         'prettier',
     ],
     plugins: ['@typescript-eslint'],
@@ -46,6 +46,7 @@ module.exports = {
                 argsIgnorePattern: '^_',
             },
         ],
+        'vitest/prefer-to-be': 'off',
     },
     settings: {
         react: {
diff --git a/datahub-web-react/build.gradle b/datahub-web-react/build.gradle
index 72821d8b97dc0b..c0355b935137a4 100644
--- a/datahub-web-react/build.gradle
+++ b/datahub-web-react/build.gradle
@@ -19,7 +19,7 @@ node {
   version = '21.2.0'

   // Version of Yarn to use.
-  yarnVersion = '1.22.1'
+  yarnVersion = '1.22.21'

   // Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror).
   if (project.hasProperty('nodeDistBaseUrl')) {
@@ -44,10 +44,33 @@ node {
  */
 task yarnInstall(type: YarnTask) {
   args = ['install']
+
+  // The node_modules directory can contain built artifacts, so
+  // it's not really safe to cache it.
+  outputs.cacheIf { false }
+
+  inputs.files(
+    file('yarn.lock'),
+    file('package.json'),
+  )
+  outputs.dir('node_modules')
 }

 task yarnGenerate(type: YarnTask, dependsOn: yarnInstall) {
   args = ['run', 'generate']
+
+  outputs.cacheIf { true }
+
+  inputs.files(
+    yarnInstall.inputs.files,
+    file('codegen.yml'),
+    project.fileTree(dir: "../datahub-graphql-core/src/main/resources/", include: "*.graphql"),
+    project.fileTree(dir: "src", include: "**/*.graphql"),
+  )
+
+  outputs.files(
+    project.fileTree(dir: "src", include: "**/*.generated.ts"),
+  )
 }

 task yarnServe(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
@@ -55,7 +78,8 @@ task yarnServe(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
 }

 task yarnTest(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
-  args = ['run', 'test', '--watchAll', 'false']
+  // Explicitly runs in non-watch mode.
+  args = ['run', 'test', 'run']
 }

 task yarnLint(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
@@ -68,13 +92,24 @@ task yarnLintFix(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
   args = ['run', 'lint-fix']
 }

-task yarnBuild(type: YarnTask, dependsOn: [yarnInstall, yarnTest, yarnLint]) {
-  args = ['run', 'build']
-}
-
-task yarnQuickBuild(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
+task yarnBuild(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
   environment = [NODE_OPTIONS: "--max-old-space-size=3072 --openssl-legacy-provider"]
   args = ['run', 'build']
+
+  outputs.cacheIf { true }
+  inputs.files(
+    file('index.html'),
+    project.fileTree(dir: "src"),
+    project.fileTree(dir: "public"),
+
+    yarnInstall.inputs.files,
+    yarnGenerate.outputs.files,
+
+    file('.env'),
+    file('vite.config.ts'),
+    file('tsconfig.json'),
+  )
+  outputs.dir('dist')
 }

 task cleanExtraDirs {
@@ -82,9 +117,8 @@ task cleanExtraDirs {
   delete 'dist'
   delete 'tmp'
   delete 'just'
-  delete 'src/types.generated.ts'
   delete fileTree('../datahub-frontend/public')
-  delete fileTree(dir: 'src/graphql', include: '*.generated.ts')
+  delete fileTree(dir: 'src', include: '*.generated.ts')
 }
 clean.finalizedBy(cleanExtraDirs)
@@ -93,24 +127,16 @@ configurations {
 }

 distZip {
-  dependsOn yarnQuickBuild
+  dependsOn yarnBuild
   archiveFileName = "datahub-web-react-${archiveVersion}.${archiveExtension}"
   from 'dist'
 }

-task copyAssets(dependsOn: distZip) {
-  doLast {
-    copy {
-      from zipTree(distZip.outputs.files.first())
-      into "../datahub-frontend/public"
-    }
-  }
-}
-
 jar {
-  dependsOn distZip, copyAssets
+  dependsOn distZip
   into('public') {
     from zipTree(distZip.outputs.files.first())
   }
   archiveClassifier = 'assets'
 }
+build.dependsOn jar
diff --git a/datahub-web-react/craco.config.js b/datahub-web-react/craco.config.js
deleted file mode 100644
index 6ede45902128f5..00000000000000
--- a/datahub-web-react/craco.config.js
+++ /dev/null
@@ -1,75 +0,0 @@
-/* eslint-disable @typescript-eslint/no-var-requires */
-require('dotenv').config();
-const { whenProd } = require('@craco/craco');
-const CracoAntDesignPlugin = require('craco-antd');
-const path = require('path');
-const CopyWebpackPlugin = require('copy-webpack-plugin');
-
-// eslint-disable-next-line import/no-dynamic-require
-const themeConfig = require(`./src/conf/theme/${process.env.REACT_APP_THEME_CONFIG}`);
-
-function addLessPrefixToKeys(styles) {
-    const output = {};
-    Object.keys(styles).forEach((key) => {
-        output[`@${key}`] = styles[key];
-    });
-    return output;
-}
-
-module.exports = {
-    webpack: {
-        configure: {
-            optimization: whenProd(() => ({
-                splitChunks: {
-                    cacheGroups: {
-                        vendor: {
-                            test: /[\\/]node_modules[\\/]/,
-                            name: 'vendors',
-                            chunks: 'all',
-                        },
-                    },
-                },
-            })),
-            // Webpack 5 no longer automatically polyfills core Node.js modules
-            resolve: { fallback: { fs: false } },
-            // Ignore Webpack 5's missing source map warnings from node_modules
-            ignoreWarnings: [{ module: /node_modules/, message: /source-map-loader/ }],
-        },
-        plugins: {
-            add: [
-                // Self host images by copying them to the build directory
-                new CopyWebpackPlugin({
-                    patterns: [{ from: 'src/images', to: 'platforms' }],
-                }),
-                // Copy monaco-editor files to the build directory
-                new CopyWebpackPlugin({
-                    patterns: [
-                        { from: 'node_modules/monaco-editor/min/vs/', to: 'monaco-editor/vs' },
-                        { from: 'node_modules/monaco-editor/min-maps/vs/', to: 'monaco-editor/min-maps/vs' },
-                    ],
-                }),
-            ],
-        },
-    },
-    plugins: [
-        {
-            plugin: CracoAntDesignPlugin,
-            options: {
-                customizeThemeLessPath: path.join(__dirname, 'src/conf/theme/global-variables.less'),
-                customizeTheme: addLessPrefixToKeys(themeConfig.styles),
-            },
-        },
-    ],
-    jest: {
-        configure: {
-            // Use dist files instead of source files
-            moduleNameMapper: {
-                '^d3-interpolate-path': `d3-interpolate-path/build/d3-interpolate-path`,
-                '^d3-(.*)$': `d3-$1/dist/d3-$1`,
-                '^lib0/((?!dist).*)$': 'lib0/dist/$1.cjs',
-                '^y-protocols/(.*)$': 'y-protocols/dist/$1.cjs',
-                '\\.(css|less)$': '/src/__mocks__/styleMock.js',
-            },
-        },
-    },
-};
diff --git a/datahub-web-react/datahub-frontend.graphql b/datahub-web-react/datahub-frontend.graphql
deleted file mode 100644
index 6df3c387e14fe7..00000000000000
--- a/datahub-web-react/datahub-frontend.graphql
+++ /dev/null
@@ -1,389 +0,0 @@
-scalar Long
-
-schema {
-    query: Query
-    mutation: Mutation
-}
-
-type Query {
-    dataset(urn: String!): Dataset
-    user(urn: String!): CorpUser
-    search(input: SearchInput!): SearchResults
-    autoComplete(input: AutoCompleteInput!): AutoCompleteResults
-    browse(input: BrowseInput!): BrowseResults
-    browsePaths(input: BrowsePathsInput!): [[String!]!]
-}
-
-type Mutation {
-    logIn(username: String!, password: String!): CorpUser
-    updateDataset(input: DatasetUpdateInput!): Dataset
-}
-
-input DatasetUpdateInput {
-    urn: String!
-    ownership: OwnershipUpdate
-}
-
-input OwnershipUpdate {
-    owners: [OwnerUpdate!]
-}
-
-input OwnerUpdate {
-    # The owner URN, e.g. urn:li:corpuser:1
-    owner: String!
-
-    # The owner role type
-    type: OwnershipType!
-}
-
-enum OwnershipSourceType {
-    AUDIT
-    DATABASE
-    FILE_SYSTEM
-    ISSUE_TRACKING_SYSTEM
-    MANUAL
-    SERVICE
-    SOURCE_CONTROL
-    OTHER
-}
-
-type OwnershipSource {
-    """
-    The type of the source
-    """
-    type: OwnershipSourceType!
-
-    """
-    A reference URL for the source
-    """
-    url: String
-}
-
-enum OwnershipType {
-    """
-    A person or group that is in charge of developing the code
-    """
-    DEVELOPER
-
-    """
-    A person or group that is owning the data
-    """
-    DATAOWNER
-
-    """
-    A person or a group that oversees the operation, e.g. a DBA or SRE.
-    """
-    DELEGATE
-
-    """
-    A person, group, or service that produces/generates the data
-    """
-    PRODUCER
-
-    """
-    A person, group, or service that consumes the data
-    """
-    CONSUMER
-
-    """
-    A person or a group that has direct business interest
-    """
-    STAKEHOLDER
-}
-
-type Owner {
-    """
-    Owner object
-    """
-    owner: CorpUser!
-
-    """
-    The type of the ownership
-    """
-    type: OwnershipType
-
-    """
-    Source information for the ownership
-    """
-    source: OwnershipSource
-}
-
-type Ownership {
-    owners: [Owner!]
-
-    lastModified: Long!
-}
-
-enum FabricType {
-    """
-    Designates development fabrics
-    """
-    DEV
-
-    """
-    Designates early-integration (staging) fabrics
-    """
-    EI
-
-    """
-    Designates production fabrics
-    """
-    PROD
-
-    """
-    Designates corporation fabrics
-    """
-    CORP
-}
-
-enum PlatformNativeType {
-    """
-    Table
-    """
-    TABLE
-
-    """
-    View
-    """
-    VIEW
-
-    """
-    Directory in file system
-    """
-    DIRECTORY
-
-    """
-    Stream
-    """
-    STREAM
-
-    """
-    Bucket in key value store
-    """
-    BUCKET
-}
-
-type PropertyTuple {
-    key: String!
-    value: String
-}
-
-type SubTypes {
-    typeNames: [String!]
-}
-
-type Dataset {
-    urn: String!
-
-    platform: String!
-
-    name: String!
-
-    origin: FabricType!
-
-    description: String
-
-    uri: String
-
-    platformNativeType: PlatformNativeType
-
-    tags: [String!]!
-
-    properties: [PropertyTuple!]
-
-    createdTime: Long!
-
-    modifiedTime: Long!
-
-    ownership: Ownership
-
-    subTypes: SubTypes
-}
-
-type CorpUserInfo {
-    active: Boolean!
-
-    displayName: String
-
-    email: String!
-
-    title: String
-
-    manager: CorpUser
-
-    departmentId: Long
-
-    departmentName: String
-
-    firstName: String
-
-    lastName: String
-
-    fullName: String
-
-    countryCode: String
-}
-
-type CorpUserEditableInfo {
-    aboutMe: String
-
-    teams: [String!]
-
-    skills: [String!]
-
-    pictureLink: String
-}
-
-type CorpUser {
-    urn: String!
-
-    username: String!
-
-    info: CorpUserInfo
-
-    editableInfo: CorpUserEditableInfo
-}
-
-type CorpGroup implements Entity {
-    """
-    The unique user URN
-    """
-    urn: String!
-
-    """
-    GMS Entity Type
-    """
-    type: EntityType!
-
-    """
-    group name e.g. wherehows-dev, ask_metadata
-    """
-    name: String
-
-    """
-    Information of the corp group
-    """
-    info: CorpGroupInfo
-}
-
-
-type CorpGroupInfo {
-    """
-    email of this group
-    """
-    email: String!
-
-    """
-    owners of this group
-    """
-    admins: [String!]!
-
-    """
-    List of ldap urn in this group.
-    """
-    members: [String!]!
-
-    """
-    List of groups in this group.
-    """
-    groups: [String!]!
-}
-
-enum EntityType {
-    DATASET
-    USER
-    DATA_FLOW
-    DATA_JOB
-    CORP_USER
-    CORP_GROUP
-}
-
-# Search Input
-input SearchInput {
-    type: EntityType!
-    query: String!
-    start: Int
-    count: Int
-    filters: [FacetFilterInput!]
-}
-
-input FacetFilterInput {
-    field: String! # Facet Field Name
-    value: String! # Facet Value
-}
-
-# Search Output
-type SearchResults {
-    start: Int!
-    count: Int!
-    total: Int!
-    elements: [SearchResult!]!
-    facets: [FacetMetadata!]
-}
-
-union SearchResult = Dataset | CorpUser
-
-type FacetMetadata {
-    field: String!
-    aggregations: [AggregationMetadata!]!
-}
-
-type AggregationMetadata {
-    value: String!
-    count: Long!
-}
-
-# Autocomplete Input
-input AutoCompleteInput {
-    type: EntityType!
-    query: String!
-    field: String # Field name
-    limit: Int
-    filters: [FacetFilterInput!]
-}
-
-# Autocomplete Output
-type AutoCompleteResults {
-    query: String!
-    suggestions: [String!]!
-}
-
-# Browse Inputs
-input BrowseInput {
-    type: EntityType!
-    path: [String!]
-    start: Int
-    count: Int
-    filters: [FacetFilterInput!]
-}
-
-# Browse Output
-type BrowseResults {
-    entities: [BrowseResultEntity!]!
-    start: Int!
-    count: Int!
-    total: Int!
-    metadata: BrowseResultMetadata!
-}
-
-type BrowseResultEntity {
-    name: String!
-    urn: String!
-}
-
-type BrowseResultMetadata {
-    path: [String!]
-    groups: [BrowseResultGroup!]!
-    totalNumEntities: Long!
-}
-
-type BrowseResultGroup {
-    name: String!
-    count: Long!
-}
-
-# Browse Paths Input
-input BrowsePathsInput {
-    type: EntityType!
-    urn: String!
-}
diff --git a/datahub-web-react/public/index.html b/datahub-web-react/index.html
similarity index 66%
rename from datahub-web-react/public/index.html
rename to datahub-web-react/index.html
index ead3a0aba82cb9..9490881246e122 100644
--- a/datahub-web-react/public/index.html
+++ b/datahub-web-react/index.html
@@ -2,7 +2,7 @@
-
+
@@ -10,21 +10,13 @@
             manifest.json provides metadata used when your web app is installed on a
             user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
         -->
-
-
+        DataHub
+