From e58e2bf3be6cf43923ff400667406ee6dc95cd3a Mon Sep 17 00:00:00 2001
From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com>
Date: Mon, 18 Dec 2023 11:02:33 +0530
Subject: [PATCH 01/25] feat: Deprecation 'Note' changed to Markdown Renderable
(#9396)
Setting auto-merge after test cases pass
---
.../EntityDropdown/UpdateDeprecationModal.tsx | 14 +++-
.../components/styled/DeprecationPill.tsx | 82 +++++++++++++++++--
.../tests/cypress/cypress/support/commands.js | 2 +-
3 files changed, 86 insertions(+), 12 deletions(-)
diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx
index 6ae893e12575f..25527497b33a8 100644
--- a/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx
+++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/UpdateDeprecationModal.tsx
@@ -1,7 +1,10 @@
import React from 'react';
-import { Button, DatePicker, Form, Input, message, Modal } from 'antd';
+import { Button, DatePicker, Form, message, Modal } from 'antd';
+import styled from 'styled-components';
import { useBatchUpdateDeprecationMutation } from '../../../../graphql/mutations.generated';
import { handleBatchError } from '../utils';
+import { Editor } from '../tabs/Documentation/components/editor/Editor';
+import { ANTD_GRAY } from '../constants';
type Props = {
urns: string[];
@@ -9,6 +12,10 @@ type Props = {
refetch?: () => void;
};
+const StyledEditor = styled(Editor)`
+ border: 1px solid ${ANTD_GRAY[4.5]};
+`;
+
export const UpdateDeprecationModal = ({ urns, onClose, refetch }: Props) => {
const [batchUpdateDeprecation] = useBatchUpdateDeprecationMutation();
const [form] = Form.useForm();
@@ -64,10 +71,11 @@ export const UpdateDeprecationModal = ({ urns, onClose, refetch }: Props) => {
>
}
+ width='40%'
>
-
+
+
diff --git a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx
index f60a74247ebcc..9ec2aab193aa0 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/DeprecationPill.tsx
@@ -1,4 +1,4 @@
-import React from 'react';
+import React, { useState } from 'react';
import { InfoCircleOutlined } from '@ant-design/icons';
import { Divider, message, Modal, Popover, Tooltip, Typography } from 'antd';
import { blue } from '@ant-design/colors';
@@ -8,6 +8,8 @@ import { Deprecation } from '../../../../../types.generated';
import { getLocaleTimezone } from '../../../../shared/time/timeUtils';
import { ANTD_GRAY } from '../../constants';
import { useBatchUpdateDeprecationMutation } from '../../../../../graphql/mutations.generated';
+import { Editor } from '../../tabs/Documentation/components/editor/Editor';
+import StripMarkdownText, { removeMarkdown } from './StripMarkdownText';
const DeprecatedContainer = styled.div`
height: 18px;
@@ -38,11 +40,6 @@ const DeprecatedTitle = styled(Typography.Text)`
font-weight: bold;
`;
-const DeprecatedSubTitle = styled(Typography.Text)`
- display: block;
- margin-bottom: 5px;
-`;
-
const LastEvaluatedAtLabel = styled.div`
padding: 0;
margin: 0;
@@ -70,15 +67,42 @@ const IconGroup = styled.div`
}
`;
+const DescriptionContainer = styled.div`
+ position: relative;
+ display: flex;
+ flex-direction: column;
+ width: 100%;
+ height: 100%;
+ min-height: 22px;
+ margin-bottom: 14px;
+`;
+const StyledViewer = styled(Editor)`
+ padding-right: 8px;
+ display: block;
+
+ .remirror-editor.ProseMirror {
+ padding: 0;
+ }
+`;
+
+const ExpandedActions = styled.div`
+ height: 10px;
+`;
+const ReadLessText = styled(Typography.Link)`
+ margin-right: 4px;
+`;
type Props = {
urn: string;
deprecation: Deprecation;
refetch?: () => void;
showUndeprecate: boolean | null;
};
+const ABBREVIATED_LIMIT = 80;
export const DeprecationPill = ({ deprecation, urn, refetch, showUndeprecate }: Props) => {
const [batchUpdateDeprecationMutation] = useBatchUpdateDeprecationMutation();
+ const [expanded, setExpanded] = useState(false);
+ const overLimit = deprecation?.note && removeMarkdown(deprecation?.note).length > 80;
/**
* Deprecation Decommission Timestamp
*/
@@ -131,14 +155,56 @@ export const DeprecationPill = ({ deprecation, urn, refetch, showUndeprecate }:
return (
{deprecation?.note !== '' && Deprecation note}
{isDividerNeeded && }
- {deprecation?.note !== '' && {deprecation.note}}
+
+ {expanded || !overLimit ? (
+ <>
+ {
+ deprecation?.note && deprecation?.note !== '' &&
+ <>
+
+
+ {overLimit && (
+ {
+ setExpanded(false);
+ }}
+ >
+ Read Less
+
+ )}
+
+ >
+ }
+ >
+ ) : (
+ <>
+
+ {
+ setExpanded(true);
+ }}
+ >
+ Read More
+
+ >
+ }
+ shouldWrap
+ >
+ {deprecation.note}
+
+ >
+ )}
+
{deprecation?.decommissionTime !== null && (
diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js
index 5e3664f944edf..ffbd050488181 100644
--- a/smoke-test/tests/cypress/cypress/support/commands.js
+++ b/smoke-test/tests/cypress/cypress/support/commands.js
@@ -171,7 +171,7 @@ Cypress.Commands.add("deleteFromDropdown", () => {
Cypress.Commands.add("addViaFormModal", (text, modelHeader) => {
cy.waitTextVisible(modelHeader);
- cy.get(".ant-form-item-control-input-content > input[type='text']").first().type(text);
+ cy.get('.ProseMirror-focused').type(text);
cy.get(".ant-modal-footer > button:nth-child(2)").click();
});
From b4fe451d932315546ebd98623f1572a66c41ad43 Mon Sep 17 00:00:00 2001
From: gaurav2733 <77378510+gaurav2733@users.noreply.github.com>
Date: Mon, 18 Dec 2023 12:38:30 +0530
Subject: [PATCH 02/25] feat: markdown support for group description (#9455)
---
.../group/EditGroupDescriptionModal.tsx | 64 ++++++++
.../src/app/entity/group/GroupInfoSideBar.tsx | 145 ++++++++++++++++--
.../app/identity/group/CreateGroupModal.tsx | 106 +++++++------
.../cypress/e2e/settings/managing_groups.js | 6 +-
4 files changed, 261 insertions(+), 60 deletions(-)
create mode 100644 datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx
diff --git a/datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx b/datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx
new file mode 100644
index 0000000000000..a898a73c254ef
--- /dev/null
+++ b/datahub-web-react/src/app/entity/group/EditGroupDescriptionModal.tsx
@@ -0,0 +1,64 @@
+import React, { useState } from 'react';
+import { Button, Modal, Form } from 'antd';
+import styled from 'styled-components';
+
+import { Editor } from '../shared/tabs/Documentation/components/editor/Editor';
+import { ANTD_GRAY } from '../shared/constants';
+
+type Props = {
+ onClose: () => void;
+ onSaveAboutMe: () => void;
+ setStagedDescription: (des: string) => void;
+ stagedDescription: string | undefined;
+};
+const StyledEditor = styled(Editor)`
+ border: 1px solid ${ANTD_GRAY[4]};
+`;
+
+export default function EditGroupDescriptionModal({
+ onClose,
+ onSaveAboutMe,
+ setStagedDescription,
+ stagedDescription,
+}: Props) {
+ const [form] = Form.useForm();
+ const [aboutText,setAboutText] = useState(stagedDescription)
+
+ function updateDescription(description: string) {
+ setAboutText(aboutText)
+ setStagedDescription(description);
+
+ }
+
+ const saveDescription = () => {
+ onSaveAboutMe();
+ onClose();
+ };
+
+ return (
+
+
+
+ >
+ }
+ >
+
+
+
+
+
+
+
+ );
+}
diff --git a/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx b/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx
index d9eaed2682ea1..07885a4d0f630 100644
--- a/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx
+++ b/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx
@@ -16,14 +16,15 @@ import {
EmptyValue,
SocialDetails,
EditButton,
- AboutSection,
- AboutSectionText,
GroupsSection,
+ AboutSection,
} from '../shared/SidebarStyledComponents';
import GroupMembersSideBarSection from './GroupMembersSideBarSection';
import { useUserContext } from '../../context/useUserContext';
-
-const { Paragraph } = Typography;
+import StripMarkdownText, { removeMarkdown } from '../shared/components/styled/StripMarkdownText';
+import { Editor } from '../shared/tabs/Documentation/components/editor/Editor';
+import EditGroupDescriptionModal from './EditGroupDescriptionModal';
+import { REDESIGN_COLORS } from '../shared/constants';
type SideBarData = {
photoUrl: string | undefined;
@@ -80,6 +81,61 @@ const GroupTitle = styled(Typography.Title)`
}
`;
+const EditIcon = styled(EditOutlined)`
+ cursor: pointer;
+ color: ${REDESIGN_COLORS.BLUE};
+`;
+const AddNewDescription = styled(Button)`
+ display: none;
+ margin: -4px;
+ width: 140px;
+`;
+
+const StyledViewer = styled(Editor)`
+ padding-right: 8px;
+ display: block;
+
+ .remirror-editor.ProseMirror {
+ padding: 0;
+ }
+`;
+
+const DescriptionContainer = styled.div`
+ position: relative;
+ display: flex;
+ flex-direction: column;
+ width: 100%;
+ text-align:left;
+ font-weight: normal;
+ font
+ min-height: 22px;
+
+ &:hover ${AddNewDescription} {
+ display: block;
+ }
+ & ins.diff {
+ background-color: #b7eb8f99;
+ text-decoration: none;
+ &:hover {
+ background-color: #b7eb8faa;
+ }
+ }
+ & del.diff {
+ background-color: #ffa39e99;
+ text-decoration: line-through;
+ &: hover {
+ background-color: #ffa39eaa;
+ }
+ }
+`;
+
+const ExpandedActions = styled.div`
+ height: 10px;
+`;
+const ReadLessText = styled(Typography.Link)`
+ margin-right: 4px;
+`;
+
/**
* Responsible for reading & writing users.
*/
@@ -106,7 +162,17 @@ export default function GroupInfoSidebar({ sideBarData, refetch }: Props) {
const me = useUserContext();
const canEditGroup = me?.platformPrivileges?.manageIdentities;
const [groupTitle, setGroupTitle] = useState(name);
+ const [expanded, setExpanded] = useState(false);
+ const [isUpdatingDescription, SetIsUpdatingDescription] = useState(false);
+ const [stagedDescription, setStagedDescription] = useState(aboutText);
+
const [updateName] = useUpdateNameMutation();
+ const overLimit = removeMarkdown(aboutText || '').length > 80;
+ const ABBREVIATED_LIMIT = 80;
+
+ useEffect(() => {
+ setStagedDescription(aboutText);
+ }, [aboutText]);
useEffect(() => {
setGroupTitle(groupTitle);
@@ -136,12 +202,12 @@ export default function GroupInfoSidebar({ sideBarData, refetch }: Props) {
};
// About Text save
- const onSaveAboutMe = (inputString) => {
+ const onSaveAboutMe = () => {
updateCorpGroupPropertiesMutation({
variables: {
urn: urn || '',
input: {
- description: inputString,
+ description: stagedDescription,
},
},
})
@@ -201,16 +267,65 @@ export default function GroupInfoSidebar({ sideBarData, refetch }: Props) {
- {TITLES.about}
-
-
- {aboutText || }
-
-
+
+ {TITLES.about}
+
+ SetIsUpdatingDescription(true)} data-testid="edit-icon" />
+
+
+
+ {(aboutText && expanded) || !overLimit ? (
+ <>
+ {/* Read only viewer for displaying group description */}
+
+
+ {overLimit && (
+ {
+ setExpanded(false);
+ }}
+ >
+ Read Less
+
+ )}
+
+ >
+ ) : (
+ <>
+ {/* Display abbreviated description with option to read more */}
+
+ {
+ setExpanded(true);
+ }}
+ >
+ Read More
+
+ >
+ }
+ shouldWrap
+ >
+ {aboutText}
+
+ >
+ )}
+
+ {/* Modal for updating group description */}
+ {isUpdatingDescription && (
+ {
+ SetIsUpdatingDescription(false);
+ setStagedDescription(aboutText);
+ }}
+ onSaveAboutMe={onSaveAboutMe}
+ setStagedDescription={setStagedDescription}
+ stagedDescription={stagedDescription}
+ />
+ )}
diff --git a/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx b/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx
index 214cb251767c9..4ba714ca23ae0 100644
--- a/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx
+++ b/datahub-web-react/src/app/identity/group/CreateGroupModal.tsx
@@ -1,16 +1,23 @@
-import React, { useState } from 'react';
+import React, { useRef, useState } from 'react';
import { message, Button, Input, Modal, Typography, Form, Collapse } from 'antd';
+import styled from 'styled-components';
import { useCreateGroupMutation } from '../../../graphql/group.generated';
import { useEnterKeyListener } from '../../shared/useEnterKeyListener';
import { validateCustomUrnId } from '../../shared/textUtil';
import analytics, { EventType } from '../../analytics';
import { CorpGroup, EntityType } from '../../../types.generated';
+import { Editor as MarkdownEditor } from '../../entity/shared/tabs/Documentation/components/editor/Editor';
+import { ANTD_GRAY } from '../../entity/shared/constants';
type Props = {
onClose: () => void;
onCreate: (group: CorpGroup) => void;
};
+const StyledEditor = styled(MarkdownEditor)`
+ border: 1px solid ${ANTD_GRAY[4]};
+`;
+
export default function CreateGroupModal({ onClose, onCreate }: Props) {
const [stagedName, setStagedName] = useState('');
const [stagedDescription, setStagedDescription] = useState('');
@@ -19,45 +26,54 @@ export default function CreateGroupModal({ onClose, onCreate }: Props) {
const [createButtonEnabled, setCreateButtonEnabled] = useState(true);
const [form] = Form.useForm();
+ // Reference to the styled editor for handling focus
+ const styledEditorRef = useRef(null);
+
const onCreateGroup = () => {
- createGroupMutation({
- variables: {
- input: {
- id: stagedId,
- name: stagedName,
- description: stagedDescription,
- },
- },
- })
- .then(({ data, errors }) => {
- if (!errors) {
- analytics.event({
- type: EventType.CreateGroupEvent,
- });
- message.success({
- content: `Created group!`,
- duration: 3,
- });
- // TODO: Get a full corp group back from create endpoint.
- onCreate({
- urn: data?.createGroup || '',
- type: EntityType.CorpGroup,
+ // Check if the Enter key was pressed inside the styled editor to prevent unintended form submission
+ const isEditorNewlineKeypress =
+ document.activeElement !== styledEditorRef.current &&
+ !styledEditorRef.current?.contains(document.activeElement);
+ if (isEditorNewlineKeypress) {
+ createGroupMutation({
+ variables: {
+ input: {
+ id: stagedId,
name: stagedName,
- info: {
- description: stagedDescription,
- },
- });
- }
- })
- .catch((e) => {
- message.destroy();
- message.error({ content: `Failed to create group!: \n ${e.message || ''}`, duration: 3 });
+ description: stagedDescription,
+ },
+ },
})
- .finally(() => {
- setStagedName('');
- setStagedDescription('');
- });
- onClose();
+ .then(({ data, errors }) => {
+ if (!errors) {
+ analytics.event({
+ type: EventType.CreateGroupEvent,
+ });
+ message.success({
+ content: `Created group!`,
+ duration: 3,
+ });
+ // TODO: Get a full corp group back from create endpoint.
+ onCreate({
+ urn: data?.createGroup || '',
+ type: EntityType.CorpGroup,
+ name: stagedName,
+ info: {
+ description: stagedDescription,
+ },
+ });
+ }
+ })
+ .catch((e) => {
+ message.destroy();
+ message.error({ content: `Failed to create group!: \n ${e.message || ''}`, duration: 3 });
+ })
+ .finally(() => {
+ setStagedName('');
+ setStagedDescription('');
+ });
+ onClose();
+ }
};
// Handle the Enter press
@@ -65,8 +81,13 @@ export default function CreateGroupModal({ onClose, onCreate }: Props) {
querySelectorToExecuteClick: '#createGroupButton',
});
+ function updateDescription(description: string) {
+ setStagedDescription(description);
+ }
+
return (
Description}>
An optional description for your new group.
-
- setStagedDescription(event.target.value)}
- />
+
+ {/* Styled editor for the group description */}
+
+
+
diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js
index 70219a550cd8b..978a245c3d9e3 100644
--- a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js
+++ b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js
@@ -72,8 +72,10 @@ describe("create and manage group", () => {
cy.focused().clear().type(`Test group EDITED ${test_id}{enter}`);
cy.waitTextVisible("Name Updated");
cy.contains(`Test group EDITED ${test_id}`).should("be.visible");
- cy.contains("Test group description").find('[aria-label="edit"]').click();
- cy.focused().type(" EDITED{enter}");
+ cy.get('[data-testid="edit-icon"]').click();
+ cy.waitTextVisible("Edit Description");
+ cy.get("#description").should("be.visible").type(" EDITED");
+ cy.get("#updateGroupButton").click();
cy.waitTextVisible("Changes saved.");
cy.contains("Test group description EDITED").should("be.visible");
cy.clickOptionWithText("Add Owners");
From 9d386fbd6f9a0436b25daa2b4603d1fa0b8f44ee Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 18 Dec 2023 05:38:16 -0500
Subject: [PATCH 03/25] feat(ingest): enable CLL for dbt by default (#9466)
---
.../ingestion/source/dbt/dbt_common.py | 7 +-
.../ingestion/source/looker/looker_common.py | 2 +-
.../source/looker/looker_lib_wrapper.py | 2 +-
.../dbt_enabled_with_schemas_mces_golden.json | 248 ++++++++++++
.../dbt_test_column_meta_mapping_golden.json | 383 ++++++++++++++++++
...th_complex_owner_patterns_mces_golden.json | 248 ++++++++++++
...th_data_platform_instance_mces_golden.json | 248 ++++++++++++
...h_non_incremental_lineage_mces_golden.json | 248 ++++++++++++
..._target_platform_instance_mces_golden.json | 248 ++++++++++++
9 files changed, 1630 insertions(+), 4 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index af28be310587a..7bec07b40c4bd 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -300,7 +300,7 @@ class DBTCommonConfig(
description="When enabled, schemas will be inferred from the dbt node definition.",
)
include_column_lineage: bool = Field(
- default=False,
+ default=True,
description="When enabled, column-level lineage will be extracted from the dbt node definition. Requires `infer_dbt_schemas` to be enabled. "
"If you run into issues where the column name casing does not match up with properly, providing a datahub_api or using the rest sink will improve accuracy.",
)
@@ -696,7 +696,10 @@ def get_column_type(
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
-@capability(SourceCapability.LINEAGE_FINE, "Enabled using `include_column_lineage`")
+@capability(
+ SourceCapability.LINEAGE_FINE,
+ "Enabled by default, configure using `include_column_lineage`",
+)
class DBTSourceBase(StatefulIngestionSourceBase):
def __init__(self, config: DBTCommonConfig, ctx: PipelineContext, platform: str):
super().__init__(config, ctx)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index e440750cba0d0..53533a8d27c9b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -1015,7 +1015,7 @@ def __init__(
self.report = report
self.source_config = source_config
- @lru_cache()
+ @lru_cache(maxsize=200)
def get_explore(self, model: str, explore: str) -> Optional[LookerExplore]:
looker_explore = LookerExplore.from_api(
model,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
index 988caba1c0d74..8959868c27114 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
@@ -114,7 +114,7 @@ def get_available_permissions(self) -> Set[str]:
return permissions
- @lru_cache(maxsize=2000)
+ @lru_cache(maxsize=1000)
def get_user(self, id_: str, user_fields: str) -> Optional[User]:
self.client_stats.user_calls += 1
try:
diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json
index e4f01ef7a6c53..4deb725ed2b44 100644
--- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json
+++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json
@@ -247,6 +247,86 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -428,6 +508,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -650,6 +765,104 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -789,6 +1002,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json
index 4d5b008b695f9..588470ef41631 100644
--- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json
+++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json
@@ -201,6 +201,98 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),initial_full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),address)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),city)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),postal_code)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),phone)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -360,6 +452,52 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -574,6 +712,104 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),rental_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),staff_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -741,6 +977,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -1011,6 +1282,118 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),active)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),active)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),activebool)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),activebool)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),address_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),address_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),create_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),create_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),first_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_update)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_update)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),store_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),store_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json
index 0bdd5e3c895c2..926e8b8c8ed84 100644
--- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json
+++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json
@@ -211,6 +211,86 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -375,6 +455,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -597,6 +712,104 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -736,6 +949,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json
index 5ab0b11e37771..3727603266f25 100644
--- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json
+++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json
@@ -212,6 +212,86 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),address)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),address)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.city,PROD),city)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),city)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),postal_code)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),postal_code)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),phone)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),phone)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -376,6 +456,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -598,6 +713,104 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),payment_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),rental_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_01,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_02,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_03,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_04,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),staff_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -737,6 +950,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json
index 3725e590fee9e..ec879e6af766a 100644
--- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json
+++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json
@@ -212,6 +212,86 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -376,6 +456,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -598,6 +713,104 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -737,6 +950,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json
index a47abab6b40f7..e25c5e4faf6af 100644
--- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json
+++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json
@@ -212,6 +212,86 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -376,6 +456,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -598,6 +713,104 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_01,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_02,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_03,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_04,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
@@ -737,6 +950,41 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD)",
"type": "TRANSFORMED"
}
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)"
+ ],
+ "confidenceScore": 0.9
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)"
+ ],
+ "downstreamType": "FIELD_SET",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)"
+ ],
+ "confidenceScore": 0.9
+ }
]
}
},
From 03590a194885b2fbbb5249aef909d761c3ffc12c Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Mon, 18 Dec 2023 19:54:31 +0100
Subject: [PATCH 04/25] fix(ingest/snowflake) - Fixing snowflake url with
default region (#9443)
---
metadata-ingestion/setup.py | 8 +-
.../source/snowflake/snowflake_utils.py | 28 ++++-
.../snowflake/snowflake_golden.json | 116 +++++++++---------
.../integration/sql_server/test_sql_server.py | 5 +
.../tests/unit/test_snowflake_source.py | 27 ++++
5 files changed, 120 insertions(+), 64 deletions(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 1bc1bc5100b08..cb13a40125c0d 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -354,7 +354,11 @@
"mlflow": {"mlflow-skinny>=2.3.0"},
"mode": {"requests", "tenacity>=8.0.1"} | sqllineage_lib,
"mongodb": {"pymongo[srv]>=3.11", "packaging"},
- "mssql": sql_common | {"sqlalchemy-pytds>=0.3", "pyOpenSSL"},
+ "mssql": sql_common
+ | {
+ "sqlalchemy-pytds>=0.3",
+ "pyOpenSSL",
+ },
"mssql-odbc": sql_common | {"pyodbc"},
"mysql": mysql,
# mariadb should have same dependency as mysql
@@ -559,7 +563,7 @@
"kafka-connect",
"ldap",
"mongodb",
- "mssql",
+ "mssql" if sys.version_info >= (3, 8) else None,
"mysql",
"mariadb",
"redash",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
index 5a451bf197d34..af8d8824a4b17 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py
@@ -9,8 +9,8 @@
from datahub.configuration.pattern_utils import is_schema_allowed
from datahub.ingestion.source.snowflake.constants import (
GENERIC_PERMISSION_ERROR_KEY,
- SNOWFLAKE_DEFAULT_CLOUD,
SNOWFLAKE_REGION_CLOUD_REGION_MAPPING,
+ SnowflakeCloudProvider,
SnowflakeObjectDomain,
)
from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config
@@ -72,6 +72,15 @@ def report_error(self, key: str, reason: str) -> None:
class SnowflakeCommonMixin:
platform = "snowflake"
+ CLOUD_REGION_IDS_WITHOUT_CLOUD_SUFFIX = [
+ "us-west-2",
+ "us-east-1",
+ "eu-west-1",
+ "eu-central-1",
+ "ap-southeast-1",
+ "ap-southeast-2",
+ ]
+
@staticmethod
def create_snowsight_base_url(
account_locator: str,
@@ -79,12 +88,23 @@ def create_snowsight_base_url(
cloud: str,
privatelink: bool = False,
) -> Optional[str]:
+ if cloud:
+ url_cloud_provider_suffix = f".{cloud}"
+
+ if cloud == SnowflakeCloudProvider.AWS:
+ # Some AWS regions do not have cloud suffix. See below the list:
+ # https://docs.snowflake.com/en/user-guide/admin-account-identifier#non-vps-account-locator-formats-by-cloud-platform-and-region
+ if (
+ cloud_region_id
+ in SnowflakeCommonMixin.CLOUD_REGION_IDS_WITHOUT_CLOUD_SUFFIX
+ ):
+ url_cloud_provider_suffix = ""
+ else:
+ url_cloud_provider_suffix = f".{cloud}"
if privatelink:
url = f"https://app.{account_locator}.{cloud_region_id}.privatelink.snowflakecomputing.com/"
- elif cloud == SNOWFLAKE_DEFAULT_CLOUD:
- url = f"https://app.snowflake.com/{cloud_region_id}/{account_locator}/"
else:
- url = f"https://app.snowflake.com/{cloud_region_id}.{cloud}/{account_locator}/"
+ url = f"https://app.snowflake.com/{cloud_region_id}{url_cloud_provider_suffix}/{account_locator}/"
return url
@staticmethod
diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
index c7273fee5a2e5..ece54f00eeaa0 100644
--- a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
+++ b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
@@ -11,20 +11,20 @@
"env": "PROD",
"database": "test_db"
},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/",
"name": "TEST_DB",
"description": "Comment for TEST_DB",
"created": {
- "time": 1623110400000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623110400000
+ "time": 1623103200000
}
}
},
"systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
+ "lastObserved": 1615443388097,
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -144,20 +144,20 @@
"database": "test_db",
"schema": "test_schema"
},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/",
"name": "TEST_SCHEMA",
"description": "comment for TEST_DB.TEST_SCHEMA",
"created": {
- "time": 1623110400000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623110400000
+ "time": 1623103200000
}
}
},
"systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
+ "lastObserved": 1615443388097,
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -489,22 +489,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/",
"name": "TABLE_1",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -788,22 +788,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/",
"name": "TABLE_2",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -1087,22 +1087,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/",
"name": "TABLE_3",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -1386,22 +1386,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/",
"name": "TABLE_4",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -1685,22 +1685,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/",
"name": "TABLE_5",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_5",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -1984,22 +1984,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/",
"name": "TABLE_6",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -2283,22 +2283,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/",
"name": "TABLE_7",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -2582,22 +2582,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/",
"name": "TABLE_8",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -2881,22 +2881,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/",
"name": "TABLE_9",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -3180,22 +3180,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/",
"name": "TABLE_10",
"qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10",
"description": "Comment for Table",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -3470,22 +3470,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/",
"name": "VIEW_1",
"qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_1",
"description": "Comment for View",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
@@ -3805,22 +3805,22 @@
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/",
+ "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/",
"name": "VIEW_2",
"qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_2",
"description": "Comment for View",
"created": {
- "time": 1623090600000
+ "time": 1623103200000
},
"lastModified": {
- "time": 1623090600000
+ "time": 1623103200000
},
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28",
+ "runId": "snowflake-2023_12_18-10_16_09",
"lastRunId": "no-run-id-provided"
}
},
diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
index f439a322c2677..5ed672d527264 100644
--- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
+++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
@@ -1,5 +1,6 @@
import os
import subprocess
+import sys
import time
import pytest
@@ -8,6 +9,10 @@
from tests.test_helpers.click_helpers import run_datahub_cmd
from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port
+pytestmark = pytest.mark.skipif(
+ sys.version_info < (3, 8), reason="requires python 3.8 or higher"
+)
+
@pytest.fixture(scope="module")
def mssql_runner(docker_compose_runner, pytestconfig):
diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py
index 536c91ace4f5e..69a7510692df1 100644
--- a/metadata-ingestion/tests/unit/test_snowflake_source.py
+++ b/metadata-ingestion/tests/unit/test_snowflake_source.py
@@ -24,6 +24,7 @@
from datahub.ingestion.source.snowflake.snowflake_usage_v2 import (
SnowflakeObjectAccessEntry,
)
+from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeCommonMixin
from datahub.ingestion.source.snowflake.snowflake_v2 import SnowflakeV2Source
from tests.test_helpers import test_connection_helpers
@@ -584,3 +585,29 @@ def test_email_filter_query_generation_with_case_insensitive_filter():
filter_query
== "AND (rlike(user_name, '.*@example.com','c')) AND NOT (rlike(user_name, '.*@example2.com','c'))"
)
+
+
+def test_create_snowsight_base_url_us_west():
+ (
+ cloud,
+ cloud_region_id,
+ ) = SnowflakeCommonMixin.get_cloud_region_from_snowflake_region_id("aws_us_west_2")
+
+ result = SnowflakeCommonMixin.create_snowsight_base_url(
+ "account_locator", cloud_region_id, cloud, False
+ )
+ assert result == "https://app.snowflake.com/us-west-2/account_locator/"
+
+
+def test_create_snowsight_base_url_ap_northeast_1():
+ (
+ cloud,
+ cloud_region_id,
+ ) = SnowflakeCommonMixin.get_cloud_region_from_snowflake_region_id(
+ "aws_ap_northeast_1"
+ )
+
+ result = SnowflakeCommonMixin.create_snowsight_base_url(
+ "account_locator", cloud_region_id, cloud, False
+ )
+ assert result == "https://app.snowflake.com/ap-northeast-1.aws/account_locator/"
From 193d1464a628fc800e926f04fcd4bd1d6774d858 Mon Sep 17 00:00:00 2001
From: noggi
Date: Mon, 18 Dec 2023 14:06:17 -0800
Subject: [PATCH 05/25] Fix downstream CI issue (#9479)
---
docker/datahub-ingestion-base/Dockerfile | 2 +-
docker/datahub-ingestion/Dockerfile | 2 +-
docker/datahub-ingestion/build.gradle | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile
index e0f9fdc997071..81fec61ea5073 100644
--- a/docker/datahub-ingestion-base/Dockerfile
+++ b/docker/datahub-ingestion-base/Dockerfile
@@ -4,7 +4,7 @@ ARG BASE_IMAGE=base
# Defining custom repo urls for use in enterprise environments. Re-used between stages below.
ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
ARG GITHUB_REPO_URL=https://github.com
-ARG DEBIAN_REPO_URL=http://deb.debian.org/debian
+ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
ARG PIP_MIRROR_URL=null
FROM golang:1-alpine3.18 AS dockerize-binary
diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile
index 9516c31a19e21..2898a363a0a18 100644
--- a/docker/datahub-ingestion/Dockerfile
+++ b/docker/datahub-ingestion/Dockerfile
@@ -3,7 +3,7 @@ ARG APP_ENV=full
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head
ARG PIP_MIRROR_URL=null
-ARG DEBIAN_REPO_URL=http://deb.debian.org/debian
+ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0
diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle
index 36444210f1938..0b08f189e6b45 100644
--- a/docker/datahub-ingestion/build.gradle
+++ b/docker/datahub-ingestion/build.gradle
@@ -33,7 +33,7 @@ docker {
i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden())
}
- def dockerBuildArgs = [DOCKER_VERSION: version, RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", '')]
+ def dockerBuildArgs = [DOCKER_VERSION: version, RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", ''), BASE_IMAGE: "${docker_registry}/datahub-ingestion-base"]
// Add build args if they are defined (needed for some CI or enterprise environments)
if (project.hasProperty('pipMirrorUrl')) {
From ecda3e618704c5eb335ad1a21c30f0c935581f64 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 18 Dec 2023 18:26:33 -0500
Subject: [PATCH 06/25] feat(ingest): pydantic v2 compatibility (#9434)
---
.github/workflows/airflow-plugin.yml | 7 ++--
.../airflow-plugin/tox.ini | 9 +++++
metadata-ingestion/setup.py | 39 ++++++++++++++++---
.../api/entities/datacontract/assertion.py | 4 +-
.../datacontract/assertion_operator.py | 16 ++++----
.../datacontract/data_quality_assertion.py | 11 +++---
.../api/entities/datacontract/datacontract.py | 23 +++++------
.../datacontract/freshness_assertion.py | 15 ++++---
.../entities/datacontract/schema_assertion.py | 14 ++++---
.../src/datahub/cli/check_cli.py | 13 ++++++-
.../src/datahub/configuration/common.py | 16 +++++++-
.../src/datahub/configuration/datetimes.py | 4 +-
.../pydantic_migration_helpers.py | 29 ++++++++++++++
.../configuration/time_window_config.py | 16 ++++++--
.../configuration/validate_field_rename.py | 4 +-
.../ingestion/glossary/datahub_classifier.py | 11 +++++-
.../source/bigquery_v2/bigquery_config.py | 2 +-
.../ingestion/source/delta_lake/config.py | 4 +-
.../source/snowflake/snowflake_config.py | 2 +-
.../ingestion/source_config/sql/snowflake.py | 2 +-
.../src/datahub/utilities/urns/urn_iter.py | 2 +-
.../integration/snowflake/test_snowflake.py | 16 ++++----
.../unit/{ => config}/test_allow_deny.py | 0
.../unit/{ => config}/test_config_clean.py | 0
.../tests/unit/config/test_config_model.py | 18 +++++++--
.../{ => config}/test_pydantic_validators.py | 13 +++++--
.../{ => config}/test_time_window_config.py | 0
27 files changed, 209 insertions(+), 81 deletions(-)
rename metadata-ingestion/tests/unit/{ => config}/test_allow_deny.py (100%)
rename metadata-ingestion/tests/unit/{ => config}/test_config_clean.py (100%)
rename metadata-ingestion/tests/unit/{ => config}/test_pydantic_validators.py (92%)
rename metadata-ingestion/tests/unit/{ => config}/test_time_window_config.py (100%)
diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml
index cd1e159b7d53c..70816e5f093d1 100644
--- a/.github/workflows/airflow-plugin.yml
+++ b/.github/workflows/airflow-plugin.yml
@@ -32,6 +32,7 @@ jobs:
strategy:
matrix:
include:
+ # Note: this should be kept in sync with tox.ini.
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.1.4"
extra_pip_extras: plugin-v1
@@ -39,13 +40,13 @@ jobs:
extra_pip_requirements: "apache-airflow~=2.2.4"
extra_pip_extras: plugin-v1
- python-version: "3.10"
- extra_pip_requirements: "apache-airflow~=2.4.0"
+ extra_pip_requirements: 'apache-airflow~=2.4.0 pluggy==1.0.0 "pendulum<3.0"'
extra_pip_extras: plugin-v2
- python-version: "3.10"
- extra_pip_requirements: "apache-airflow~=2.6.0"
+ extra_pip_requirements: 'apache-airflow~=2.6.0 "pendulum<3.0"'
extra_pip_extras: plugin-v2
- python-version: "3.10"
- extra_pip_requirements: "apache-airflow>=2.7.0"
+ extra_pip_requirements: "apache-airflow>=2.7.0 pydantic==2.4.2"
extra_pip_extras: plugin-v2
fail-fast: false
steps:
diff --git a/metadata-ingestion-modules/airflow-plugin/tox.ini b/metadata-ingestion-modules/airflow-plugin/tox.ini
index 1010bd2933e45..27ae2ce65ba65 100644
--- a/metadata-ingestion-modules/airflow-plugin/tox.ini
+++ b/metadata-ingestion-modules/airflow-plugin/tox.ini
@@ -10,6 +10,7 @@ envlist = py38-airflow21, py38-airflow22, py310-airflow24, py310-airflow26, py31
use_develop = true
extras = dev,integration-tests,plugin-v1
deps =
+ # This should be kept in sync with the Github Actions matrix.
-e ../../metadata-ingestion/
# Airflow version
airflow21: apache-airflow~=2.1.0
@@ -20,7 +21,15 @@ deps =
# See https://github.com/datahub-project/datahub/pull/9365
airflow24: apache-airflow~=2.4.0,pluggy==1.0.0
airflow26: apache-airflow~=2.6.0
+ # Respect the constraints file on pendulum.
+ # See https://github.com/apache/airflow/issues/36274
+ airflow24,airflow26: pendulum>=2.0,<3.0
+ # The Airflow 2.7 constraints file points at pydantic v2, so we match that here.
+ # https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt
+ # Note that Airflow is actually compatible with both pydantic v1 and v2, and the
+ # constraints file is overly restrictive.
airflow27: apache-airflow~=2.7.0
+ airflow27: pydantic==2.4.2
commands =
pytest --cov-append {posargs}
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index cb13a40125c0d..13c9d3c99aaca 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -14,9 +14,10 @@
"mypy_extensions>=0.4.3",
# Actual dependencies.
"typing-inspect",
+ # pydantic 1.8.2 is incompatible with mypy 0.910.
+ # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
# pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885
- # pydantic 2 makes major, backwards-incompatible changes - https://github.com/pydantic/pydantic/issues/4887
- "pydantic>=1.5.1,!=1.10.3,<2",
+ "pydantic>=1.10.0,!=1.10.3",
"mixpanel>=4.9.0",
"sentry-sdk",
}
@@ -53,6 +54,18 @@
"ruamel.yaml",
}
+pydantic_no_v2 = {
+ # pydantic 2 makes major, backwards-incompatible changes - https://github.com/pydantic/pydantic/issues/4887
+ # Tags sources that require the pydantic v2 API.
+ "pydantic<2",
+}
+
+plugin_common = {
+ # While pydantic v2 support is experimental, require that all plugins
+ # continue to use v1. This will ensure that no ingestion recipes break.
+ *pydantic_no_v2,
+}
+
rest_common = {"requests", "requests_file"}
kafka_common = {
@@ -118,6 +131,7 @@
"sqlalchemy>=1.4.39, <2",
# Required for SQL profiling.
"great-expectations>=0.15.12, <=0.15.50",
+ *pydantic_no_v2, # because of great-expectations
# scipy version restricted to reduce backtracking, used by great-expectations,
"scipy>=1.7.2",
# GE added handling for higher version of jinja2
@@ -229,6 +243,7 @@
iceberg_common = {
# Iceberg Python SDK
"pyiceberg",
+ *pydantic_no_v2, # because of pyiceberg
"pyarrow>=9.0.0, <13.0.0",
}
@@ -477,9 +492,6 @@
"flake8-bugbear==23.3.12",
"isort>=5.7.0",
"mypy==1.0.0",
- # pydantic 1.8.2 is incompatible with mypy 0.910.
- # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
- "pydantic>=1.10.0",
*test_api_requirements,
pytest_dep,
"pytest-asyncio>=0.16.0",
@@ -740,7 +752,22 @@
extras_require={
"base": list(framework_common),
**{
- plugin: list(framework_common | dependencies)
+ plugin: list(
+ framework_common
+ | (
+ plugin_common
+ if plugin
+ not in {
+ "airflow",
+ "datahub-rest",
+ "datahub-kafka",
+ "sync-file-emitter",
+ "sql-parser",
+ }
+ else set()
+ )
+ | dependencies
+ )
for (plugin, dependencies) in plugins.items()
},
"all": list(
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py
index c45d4ddc92458..89ac528efe81a 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py
@@ -1,7 +1,7 @@
from typing import Optional
-from datahub.configuration import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel
-class BaseAssertion(ConfigModel):
+class BaseAssertion(v1_ConfigModel):
description: Optional[str] = None
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py
index a41b0f7aafd9f..dc0c97d1c74e5 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py
@@ -2,7 +2,7 @@
from typing_extensions import Literal, Protocol
-from datahub.configuration import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel
from datahub.metadata.schema_classes import (
AssertionStdOperatorClass,
AssertionStdParameterClass,
@@ -58,7 +58,7 @@ def _generate_assertion_std_parameters(
)
-class EqualToOperator(ConfigModel):
+class EqualToOperator(v1_ConfigModel):
type: Literal["equal_to"]
value: Union[str, int, float]
@@ -71,7 +71,7 @@ def generate_parameters(self) -> AssertionStdParametersClass:
return _generate_assertion_std_parameters(value=self.value)
-class BetweenOperator(ConfigModel):
+class BetweenOperator(v1_ConfigModel):
type: Literal["between"]
min: Union[int, float]
max: Union[int, float]
@@ -87,7 +87,7 @@ def generate_parameters(self) -> AssertionStdParametersClass:
)
-class LessThanOperator(ConfigModel):
+class LessThanOperator(v1_ConfigModel):
type: Literal["less_than"]
value: Union[int, float]
@@ -100,7 +100,7 @@ def generate_parameters(self) -> AssertionStdParametersClass:
return _generate_assertion_std_parameters(value=self.value)
-class GreaterThanOperator(ConfigModel):
+class GreaterThanOperator(v1_ConfigModel):
type: Literal["greater_than"]
value: Union[int, float]
@@ -113,7 +113,7 @@ def generate_parameters(self) -> AssertionStdParametersClass:
return _generate_assertion_std_parameters(value=self.value)
-class LessThanOrEqualToOperator(ConfigModel):
+class LessThanOrEqualToOperator(v1_ConfigModel):
type: Literal["less_than_or_equal_to"]
value: Union[int, float]
@@ -126,7 +126,7 @@ def generate_parameters(self) -> AssertionStdParametersClass:
return _generate_assertion_std_parameters(value=self.value)
-class GreaterThanOrEqualToOperator(ConfigModel):
+class GreaterThanOrEqualToOperator(v1_ConfigModel):
type: Literal["greater_than_or_equal_to"]
value: Union[int, float]
@@ -139,7 +139,7 @@ def generate_parameters(self) -> AssertionStdParametersClass:
return _generate_assertion_std_parameters(value=self.value)
-class NotNullOperator(ConfigModel):
+class NotNullOperator(v1_ConfigModel):
type: Literal["not_null"]
operator: str = AssertionStdOperatorClass.NOT_NULL
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py
index 6a3944ba36baf..975aa359bd203 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py
@@ -1,12 +1,11 @@
from typing import List, Optional, Union
-import pydantic
from typing_extensions import Literal
import datahub.emitter.mce_builder as builder
from datahub.api.entities.datacontract.assertion import BaseAssertion
from datahub.api.entities.datacontract.assertion_operator import Operators
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
AssertionInfoClass,
@@ -25,7 +24,7 @@
class IdConfigMixin(BaseAssertion):
- id_raw: Optional[str] = pydantic.Field(
+ id_raw: Optional[str] = v1_Field(
default=None,
alias="id",
description="The id of the assertion. If not provided, one will be generated using the type.",
@@ -38,7 +37,7 @@ def generate_default_id(self) -> str:
class CustomSQLAssertion(IdConfigMixin, BaseAssertion):
type: Literal["custom_sql"]
sql: str
- operator: Operators = pydantic.Field(discriminator="type")
+ operator: Operators = v1_Field(discriminator="type")
def generate_default_id(self) -> str:
return f"{self.type}-{self.sql}-{self.operator.id()}"
@@ -89,11 +88,11 @@ def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass:
)
-class DataQualityAssertion(ConfigModel):
+class DataQualityAssertion(v1_ConfigModel):
__root__: Union[
CustomSQLAssertion,
ColumnUniqueAssertion,
- ] = pydantic.Field(discriminator="type")
+ ] = v1_Field(discriminator="type")
@property
def id(self) -> str:
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py
index f3c6be55e5fea..e0ef85d5fd66c 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py
@@ -1,7 +1,6 @@
import collections
from typing import Iterable, List, Optional, Tuple
-import pydantic
from ruamel.yaml import YAML
from typing_extensions import Literal
@@ -11,7 +10,11 @@
)
from datahub.api.entities.datacontract.freshness_assertion import FreshnessAssertion
from datahub.api.entities.datacontract.schema_assertion import SchemaAssertion
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import (
+ v1_ConfigModel,
+ v1_Field,
+ v1_validator,
+)
from datahub.emitter.mce_builder import datahub_guid, make_assertion_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
@@ -26,7 +29,7 @@
from datahub.utilities.urns.urn import guess_entity_type
-class DataContract(ConfigModel):
+class DataContract(v1_ConfigModel):
"""A yml representation of a Data Contract.
This model is used as a simpler, Python-native representation of a DataHub data contract.
@@ -36,29 +39,27 @@ class DataContract(ConfigModel):
version: Literal[1]
- id: Optional[str] = pydantic.Field(
+ id: Optional[str] = v1_Field(
default=None,
alias="urn",
description="The data contract urn. If not provided, one will be generated.",
)
- entity: str = pydantic.Field(
+ entity: str = v1_Field(
description="The entity urn that the Data Contract is associated with"
)
# TODO: add support for properties
# properties: Optional[Dict[str, str]] = None
- schema_field: Optional[SchemaAssertion] = pydantic.Field(
- default=None, alias="schema"
- )
+ schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")
- freshness: Optional[FreshnessAssertion] = pydantic.Field(default=None)
+ freshness: Optional[FreshnessAssertion] = v1_Field(default=None)
# TODO: Add a validator to ensure that ids are unique
- data_quality: Optional[List[DataQualityAssertion]] = pydantic.Field(default=None)
+ data_quality: Optional[List[DataQualityAssertion]] = v1_Field(default=None)
_original_yaml_dict: Optional[dict] = None
- @pydantic.validator("data_quality")
+ @v1_validator("data_quality") # type: ignore
def validate_data_quality(
cls, data_quality: Optional[List[DataQualityAssertion]]
) -> Optional[List[DataQualityAssertion]]:
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py
index 71741d76b22fc..8694276688967 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py
@@ -3,11 +3,10 @@
from datetime import timedelta
from typing import List, Union
-import pydantic
from typing_extensions import Literal
from datahub.api.entities.datacontract.assertion import BaseAssertion
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
AssertionInfoClass,
@@ -25,10 +24,10 @@
class CronFreshnessAssertion(BaseAssertion):
type: Literal["cron"]
- cron: str = pydantic.Field(
+ cron: str = v1_Field(
description="The cron expression to use. See https://crontab.guru/ for help."
)
- timezone: str = pydantic.Field(
+ timezone: str = v1_Field(
"UTC",
description="The timezone to use for the cron schedule. Defaults to UTC.",
)
@@ -58,10 +57,10 @@ def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleCla
)
-class FreshnessAssertion(ConfigModel):
- __root__: Union[
- CronFreshnessAssertion, FixedIntervalFreshnessAssertion
- ] = pydantic.Field(discriminator="type")
+class FreshnessAssertion(v1_ConfigModel):
+ __root__: Union[CronFreshnessAssertion, FixedIntervalFreshnessAssertion] = v1_Field(
+ discriminator="type"
+ )
@property
def id(self):
diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py
index b62f94e0592fc..39297d1a98d02 100644
--- a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py
+++ b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py
@@ -3,11 +3,10 @@
import json
from typing import List, Union
-import pydantic
from typing_extensions import Literal
from datahub.api.entities.datacontract.assertion import BaseAssertion
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.extractor.json_schema_util import get_schema_metadata
from datahub.metadata.schema_classes import (
@@ -23,7 +22,7 @@
class JsonSchemaContract(BaseAssertion):
type: Literal["json-schema"]
- json_schema: dict = pydantic.Field(alias="json-schema")
+ json_schema: dict = v1_Field(alias="json-schema")
_schema_metadata: SchemaMetadataClass
@@ -37,7 +36,10 @@ def _init_private_attributes(self) -> None:
)
-class FieldListSchemaContract(BaseAssertion, arbitrary_types_allowed=True):
+class FieldListSchemaContract(BaseAssertion):
+ class Config:
+ arbitrary_types_allowed = True
+
type: Literal["field-list"]
fields: List[SchemaFieldClass]
@@ -56,8 +58,8 @@ def _init_private_attributes(self) -> None:
)
-class SchemaAssertion(ConfigModel):
- __root__: Union[JsonSchemaContract, FieldListSchemaContract] = pydantic.Field(
+class SchemaAssertion(v1_ConfigModel):
+ __root__: Union[JsonSchemaContract, FieldListSchemaContract] = v1_Field(
discriminator="type"
)
diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py
index f7996900f7a7a..2732a72aea539 100644
--- a/metadata-ingestion/src/datahub/cli/check_cli.py
+++ b/metadata-ingestion/src/datahub/cli/check_cli.py
@@ -126,10 +126,21 @@ def metadata_diff(
default=False,
help="Include extra information for each plugin.",
)
+@click.option(
+ "--source",
+ type=str,
+ default=None,
+)
@telemetry.with_telemetry()
-def plugins(verbose: bool) -> None:
+def plugins(source: Optional[str], verbose: bool) -> None:
"""List the enabled ingestion plugins."""
+ if source:
+ # Quick helper for one-off checks with full stack traces.
+ source_registry.get(source)
+ click.echo(f"Source {source} is enabled.")
+ return
+
click.secho("Sources:", bold=True)
click.echo(source_registry.summary(verbose=verbose, col_width=25))
click.echo()
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index f225856ca43ce..0030332bcfd54 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -99,8 +99,20 @@ def _schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None:
@classmethod
def parse_obj_allow_extras(cls: Type[_ConfigSelf], obj: Any) -> _ConfigSelf:
- with unittest.mock.patch.object(cls.Config, "extra", pydantic.Extra.allow):
- return cls.parse_obj(obj)
+ if PYDANTIC_VERSION_2:
+ try:
+ with unittest.mock.patch.dict(
+ cls.model_config, # type: ignore
+ {"extra": "allow"},
+ clear=False,
+ ):
+ cls.model_rebuild(force=True) # type: ignore
+ return cls.parse_obj(obj)
+ finally:
+ cls.model_rebuild(force=True) # type: ignore
+ else:
+ with unittest.mock.patch.object(cls.Config, "extra", pydantic.Extra.allow):
+ return cls.parse_obj(obj)
class PermissiveConfigModel(ConfigModel):
diff --git a/metadata-ingestion/src/datahub/configuration/datetimes.py b/metadata-ingestion/src/datahub/configuration/datetimes.py
index 41af7565593d9..1520462fa9bf8 100644
--- a/metadata-ingestion/src/datahub/configuration/datetimes.py
+++ b/metadata-ingestion/src/datahub/configuration/datetimes.py
@@ -65,6 +65,8 @@ def parse_absolute_time(input: str) -> datetime:
def parse_relative_timespan(input: str) -> timedelta:
+ raw_input = input
+
neg = False
input = input.strip()
@@ -79,7 +81,7 @@ def parse_relative_timespan(input: str) -> timedelta:
if neg:
delta = -delta
- logger.debug(f'Parsed "{input}" as {delta}.')
+ logger.debug(f'Parsed "{raw_input}" as {delta}.')
return delta
diff --git a/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py b/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py
index f1876b500598b..bd931abe2e84d 100644
--- a/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py
+++ b/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py
@@ -19,12 +19,41 @@ class PydanticDeprecatedSince20(Warning): # type: ignore
if PYDANTIC_VERSION_2:
from pydantic import BaseModel as GenericModel
+ from pydantic.v1 import ( # type: ignore
+ BaseModel as v1_BaseModel,
+ Extra as v1_Extra,
+ Field as v1_Field,
+ root_validator as v1_root_validator,
+ validator as v1_validator,
+ )
else:
+ from pydantic import ( # type: ignore
+ BaseModel as v1_BaseModel,
+ Extra as v1_Extra,
+ Field as v1_Field,
+ root_validator as v1_root_validator,
+ validator as v1_validator,
+ )
from pydantic.generics import GenericModel # type: ignore
+class v1_ConfigModel(v1_BaseModel):
+ """A simplified variant of our main ConfigModel class.
+
+ This one only uses pydantic v1 features.
+ """
+
+ class Config:
+ extra = v1_Extra.forbid
+ underscore_attrs_are_private = True
+
+
__all__ = [
"PYDANTIC_VERSION_2",
"PydanticDeprecatedSince20",
"GenericModel",
+ "v1_ConfigModel",
+ "v1_Field",
+ "v1_root_validator",
+ "v1_validator",
]
diff --git a/metadata-ingestion/src/datahub/configuration/time_window_config.py b/metadata-ingestion/src/datahub/configuration/time_window_config.py
index 15de7470e4d82..f20ab85be0585 100644
--- a/metadata-ingestion/src/datahub/configuration/time_window_config.py
+++ b/metadata-ingestion/src/datahub/configuration/time_window_config.py
@@ -68,6 +68,12 @@ def default_start_time(
assert abs(delta) >= get_bucket_duration_delta(
values["bucket_duration"]
), "Relative start time should be in terms of configured bucket duration. e.g '-2 days' or '-2 hours'."
+
+ # The end_time's default value is not yet populated, in which case
+ # we can just manually generate it here.
+ if "end_time" not in values:
+ values["end_time"] = datetime.now(tz=timezone.utc)
+
return get_time_bucket(
values["end_time"] + delta, values["bucket_duration"]
)
@@ -80,9 +86,13 @@ def default_start_time(
@pydantic.validator("start_time", "end_time")
def ensure_timestamps_in_utc(cls, v: datetime) -> datetime:
- assert (
- v.tzinfo == timezone.utc
- ), 'timezone is not UTC; try adding a "Z" to the value e.g. "2021-07-20T00:00:00Z"'
+ if v.tzinfo is None:
+ raise ValueError(
+ "Timestamps must be in UTC. Try adding a 'Z' to the value e.g. '2021-07-20T00:00:00Z'"
+ )
+
+ # If the timestamp is timezone-aware but not in UTC, convert it to UTC.
+ v = v.astimezone(timezone.utc)
return v
diff --git a/metadata-ingestion/src/datahub/configuration/validate_field_rename.py b/metadata-ingestion/src/datahub/configuration/validate_field_rename.py
index bb01f2b787123..de2a16e9bf247 100644
--- a/metadata-ingestion/src/datahub/configuration/validate_field_rename.py
+++ b/metadata-ingestion/src/datahub/configuration/validate_field_rename.py
@@ -49,4 +49,6 @@ def _validate_field_rename(cls: Type, values: dict) -> dict:
# validator with pre=True gets all the values that were passed in.
# Given that a renamed field doesn't show up in the fields list, we can't use
# the field-level validator, even with a different field name.
- return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_rename)
+ return pydantic.root_validator(pre=True, skip_on_failure=True, allow_reuse=True)(
+ _validate_field_rename
+ )
diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py
index 1f2b7f5689ea3..42eb930c80f9d 100644
--- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py
+++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py
@@ -8,6 +8,7 @@
from pydantic.fields import Field
from datahub.configuration.common import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
from datahub.ingestion.glossary.classifier import Classifier
@@ -50,7 +51,10 @@ class ValuesFactorConfig(ConfigModel):
class PredictionFactorsAndWeights(ConfigModel):
class Config:
- allow_population_by_field_name = True
+ if PYDANTIC_VERSION_2:
+ populate_by_name = True
+ else:
+ allow_population_by_field_name = True
Name: float = Field(alias="name")
Description: float = Field(alias="description")
@@ -60,7 +64,10 @@ class Config:
class InfoTypeConfig(ConfigModel):
class Config:
- allow_population_by_field_name = True
+ if PYDANTIC_VERSION_2:
+ populate_by_name = True
+ else:
+ allow_population_by_field_name = True
Prediction_Factors_and_Weights: PredictionFactorsAndWeights = Field(
description="Factors and their weights to consider when predicting info types",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
index cbe68a454ea43..c13b08a6d9656 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
@@ -284,7 +284,7 @@ def validate_bigquery_audit_metadata_datasets(
return v
- @root_validator(pre=False)
+ @root_validator(pre=False, skip_on_failure=True)
def backward_compatibility_configs_set(cls, values: Dict) -> Dict:
project_id = values.get("project_id")
project_id_pattern = values.get("project_id_pattern")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py
index f3616ca648a3e..81a54d1327d05 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py
@@ -4,6 +4,7 @@
import pydantic
from cached_property import cached_property
from pydantic import Field
+from typing_extensions import Literal
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.source_common import (
@@ -46,10 +47,9 @@ class DeltaLakeSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
"'/' and URNs will be created using "
"relative_path only.",
)
- platform: str = Field(
+ platform: Literal["delta-lake"] = Field(
default="delta-lake",
description="The platform that this source connects to",
- const=True,
)
platform_instance: Optional[str] = Field(
default=None,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
index 032bdef178fdf..b896df1fa340e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
@@ -176,7 +176,7 @@ def validate_include_column_lineage(cls, v, values):
)
return v
- @root_validator(pre=False)
+ @root_validator(pre=False, skip_on_failure=True)
def validate_unsupported_configs(cls, values: Dict) -> Dict:
value = values.get("include_read_operational_stats")
if value is not None and value:
diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
index 46bd24c7e1f4c..e9db82ce75cd9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
@@ -107,7 +107,7 @@ def validate_account_id(cls, account_id: str) -> str:
return account_id
@pydantic.validator("authentication_type", always=True)
- def authenticator_type_is_valid(cls, v, values, field):
+ def authenticator_type_is_valid(cls, v, values):
if v not in VALID_AUTH_TYPES.keys():
raise ValueError(
f"unsupported authenticator type '{v}' was provided,"
diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
index 4f228494f416b..3389a6fb05ee8 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
@@ -150,7 +150,7 @@ def modify_urn(urn: str) -> str:
if guess_entity_type(urn) == "dataset":
return _lowercase_dataset_urn(urn)
elif guess_entity_type(urn) == "schemaField":
- cur_urn = Urn.create_from_string(urn)
+ cur_urn = Urn.from_string(urn)
cur_urn._entity_ids[0] = _lowercase_dataset_urn(cur_urn._entity_ids[0])
return str(cur_urn)
return urn
diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py
index 1b58696e4014c..39a62056a7e4a 100644
--- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py
+++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py
@@ -87,18 +87,18 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
confidence_level_threshold=0.58,
info_types_config={
"Age": InfoTypeConfig(
- Prediction_Factors_and_Weights=PredictionFactorsAndWeights(
- Name=0, Values=1, Description=0, Datatype=0
+ prediction_factors_and_weights=PredictionFactorsAndWeights(
+ name=0, values=1, description=0, datatype=0
)
),
"CloudRegion": InfoTypeConfig(
- Prediction_Factors_and_Weights=PredictionFactorsAndWeights(
- Name=0,
- Description=0,
- Datatype=0,
- Values=1,
+ prediction_factors_and_weights=PredictionFactorsAndWeights(
+ name=0,
+ description=0,
+ datatype=0,
+ values=1,
),
- Values=ValuesFactorConfig(
+ values=ValuesFactorConfig(
prediction_type="regex",
regex=[
r"(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\d+"
diff --git a/metadata-ingestion/tests/unit/test_allow_deny.py b/metadata-ingestion/tests/unit/config/test_allow_deny.py
similarity index 100%
rename from metadata-ingestion/tests/unit/test_allow_deny.py
rename to metadata-ingestion/tests/unit/config/test_allow_deny.py
diff --git a/metadata-ingestion/tests/unit/test_config_clean.py b/metadata-ingestion/tests/unit/config/test_config_clean.py
similarity index 100%
rename from metadata-ingestion/tests/unit/test_config_clean.py
rename to metadata-ingestion/tests/unit/config/test_config_clean.py
diff --git a/metadata-ingestion/tests/unit/config/test_config_model.py b/metadata-ingestion/tests/unit/config/test_config_model.py
index ffac5c465f554..f53390a3deb18 100644
--- a/metadata-ingestion/tests/unit/config/test_config_model.py
+++ b/metadata-ingestion/tests/unit/config/test_config_model.py
@@ -3,8 +3,11 @@
import pydantic
import pytest
-from datahub.configuration.common import ConfigModel, redact_raw_config
-from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig
+from datahub.configuration.common import (
+ AllowDenyPattern,
+ ConfigModel,
+ redact_raw_config,
+)
def test_extras_not_allowed():
@@ -76,8 +79,15 @@ def test_config_redaction():
def test_shared_defaults():
- c1 = UnityCatalogSourceConfig(token="s", workspace_url="https://workspace_url")
- c2 = UnityCatalogSourceConfig(token="s", workspace_url="https://workspace_url")
+ class SourceConfig(ConfigModel):
+ token: str
+ workspace_url: str
+ catalog_pattern: AllowDenyPattern = pydantic.Field(
+ default=AllowDenyPattern.allow_all(),
+ )
+
+ c1 = SourceConfig(token="s", workspace_url="https://workspace_url")
+ c2 = SourceConfig(token="s", workspace_url="https://workspace_url")
assert c2.catalog_pattern.allow == [".*"]
c1.catalog_pattern.allow += ["foo"]
diff --git a/metadata-ingestion/tests/unit/test_pydantic_validators.py b/metadata-ingestion/tests/unit/config/test_pydantic_validators.py
similarity index 92%
rename from metadata-ingestion/tests/unit/test_pydantic_validators.py
rename to metadata-ingestion/tests/unit/config/test_pydantic_validators.py
index 3e9ec6cbaf357..399245736805c 100644
--- a/metadata-ingestion/tests/unit/test_pydantic_validators.py
+++ b/metadata-ingestion/tests/unit/config/test_pydantic_validators.py
@@ -7,7 +7,10 @@
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.configuration.validate_field_rename import pydantic_renamed_field
-from datahub.utilities.global_warning_util import get_global_warnings
+from datahub.utilities.global_warning_util import (
+ clear_global_warnings,
+ get_global_warnings,
+)
def test_field_rename():
@@ -76,9 +79,11 @@ class TestModel(ConfigModel):
def test_field_deprecated():
+ clear_global_warnings()
+
class TestModel(ConfigModel):
- d1: Optional[str]
- d2: Optional[str]
+ d1: Optional[str] = None
+ d2: Optional[str] = None
b: str
_validate_deprecated_d1 = pydantic_field_deprecated("d1")
@@ -93,3 +98,5 @@ class TestModel(ConfigModel):
assert v.d2 == "deprecated"
assert any(["d1 is deprecated" in warning for warning in get_global_warnings()])
assert any(["d2 is deprecated" in warning for warning in get_global_warnings()])
+
+ clear_global_warnings()
diff --git a/metadata-ingestion/tests/unit/test_time_window_config.py b/metadata-ingestion/tests/unit/config/test_time_window_config.py
similarity index 100%
rename from metadata-ingestion/tests/unit/test_time_window_config.py
rename to metadata-ingestion/tests/unit/config/test_time_window_config.py
From 7b067822bd8602c00fe5a0efdd15a6bb7a33bad6 Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Mon, 18 Dec 2023 18:35:02 -0800
Subject: [PATCH 07/25] feat(gms): Add support for platform-based browse
(#9376)
Co-authored-by: John Joyce
---
.../graphql/featureflags/FeatureFlags.java | 1 +
.../resolvers/chart/BrowseV2Resolver.java | 20 +++-
.../resolvers/config/AppConfigResolver.java | 1 +
.../graphql/resolvers/search/SearchUtils.java | 14 +++
.../src/main/resources/app.graphql | 5 +
.../src/main/resources/search.graphql | 9 +-
.../browse/BrowseV2ResolverTest.java | 2 +-
datahub-web-react/src/appConfigContext.tsx | 1 +
datahub-web-react/src/graphql/app.graphql | 1 +
.../metadata/client/JavaEntityClient.java | 24 +++++
.../elasticsearch/ElasticSearchService.java | 12 +++
.../elasticsearch/query/ESBrowseDAO.java | 91 +++++++++++++++++++
.../src/main/resources/application.yml | 1 +
.../linkedin/entity/client/EntityClient.java | 22 +++++
.../entity/client/RestliEntityClient.java | 14 +++
.../metadata/search/EntitySearchService.java | 19 ++++
16 files changed, 231 insertions(+), 6 deletions(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
index 07bd1fba5d8a8..e74ed09849763 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
@@ -12,6 +12,7 @@ public class FeatureFlags {
private boolean readOnlyModeEnabled = false;
private boolean showSearchFiltersV2 = false;
private boolean showBrowseV2 = false;
+ private boolean platformBrowseV2 = false;
private PreProcessHooks preProcessHooks;
private boolean showAcrylInfo = false;
private boolean showAccessManagement = false;
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java
index 292d6108b7a04..da4a3a76dd7e0 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java
@@ -2,14 +2,16 @@
import static com.linkedin.datahub.graphql.Constants.BROWSE_PATH_V2_DELIMITER;
import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument;
-import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.resolveView;
+import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*;
+import com.google.common.collect.ImmutableList;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.BrowseResultGroupV2;
import com.linkedin.datahub.graphql.generated.BrowseResultMetadata;
import com.linkedin.datahub.graphql.generated.BrowseResultsV2;
import com.linkedin.datahub.graphql.generated.BrowseV2Input;
+import com.linkedin.datahub.graphql.generated.EntityType;
import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper;
import com.linkedin.datahub.graphql.resolvers.ResolverUtils;
import com.linkedin.datahub.graphql.resolvers.search.SearchUtils;
@@ -43,8 +45,8 @@ public class BrowseV2Resolver implements DataFetcher get(DataFetchingEnvironment environment) {
final QueryContext context = environment.getContext();
final BrowseV2Input input = bindArgument(environment.getArgument("input"), BrowseV2Input.class);
- final String entityName = EntityTypeMapper.getName(input.getType());
+ final List entityNames = getEntityNames(input);
final int start = input.getStart() != null ? input.getStart() : DEFAULT_START;
final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT;
final String query = input.getQuery() != null ? input.getQuery() : "*";
@@ -70,7 +72,7 @@ public CompletableFuture get(DataFetchingEnvironment environmen
BrowseResultV2 browseResults =
_entityClient.browseV2(
- entityName,
+ entityNames,
pathStr,
maybeResolvedView != null
? SearchUtils.combineFilters(
@@ -87,6 +89,18 @@ public CompletableFuture get(DataFetchingEnvironment environmen
});
}
+ public static List getEntityNames(BrowseV2Input input) {
+ List entityTypes;
+ if (input.getTypes() != null && input.getTypes().size() > 0) {
+ entityTypes = input.getTypes();
+ } else if (input.getType() != null) {
+ entityTypes = ImmutableList.of(input.getType());
+ } else {
+ entityTypes = BROWSE_ENTITY_TYPES;
+ }
+ return entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
+ }
+
private BrowseResultsV2 mapBrowseResults(BrowseResultV2 browseResults) {
BrowseResultsV2 results = new BrowseResultsV2();
results.setTotal(browseResults.getNumGroups());
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
index 34f7f133f6fb9..81b52991cde90 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
@@ -175,6 +175,7 @@ public CompletableFuture get(final DataFetchingEnvironment environmen
.setShowAcrylInfo(_featureFlags.isShowAcrylInfo())
.setShowAccessManagement(_featureFlags.isShowAccessManagement())
.setNestedDomainsEnabled(_featureFlags.isNestedDomainsEnabled())
+ .setPlatformBrowseV2(_featureFlags.isPlatformBrowseV2())
.build();
appConfig.setFeatureFlags(featureFlagsConfig);
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
index d04cb57e1a860..444ab4bcc3c3c 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
@@ -92,6 +92,20 @@ private SearchUtils() {}
EntityType.NOTEBOOK,
EntityType.DATA_PRODUCT);
+ /** Entities that are part of browse by default */
+ public static final List BROWSE_ENTITY_TYPES =
+ ImmutableList.of(
+ EntityType.DATASET,
+ EntityType.DASHBOARD,
+ EntityType.CHART,
+ EntityType.CONTAINER,
+ EntityType.MLMODEL,
+ EntityType.MLMODEL_GROUP,
+ EntityType.MLFEATURE_TABLE,
+ EntityType.DATA_FLOW,
+ EntityType.DATA_JOB,
+ EntityType.NOTEBOOK);
+
/** A prioritized list of source filter types used to generate quick filters */
public static final List PRIORITIZED_SOURCE_ENTITY_TYPES =
Stream.of(
diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql
index 075a3b0fac43b..52451e195ee84 100644
--- a/datahub-graphql-core/src/main/resources/app.graphql
+++ b/datahub-graphql-core/src/main/resources/app.graphql
@@ -437,6 +437,11 @@ type FeatureFlagsConfig {
"""
showBrowseV2: Boolean!
+ """
+ Whether browse v2 is platform mode, which means that platforms are displayed instead of entity types at the root.
+ """
+ platformBrowseV2: Boolean!
+
"""
Whether we should show CTAs in the UI related to moving to Managed DataHub by Acryl.
"""
diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql
index e0cde5a2db9f9..8f2377edb546e 100644
--- a/datahub-graphql-core/src/main/resources/search.graphql
+++ b/datahub-graphql-core/src/main/resources/search.graphql
@@ -1176,9 +1176,14 @@ Input required for browse queries
"""
input BrowseV2Input {
"""
- The browse entity type
+ The browse entity type - deprecated use types instead
"""
- type: EntityType!
+ type: EntityType
+
+ """
    The browse entity types. If not provided, all types will be used.
+ """
+ types: [EntityType!]
"""
The browse path V2 - a list with each entry being part of the browse path V2
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java
index bffc2b31af2b9..433772d7e2cfe 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java
@@ -249,7 +249,7 @@ private static EntityClient initMockEntityClient(
EntityClient client = Mockito.mock(EntityClient.class);
Mockito.when(
client.browseV2(
- Mockito.eq(entityName),
+ Mockito.eq(ImmutableList.of(entityName)),
Mockito.eq(path),
Mockito.eq(filter),
Mockito.eq(query),
diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx
index 4087ad453687c..8c1089b868e5a 100644
--- a/datahub-web-react/src/appConfigContext.tsx
+++ b/datahub-web-react/src/appConfigContext.tsx
@@ -50,6 +50,7 @@ export const DEFAULT_APP_CONFIG = {
showAcrylInfo: false,
showAccessManagement: false,
nestedDomainsEnabled: true,
+ platformBrowseV2: false,
},
};
diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql
index 4e9bbb11d8c5a..fe28340349147 100644
--- a/datahub-web-react/src/graphql/app.graphql
+++ b/datahub-web-react/src/graphql/app.graphql
@@ -65,6 +65,7 @@ query appConfig {
showAcrylInfo
showAccessManagement
nestedDomainsEnabled
+ platformBrowseV2
}
}
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
index 53b974b560e2a..e7ec4d313b5f5 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
@@ -235,6 +235,30 @@ public BrowseResultV2 browseV2(
return _entitySearchService.browseV2(entityName, path, filter, input, start, count);
}
+ /**
+ * Gets browse V2 snapshot of a given path
+ *
+ * @param entityNames entities being browsed
+ * @param path path being browsed
+ * @param filter browse filter
+ * @param input search query
+ * @param start start offset of first group
+ * @param count max number of results requested
+ * @throws RemoteInvocationException
+ */
+ @Nonnull
+ public BrowseResultV2 browseV2(
+ @Nonnull List entityNames,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input,
+ int start,
+ int count,
+ @Nonnull Authentication authentication) {
+ // TODO: cache browseV2 results
+ return _entitySearchService.browseV2(entityNames, path, filter, input, start, count);
+ }
+
@SneakyThrows
@Deprecated
public void update(@Nonnull final Entity entity, @Nonnull final Authentication authentication)
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
index f40da59a149fa..fd7491fe32ea3 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java
@@ -210,6 +210,18 @@ public BrowseResultV2 browseV2(
return esBrowseDAO.browseV2(entityName, path, filter, input, start, count);
}
+ @Nonnull
+ @Override
+ public BrowseResultV2 browseV2(
+ @Nonnull List entityNames,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input,
+ int start,
+ int count) {
+ return esBrowseDAO.browseV2(entityNames, path, filter, input, start, count);
+ }
+
@Nonnull
@Override
public List getBrowsePaths(@Nonnull String entityName, @Nonnull Urn urn) {
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java
index 5ea60b24a577a..3c71a2dfd9180 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java
@@ -427,6 +427,44 @@ public BrowseResultV2 browseV2(
}
}
+ public BrowseResultV2 browseV2(
+ @Nonnull List entities,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input,
+ int start,
+ int count) {
+ try {
+ final SearchResponse groupsResponse;
+
+ try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esGroupSearch").time()) {
+ final String finalInput = input.isEmpty() ? "*" : input;
+ groupsResponse =
+ client.search(
+ constructGroupsSearchRequestBrowseAcrossEntities(
+ entities, path, filter, finalInput),
+ RequestOptions.DEFAULT);
+ }
+
+ final BrowseGroupsResultV2 browseGroupsResult =
+ extractGroupsResponseV2(groupsResponse, path, start, count);
+ final int numGroups = browseGroupsResult.getTotalGroups();
+
+ return new BrowseResultV2()
+ .setMetadata(
+ new BrowseResultMetadata()
+ .setTotalNumEntities(browseGroupsResult.getTotalNumEntities())
+ .setPath(path))
+ .setGroups(new BrowseResultGroupV2Array(browseGroupsResult.getGroups()))
+ .setNumGroups(numGroups)
+ .setFrom(start)
+ .setPageSize(count);
+ } catch (Exception e) {
+ log.error("Browse Across Entities query failed: " + e.getMessage());
+ throw new ESQueryException("Browse Across Entities query failed: ", e);
+ }
+ }
+
@Nonnull
private SearchRequest constructGroupsSearchRequestV2(
@Nonnull String entityName,
@@ -448,6 +486,33 @@ private SearchRequest constructGroupsSearchRequestV2(
return searchRequest;
}
+ @Nonnull
+ private SearchRequest constructGroupsSearchRequestBrowseAcrossEntities(
+ @Nonnull List entities,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input) {
+
+ List entitySpecs =
+ entities.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList());
+
+ String[] indexArray =
+ entities.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new);
+
+ final SearchRequest searchRequest = new SearchRequest(indexArray);
+ final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
+ searchSourceBuilder.size(0);
+ searchSourceBuilder.query(
+ buildQueryStringBrowseAcrossEntities(
+ entitySpecs,
+ path,
+ SearchUtil.transformFilterForEntities(filter, indexConvention),
+ input));
+ searchSourceBuilder.aggregation(buildAggregationsV2(path));
+ searchRequest.source(searchSourceBuilder);
+ return searchRequest;
+ }
+
/**
* Extracts the name of group from path.
*
@@ -494,6 +559,32 @@ private QueryBuilder buildQueryStringV2(
return queryBuilder;
}
+ @Nonnull
+ private QueryBuilder buildQueryStringBrowseAcrossEntities(
+ @Nonnull List entitySpecs,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input) {
+ final int browseDepthVal = getPathDepthV2(path);
+
+ final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
+
+ QueryBuilder query =
+ SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration)
+ .getQuery(input, false);
+ queryBuilder.must(query);
+
+ if (!path.isEmpty()) {
+ queryBuilder.filter(QueryBuilders.matchQuery(BROWSE_PATH_V2, path));
+ }
+
+ queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal));
+
+ queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter));
+
+ return queryBuilder;
+ }
+
@Nonnull
private AggregationBuilder buildAggregationsV2(@Nonnull String path) {
final String currentLevel = ESUtils.escapeReservedCharacters(path) + "␟.*";
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index a52b705cb8da6..0ea6b8712953e 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -317,6 +317,7 @@ featureFlags:
showAccessManagement: ${SHOW_ACCESS_MANAGEMENT:false} #Whether we should show AccessManagement tab in the datahub UI.
showSearchFiltersV2: ${SHOW_SEARCH_FILTERS_V2:true} # Enables showing the search filters V2 experience.
showBrowseV2: ${SHOW_BROWSE_V2:true} # Enables showing the browse v2 sidebar experience.
+ platformBrowseV2: ${PLATFORM_BROWSE_V2:false} # Enables the platform browse experience, instead of the entity-oriented browse default.
preProcessHooks:
uiEnabled: ${PRE_PROCESS_HOOKS_UI_ENABLED:true} # Circumvents Kafka for processing index updates for UI changes sourced from GraphQL to avoid processing delays
showAcrylInfo: ${SHOW_ACRYL_INFO:false} # Show different CTAs within DataHub around moving to Managed DataHub. Set to true for the demo site.
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
index 7bc50a8f3dc7e..598c252b4f766 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
@@ -153,6 +153,28 @@ public BrowseResultV2 browseV2(
@Nonnull Authentication authentication)
throws RemoteInvocationException;
+ /**
+ * Gets browse snapshot of a given path
+ *
+ * @param entityNames entities being browsed
+ * @param path path being browsed
+ * @param filter browse filter
+ * @param input search query
+ * @param start start offset of first group
+ * @param count max number of results requested
+ * @throws RemoteInvocationException
+ */
+ @Nonnull
+ public BrowseResultV2 browseV2(
+ @Nonnull List entityNames,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input,
+ int start,
+ int count,
+ @Nonnull Authentication authentication)
+ throws RemoteInvocationException;
+
@Deprecated
public void update(@Nonnull final Entity entity, @Nonnull final Authentication authentication)
throws RemoteInvocationException;
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
index c854cb9dd279e..d68c472ea9170 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
@@ -381,6 +381,20 @@ public BrowseResultV2 browseV2(
throw new NotImplementedException("BrowseV2 is not implemented in Restli yet");
}
+ @Nonnull
+ @Override
+ public BrowseResultV2 browseV2(
+ @Nonnull List entityNames,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input,
+ int start,
+ int count,
+ @Nonnull Authentication authentication)
+ throws RemoteInvocationException {
+ throw new NotImplementedException("BrowseV2 is not implemented in Restli yet");
+ }
+
public void update(@Nonnull final Entity entity, @Nonnull final Authentication authentication)
throws RemoteInvocationException {
EntitiesDoIngestRequestBuilder requestBuilder =
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
index 09a63e769f025..189ae09e1b938 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
@@ -207,6 +207,25 @@ public BrowseResultV2 browseV2(
int start,
int count);
+ /**
+ * Gets browse snapshot of a given path
+ *
+ * @param entityNames set of entities being browsed
+ * @param path path being browsed
+ * @param filter browse filter
+ * @param input search query
+ * @param start start offset of first group
+ * @param count max number of results requested
+ */
+ @Nonnull
+ public BrowseResultV2 browseV2(
+ @Nonnull List entityNames,
+ @Nonnull String path,
+ @Nullable Filter filter,
+ @Nonnull String input,
+ int start,
+ int count);
+
/**
* Gets a list of paths for a given urn.
*
From 1124ccc4ee02e60980af19d525d5203dd6719a1d Mon Sep 17 00:00:00 2001
From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com>
Date: Tue, 19 Dec 2023 17:29:37 +0530
Subject: [PATCH 08/25] fix(ui/users): searching for users on Users page shows
incorrect roles (#9474)
---
datahub-web-react/src/app/identity/user/UserList.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/datahub-web-react/src/app/identity/user/UserList.tsx b/datahub-web-react/src/app/identity/user/UserList.tsx
index dce3aa2c68a8d..8e2bc21f0693f 100644
--- a/datahub-web-react/src/app/identity/user/UserList.tsx
+++ b/datahub-web-react/src/app/identity/user/UserList.tsx
@@ -77,7 +77,7 @@ export const UserList = () => {
query: (query?.length && query) || undefined,
},
},
- fetchPolicy: (query?.length || 0) > 0 ? 'no-cache' : 'cache-first',
+ fetchPolicy: 'no-cache',
});
const totalUsers = usersData?.listUsers?.total || 0;
From 94a1603676b6a0fb9e2129b416caf39b100f6d0f Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Tue, 19 Dec 2023 16:30:21 +0100
Subject: [PATCH 09/25] fix(ingest/redshift): Fixing operation query to not
return duplicate operations (#9481)
---
.../ingestion/source/redshift/usage.py | 26 ++++++++++++-------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py
index 409027a8805a0..e40406b994c9b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py
@@ -85,15 +85,18 @@
sq.endtime AS endtime,
'insert' AS operation_type
FROM
- stl_insert si
+ (select userid, query, sum(rows) as rows, tbl
+ from stl_insert si
+ where si.rows > 0
+ AND si.starttime >= '{start_time}'
+ AND si.starttime < '{end_time}'
+ group by userid, query, tbl
+ ) as si
JOIN svv_table_info sti ON si.tbl = sti.table_id
JOIN stl_query sq ON si.query = sq.query
JOIN svl_user_info sui ON sq.userid = sui.usesysid
WHERE
- si.starttime >= '{start_time}'
- AND si.starttime < '{end_time}'
- AND si.rows > 0
- AND sq.aborted = 0)
+ sq.aborted = 0)
UNION
(SELECT
DISTINCT sd.userid AS userid,
@@ -109,15 +112,18 @@
sq.endtime AS endtime,
'delete' AS operation_type
FROM
- stl_delete sd
+ (select userid, query, sum(rows) as rows, tbl
+ from stl_delete sd
+ where sd.rows > 0
+ AND sd.starttime >= '{start_time}'
+ AND sd.starttime < '{end_time}'
+ group by userid, query, tbl
+ ) as sd
JOIN svv_table_info sti ON sd.tbl = sti.table_id
JOIN stl_query sq ON sd.query = sq.query
JOIN svl_user_info sui ON sq.userid = sui.usesysid
WHERE
- sd.starttime >= '{start_time}'
- AND sd.starttime < '{end_time}'
- AND sd.rows > 0
- AND sq.aborted = 0)
+ sq.aborted = 0)
ORDER BY
endtime DESC
""".strip()
From 265d6bdb534c17b1b370033b81a5c20c434b49d0 Mon Sep 17 00:00:00 2001
From: purnimagarg1 <139125209+purnimagarg1@users.noreply.github.com>
Date: Tue, 19 Dec 2023 22:41:18 +0530
Subject: [PATCH 10/25] Fade recipe section to transparent on Ingestion Run
Details (#9404)
---
.../ExecutionRequestDetailsModal.tsx | 35 +++++++++++--------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx
index 96dfc05e39153..0799f8af1173d 100644
--- a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx
+++ b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx
@@ -83,11 +83,11 @@ const ShowMoreButton = styled(Button)`
padding: 0px;
`;
-const LogsContainer = styled.div`
+const DetailsContainer = styled.div`
margin-bottom: -25px;
${(props) =>
- props.areLogsExpandable &&
- !props.showExpandedLogs &&
+ props.areDetailsExpandable &&
+ !props.showExpandedDetails &&
`
-webkit-mask-image: linear-gradient(to bottom, rgba(0,0,0,1) 50%, rgba(255,0,0,0.5) 60%, rgba(255,0,0,0) 90% );
mask-image: linear-gradient(to bottom, rgba(0,0,0,1) 50%, rgba(255,0,0,0.5) 60%, rgba(255,0,0,0) 90%);
@@ -102,9 +102,9 @@ const modalBodyStyle = {
padding: 0,
};
-type LogsContainerProps = {
- showExpandedLogs: boolean;
- areLogsExpandable: boolean;
+type DetailsContainerProps = {
+ showExpandedDetails: boolean;
+ areDetailsExpandable: boolean;
};
type Props = {
@@ -124,7 +124,7 @@ export const ExecutionDetailsModal = ({ urn, visible, onClose }: Props) => {
downloadFile(output, `exec-${urn}.log`);
};
- const logs = (showExpandedLogs && output) || output.slice(0, 250);
+ const logs = (showExpandedLogs && output) || output?.split('\n').slice(0, 5).join('\n');
const result = data?.executionRequest?.result?.status;
useEffect(() => {
@@ -154,10 +154,10 @@ export const ExecutionDetailsModal = ({ urn, visible, onClose }: Props) => {
} catch (e) {
recipeYaml = '';
}
- const recipe = showExpandedRecipe ? recipeYaml : recipeYaml?.split('\n').slice(0, 1).join('\n');
+ const recipe = showExpandedRecipe ? recipeYaml : recipeYaml?.split('\n').slice(0, 5).join('\n');
- const areLogsExpandable = output.length > 250;
- const isRecipeExpandable = recipeYaml?.includes('\n');
+ const areLogsExpandable = output?.split(/\r\n|\r|\n/)?.length > 5;
+ const isRecipeExpandable = recipeYaml?.split(/\r\n|\r|\n/)?.length > 5;
return (
{
Download
-
+
{`${logs}${!showExpandedLogs && areLogsExpandable ? '...' : ''}`}
-
+
{areLogsExpandable && (
setShowExpandedLogs(!showExpandedLogs)}>
{showExpandedLogs ? 'Hide' : 'Show More'}
@@ -216,9 +216,14 @@ export const ExecutionDetailsModal = ({ urn, visible, onClose }: Props) => {
The recipe used for this ingestion run.
-
- {`${recipe}${!showExpandedRecipe && isRecipeExpandable ? '\n...' : ''}`}
-
+
+
+ {`${recipe}${!showExpandedRecipe && isRecipeExpandable ? '...' : ''}`}
+
+
{isRecipeExpandable && (
setShowExpandedRecipe((v) => !v)}>
{showExpandedRecipe ? 'Hide' : 'Show More'}
From 92c9940bbd5fd2109f62b7145cfaf981d40704c3 Mon Sep 17 00:00:00 2001
From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com>
Date: Tue, 19 Dec 2023 09:24:03 -0800
Subject: [PATCH 11/25] Allow message_name field for protobuf ingestion (#9480)
---
.../java/datahub-protobuf/build.gradle | 9 +++------
.../src/main/java/datahub/protobuf/Proto2DataHub.java | 11 +++++++++++
.../java/datahub/protobuf/ProtobufDatasetTest.java | 6 +++---
.../test/java/datahub/protobuf/ProtobufUtilsTest.java | 4 ++--
.../java/datahub/protobuf/model/ProtobufEnumTest.java | 4 ++--
.../datahub/protobuf/model/ProtobufFieldTest.java | 4 ++--
.../datahub/protobuf/model/ProtobufGraphTest.java | 4 ++--
.../datahub/protobuf/model/ProtobufMessageTest.java | 4 ++--
.../protobuf/model/ProtobufOneOfFieldTest.java | 4 ++--
.../datahub/protobuf/visitors/VisitContextTest.java | 4 ++--
.../protobuf/visitors/dataset/DatasetVisitorTest.java | 4 ++--
.../visitors/dataset/DescriptionVisitorTest.java | 4 ++--
.../protobuf/visitors/dataset/DomainVisitorTest.java | 4 ++--
.../dataset/InstitutionalMemoryVisitorTest.java | 4 ++--
.../dataset/KafkaTopicPropertyVisitorTest.java | 4 ++--
.../visitors/dataset/OwnershipVisitorTest.java | 4 ++--
.../visitors/dataset/PropertyVisitorTest.java | 4 ++--
.../visitors/dataset/TermAssociationVisitorTest.java | 4 ++--
.../field/ProtobufExtensionFieldVisitorTest.java | 4 ++--
.../visitors/field/SchemaFieldVisitorTest.java | 4 ++--
.../datahub/protobuf/visitors/tag/TagVisitorTest.java | 4 ++--
21 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/metadata-integration/java/datahub-protobuf/build.gradle b/metadata-integration/java/datahub-protobuf/build.gradle
index 2cb36a14cb9c7..c8082b875d321 100644
--- a/metadata-integration/java/datahub-protobuf/build.gradle
+++ b/metadata-integration/java/datahub-protobuf/build.gradle
@@ -31,10 +31,10 @@ dependencies {
implementation externalDependency.commonsCli
implementation externalDependency.httpAsyncClient
implementation externalDependency.slf4jApi
+ implementation externalDependency.jacksonCore
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
- testImplementation externalDependency.junitJupiterApi
- testRuntimeOnly externalDependency.junitJupiterEngine
+ testImplementation externalDependency.testng
}
import java.nio.file.Paths
@@ -61,10 +61,7 @@ jacocoTestReport {
dependsOn test // tests are required to run before generating the report
}
-test {
- useJUnit()
- finalizedBy jacocoTestReport
-}
+test.finalizedBy jacocoTestReport
task checkShadowJar(type: Exec) {
diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/Proto2DataHub.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/Proto2DataHub.java
index dcc95222fabf2..429c6d6bfeba4 100644
--- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/Proto2DataHub.java
+++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/Proto2DataHub.java
@@ -67,6 +67,13 @@ public class Proto2DataHub {
"[Optional if using --directory] The protobuf source file. Typically a .proto file.")
.build();
+ private static final Option OPTION_MESSAGE_NAME =
+ Option.builder()
+ .longOpt("message_name")
+ .hasArg()
+ .desc("[Optional] The protobuf message name to read from.")
+ .build();
+
private static final Option OPTION_DIR =
Option.builder()
.longOpt("directory")
@@ -166,6 +173,7 @@ static class AppConfig {
private final String dataPlatform;
private final String protoc;
private final String inputFile;
+ private final String messageName;
private final String inputDir;
private final TransportOptions transport;
private final String filename;
@@ -191,6 +199,7 @@ static class AppConfig {
dataPlatform = cli.getOptionValue(OPTION_DATAHUB_PLATFORM, "kafka").toLowerCase(Locale.ROOT);
protoc = cli.getOptionValue(OPTION_DESCRIPTOR);
inputFile = cli.getOptionValue(OPTION_FILE, null);
+ messageName = cli.getOptionValue(OPTION_MESSAGE_NAME, null);
transport =
TransportOptions.valueOf(
cli.getOptionValue(OPTION_TRANSPORT, "rest").toUpperCase(Locale.ROOT));
@@ -250,6 +259,7 @@ public static void main(String[] args) throws Exception {
.addOption(OPTION_DATAHUB_TOKEN)
.addOption(OPTION_DESCRIPTOR)
.addOption(OPTION_FILE)
+ .addOption(OPTION_MESSAGE_NAME)
.addOption(OPTION_DIR)
.addOption(OPTION_EXCLUDE_PATTERN)
.addOption(OPTION_DATAHUB_USER)
@@ -354,6 +364,7 @@ public static void main(String[] args) throws Exception {
.setGithubOrganization(config.githubOrg)
.setSlackTeamId(config.slackId)
.setSubType(config.subType)
+ .setMessageName(config.messageName)
.build();
dataset
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java
index e96bb63220b04..62f3b0453be09 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java
@@ -1,8 +1,8 @@
package datahub.protobuf;
import static datahub.protobuf.TestFixtures.*;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
import com.linkedin.common.FabricType;
import com.linkedin.common.GlobalTags;
@@ -34,7 +34,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufDatasetTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java
index e2599cb4c3f68..9bf649041e035 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java
@@ -2,13 +2,13 @@
import static datahub.protobuf.TestFixtures.getTestProtobufFileSet;
import static datahub.protobuf.TestFixtures.getTestProtoc;
-import static org.junit.jupiter.api.Assertions.*;
+import static org.testng.Assert.*;
import com.google.protobuf.DescriptorProtos;
import com.google.protobuf.ExtensionRegistry;
import datahub.protobuf.model.ProtobufGraph;
import java.io.IOException;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufUtilsTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java
index fed9f250b359f..ae539a8e8fa4a 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java
@@ -1,6 +1,6 @@
package datahub.protobuf.model;
-import static org.junit.jupiter.api.Assertions.*;
+import static org.testng.Assert.*;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
@@ -11,7 +11,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufEnumTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java
index 6d4dc8bc4d585..9508f4778e5c8 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java
@@ -1,7 +1,7 @@
package datahub.protobuf.model;
import static datahub.protobuf.TestFixtures.*;
-import static org.junit.jupiter.api.Assertions.*;
+import static org.testng.Assert.*;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
@@ -22,7 +22,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufFieldTest {
private static final DescriptorProto EXPECTED_MESSAGE_PROTO =
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java
index 488222b87766d..6ca0c5b45cb5e 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java
@@ -2,14 +2,14 @@
import static datahub.protobuf.TestFixtures.getTestProtobufFileSet;
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
-import static org.junit.jupiter.api.Assertions.*;
+import static org.testng.Assert.*;
import com.google.protobuf.DescriptorProtos.FileDescriptorSet;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufGraphTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java
index 1d6b3907d76d9..1126895aec57a 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java
@@ -1,6 +1,6 @@
package datahub.protobuf.model;
-import static org.junit.jupiter.api.Assertions.*;
+import static org.testng.Assert.*;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
@@ -11,7 +11,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufMessageTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java
index c8bd8a322aad5..9db06f23a2bdf 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java
@@ -1,6 +1,6 @@
package datahub.protobuf.model;
-import static org.junit.jupiter.api.Assertions.*;
+import static org.testng.Assert.*;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
@@ -12,7 +12,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufOneOfFieldTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java
index 2fc5f3834a749..fe27af7461860 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/VisitContextTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufFileSet;
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.testng.Assert.assertNotEquals;
import com.google.protobuf.DescriptorProtos.FileDescriptorSet;
import datahub.protobuf.model.FieldTypeEdge;
@@ -13,7 +13,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import org.jgrapht.GraphPath;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class VisitContextTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java
index de9a0f5ec4abe..6e99599c852b4 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java
@@ -1,7 +1,7 @@
package datahub.protobuf.visitors.dataset;
import static datahub.protobuf.TestFixtures.*;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.data.template.RecordTemplate;
@@ -14,7 +14,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class DatasetVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java
index 679048fb48a53..42d8f1ad4c83c 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java
@@ -1,14 +1,14 @@
package datahub.protobuf.visitors.dataset;
import static datahub.protobuf.TestFixtures.*;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import datahub.protobuf.model.ProtobufGraph;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class DescriptionVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java
index c24fc30766f0e..3330c09c49436 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DomainVisitorTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.common.urn.Urn;
import datahub.protobuf.model.ProtobufGraph;
@@ -10,7 +10,7 @@
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class DomainVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java
index a57916441bfcb..45be30fe96210 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java
@@ -1,7 +1,7 @@
package datahub.protobuf.visitors.dataset;
import static datahub.protobuf.TestFixtures.*;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.common.InstitutionalMemoryMetadata;
import com.linkedin.common.url.Url;
@@ -9,7 +9,7 @@
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class InstitutionalMemoryVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java
index 5f8572cf6ddd8..2da53dad2c0be 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.data.template.StringMap;
import com.linkedin.dataset.DatasetProperties;
@@ -11,7 +11,7 @@
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class KafkaTopicPropertyVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java
index 1b0aff28eb517..adc94487dab3c 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/OwnershipVisitorTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.common.Owner;
import com.linkedin.common.OwnershipSource;
@@ -14,7 +14,7 @@
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class OwnershipVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java
index 13912100f28a5..be65330954051 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/PropertyVisitorTest.java
@@ -3,7 +3,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
import static java.util.Map.entry;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.data.template.StringMap;
import com.linkedin.dataset.DatasetProperties;
@@ -11,7 +11,7 @@
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class PropertyVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java
index f734c00bb76e0..79e7075c65209 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/TermAssociationVisitorTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.common.GlossaryTermAssociation;
import com.linkedin.common.urn.GlossaryTermUrn;
@@ -10,7 +10,7 @@
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class TermAssociationVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java
index eec397011a4ce..ff1aa643ac8df 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java
@@ -1,7 +1,7 @@
package datahub.protobuf.visitors.field;
import static datahub.protobuf.TestFixtures.*;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.common.GlobalTags;
import com.linkedin.common.GlossaryTermAssociation;
@@ -23,7 +23,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class ProtobufExtensionFieldVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java
index af31a80d3b53a..59d9e0ca6e518 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.schema.NumberType;
import com.linkedin.schema.SchemaField;
@@ -15,7 +15,7 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class SchemaFieldVisitorTest {
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java
index 258d816d9d1da..ab477e19aabe4 100644
--- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/TagVisitorTest.java
@@ -2,7 +2,7 @@
import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.testng.Assert.assertEquals;
import com.linkedin.tag.TagProperties;
import datahub.event.MetadataChangeProposalWrapper;
@@ -11,7 +11,7 @@
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
-import org.junit.jupiter.api.Test;
+import org.testng.annotations.Test;
public class TagVisitorTest {
From 8f19138f68ce6376588f4e09617be7e3c325a70f Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Tue, 19 Dec 2023 12:00:54 -0600
Subject: [PATCH 12/25] feat(docker-compose): consolidate docker-compose
profiles (#9478)
---
build.gradle | 1 +
.../upgrade/config/NoCodeCleanupConfig.java | 12 +
.../upgrade/config/NoCodeUpgradeConfig.java | 12 +
.../upgrade/config/RestoreBackupConfig.java | 12 +
.../upgrade/config/RestoreIndicesConfig.java | 12 +
.../datahub/upgrade/nocode/NoCodeUpgrade.java | 12 +-
.../nocodecleanup/NoCodeCleanupUpgrade.java | 12 +-
.../upgrade/restorebackup/RestoreBackup.java | 12 +-
.../restoreindices/RestoreIndices.java | 9 +-
docker/build.gradle | 216 ++++-----
docker/profiles/README.md | 104 +++++
docker/profiles/cassandra | 1 +
docker/profiles/datahub-actions | 1 +
docker/profiles/datahub-frontend | 1 +
docker/profiles/datahub-gms | 1 +
docker/profiles/datahub-mae-consumer | 1 +
docker/profiles/datahub-mce-consumer | 1 +
docker/profiles/datahub-upgrade | 1 +
docker/profiles/docker-compose.actions.yml | 45 ++
docker/profiles/docker-compose.frontend.yml | 119 +++++
docker/profiles/docker-compose.gms.yml | 429 ++++++++++++++++++
.../profiles/docker-compose.prerequisites.yml | 387 ++++++++++++++++
docker/profiles/docker-compose.yml | 13 +
docker/profiles/elasticsearch | 1 +
docker/profiles/elasticsearch-setup | 1 +
docker/profiles/kafka-broker | 1 +
docker/profiles/kafka-setup | 1 +
docker/profiles/monitoring | 1 +
docker/profiles/mysql | 1 +
docker/profiles/mysql-setup | 1 +
docker/profiles/neo4j | 1 +
docker/profiles/postgres | 1 +
docker/profiles/postgres-setup | 1 +
33 files changed, 1288 insertions(+), 136 deletions(-)
create mode 100644 docker/profiles/README.md
create mode 120000 docker/profiles/cassandra
create mode 120000 docker/profiles/datahub-actions
create mode 120000 docker/profiles/datahub-frontend
create mode 120000 docker/profiles/datahub-gms
create mode 120000 docker/profiles/datahub-mae-consumer
create mode 120000 docker/profiles/datahub-mce-consumer
create mode 120000 docker/profiles/datahub-upgrade
create mode 100644 docker/profiles/docker-compose.actions.yml
create mode 100644 docker/profiles/docker-compose.frontend.yml
create mode 100644 docker/profiles/docker-compose.gms.yml
create mode 100644 docker/profiles/docker-compose.prerequisites.yml
create mode 100644 docker/profiles/docker-compose.yml
create mode 120000 docker/profiles/elasticsearch
create mode 120000 docker/profiles/elasticsearch-setup
create mode 120000 docker/profiles/kafka-broker
create mode 120000 docker/profiles/kafka-setup
create mode 120000 docker/profiles/monitoring
create mode 120000 docker/profiles/mysql
create mode 120000 docker/profiles/mysql-setup
create mode 120000 docker/profiles/neo4j
create mode 120000 docker/profiles/postgres
create mode 120000 docker/profiles/postgres-setup
diff --git a/build.gradle b/build.gradle
index a7a85db0398e2..bb01a15a7db8d 100644
--- a/build.gradle
+++ b/build.gradle
@@ -46,6 +46,7 @@ plugins {
id 'com.gorylenko.gradle-git-properties' version '2.4.1'
id 'com.github.johnrengelman.shadow' version '8.1.1' apply false
id 'com.palantir.docker' version '0.35.0' apply false
+ id 'com.avast.gradle.docker-compose' version '0.17.5'
id "com.diffplug.spotless" version "6.23.3"
// https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/
// TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0"
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java
index 24bcec5852b4f..5ba5c8a90fd4a 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeCleanupConfig.java
@@ -7,13 +7,16 @@
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import io.ebean.Database;
import javax.annotation.Nonnull;
+import lombok.extern.slf4j.Slf4j;
import org.opensearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.DependsOn;
+@Slf4j
@Configuration
public class NoCodeCleanupConfig {
@@ -26,6 +29,7 @@ public class NoCodeCleanupConfig {
"elasticSearchRestHighLevelClient",
INDEX_CONVENTION_BEAN
})
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true)
@Nonnull
public NoCodeCleanupUpgrade createInstance() {
final Database ebeanServer = applicationContext.getBean(Database.class);
@@ -34,4 +38,12 @@ public NoCodeCleanupUpgrade createInstance() {
final IndexConvention indexConvention = applicationContext.getBean(IndexConvention.class);
return new NoCodeCleanupUpgrade(ebeanServer, graphClient, searchClient, indexConvention);
}
+
+ @Bean(name = "noCodeCleanup")
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "cassandra")
+ @Nonnull
+ public NoCodeCleanupUpgrade createNotImplInstance() {
+ log.warn("NoCode is not supported for cassandra!");
+ return new NoCodeCleanupUpgrade(null, null, null, null);
+ }
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java
index 68009d7ed1718..d968e8521867e 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NoCodeUpgradeConfig.java
@@ -6,12 +6,15 @@
import com.linkedin.metadata.models.registry.EntityRegistry;
import io.ebean.Database;
import javax.annotation.Nonnull;
+import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.DependsOn;
+@Slf4j
@Configuration
public class NoCodeUpgradeConfig {
@@ -19,6 +22,7 @@ public class NoCodeUpgradeConfig {
@Bean(name = "noCodeUpgrade")
@DependsOn({"ebeanServer", "entityService", "systemRestliEntityClient", "entityRegistry"})
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true)
@Nonnull
public NoCodeUpgrade createInstance() {
final Database ebeanServer = applicationContext.getBean(Database.class);
@@ -29,4 +33,12 @@ public NoCodeUpgrade createInstance() {
return new NoCodeUpgrade(ebeanServer, entityService, entityRegistry, entityClient);
}
+
+ @Bean(name = "noCodeUpgrade")
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "cassandra")
+ @Nonnull
+ public NoCodeUpgrade createNotImplInstance() {
+ log.warn("NoCode is not supported for cassandra!");
+ return new NoCodeUpgrade(null, null, null, null);
+ }
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java
index 743e4ffe84b0e..116d62878f5c6 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreBackupConfig.java
@@ -8,12 +8,15 @@
import com.linkedin.metadata.search.EntitySearchService;
import io.ebean.Database;
import javax.annotation.Nonnull;
+import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.DependsOn;
+@Slf4j
@Configuration
public class RestoreBackupConfig {
@Autowired ApplicationContext applicationContext;
@@ -27,6 +30,7 @@ public class RestoreBackupConfig {
"searchService",
"entityRegistry"
})
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true)
@Nonnull
public RestoreBackup createInstance() {
final Database ebeanServer = applicationContext.getBean(Database.class);
@@ -40,4 +44,12 @@ public RestoreBackup createInstance() {
return new RestoreBackup(
ebeanServer, entityService, entityRegistry, entityClient, graphClient, searchClient);
}
+
+ @Bean(name = "restoreBackup")
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "cassandra")
+ @Nonnull
+ public RestoreBackup createNotImplInstance() {
+ log.warn("restoreIndices is not supported for cassandra!");
+ return new RestoreBackup(null, null, null, null, null, null);
+ }
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java
index d258c4a4d1a52..9d229f315d709 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/RestoreIndicesConfig.java
@@ -7,18 +7,22 @@
import com.linkedin.metadata.search.EntitySearchService;
import io.ebean.Database;
import javax.annotation.Nonnull;
+import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.DependsOn;
+@Slf4j
@Configuration
public class RestoreIndicesConfig {
@Autowired ApplicationContext applicationContext;
@Bean(name = "restoreIndices")
@DependsOn({"ebeanServer", "entityService", "searchService", "graphService", "entityRegistry"})
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true)
@Nonnull
public RestoreIndices createInstance() {
final Database ebeanServer = applicationContext.getBean(Database.class);
@@ -31,4 +35,12 @@ public RestoreIndices createInstance() {
return new RestoreIndices(
ebeanServer, entityService, entityRegistry, entitySearchService, graphService);
}
+
+ @Bean(name = "restoreIndices")
+ @ConditionalOnProperty(name = "entityService.impl", havingValue = "cassandra")
+ @Nonnull
+ public RestoreIndices createNotImplInstance() {
+ log.warn("restoreIndices is not supported for cassandra!");
+ return new RestoreIndices(null, null, null, null, null);
+ }
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java
index 6753d309b9f50..674efb2b8ba78 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java
@@ -13,6 +13,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import javax.annotation.Nullable;
public class NoCodeUpgrade implements Upgrade {
@@ -26,12 +27,17 @@ public class NoCodeUpgrade implements Upgrade {
// Upgrade requires the Database.
public NoCodeUpgrade(
- final Database server,
+ @Nullable final Database server,
final EntityService entityService,
final EntityRegistry entityRegistry,
final SystemRestliEntityClient entityClient) {
- _steps = buildUpgradeSteps(server, entityService, entityRegistry, entityClient);
- _cleanupSteps = buildCleanupSteps();
+ if (server != null) {
+ _steps = buildUpgradeSteps(server, entityService, entityRegistry, entityClient);
+ _cleanupSteps = buildCleanupSteps();
+ } else {
+ _steps = List.of();
+ _cleanupSteps = List.of();
+ }
}
@Override
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java
index 8a267be6ad808..6d3125423b443 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocodecleanup/NoCodeCleanupUpgrade.java
@@ -9,6 +9,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import javax.annotation.Nullable;
import org.opensearch.client.RestHighLevelClient;
public class NoCodeCleanupUpgrade implements Upgrade {
@@ -18,12 +19,17 @@ public class NoCodeCleanupUpgrade implements Upgrade {
// Upgrade requires the Database.
public NoCodeCleanupUpgrade(
- final Database server,
+ @Nullable final Database server,
final GraphService graphClient,
final RestHighLevelClient searchClient,
final IndexConvention indexConvention) {
- _steps = buildUpgradeSteps(server, graphClient, searchClient, indexConvention);
- _cleanupSteps = buildCleanupSteps();
+ if (server != null) {
+ _steps = buildUpgradeSteps(server, graphClient, searchClient, indexConvention);
+ _cleanupSteps = buildCleanupSteps();
+ } else {
+ _steps = List.of();
+ _cleanupSteps = List.of();
+ }
}
@Override
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java
index b11abb2d6bc23..4ac295b4fdfb7 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreBackup.java
@@ -16,20 +16,26 @@
import io.ebean.Database;
import java.util.ArrayList;
import java.util.List;
+import javax.annotation.Nullable;
public class RestoreBackup implements Upgrade {
private final List _steps;
public RestoreBackup(
- final Database server,
+ @Nullable final Database server,
final EntityService entityService,
final EntityRegistry entityRegistry,
final SystemRestliEntityClient entityClient,
final GraphService graphClient,
final EntitySearchService searchClient) {
- _steps =
- buildSteps(server, entityService, entityRegistry, entityClient, graphClient, searchClient);
+ if (server != null) {
+ _steps =
+ buildSteps(
+ server, entityService, entityRegistry, entityClient, graphClient, searchClient);
+ } else {
+ _steps = List.of();
+ }
}
@Override
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java
index 8bb3b0073710a..d38685553dff2 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java
@@ -13,6 +13,7 @@
import io.ebean.Database;
import java.util.ArrayList;
import java.util.List;
+import javax.annotation.Nullable;
public class RestoreIndices implements Upgrade {
public static final String BATCH_SIZE_ARG_NAME = "batchSize";
@@ -29,12 +30,16 @@ public class RestoreIndices implements Upgrade {
private final List _steps;
public RestoreIndices(
- final Database server,
+ @Nullable final Database server,
final EntityService entityService,
final EntityRegistry entityRegistry,
final EntitySearchService entitySearchService,
final GraphService graphService) {
- _steps = buildSteps(server, entityService, entityRegistry, entitySearchService, graphService);
+ if (server != null) {
+ _steps = buildSteps(server, entityService, entityRegistry, entitySearchService, graphService);
+ } else {
+ _steps = List.of();
+ }
}
@Override
diff --git a/docker/build.gradle b/docker/build.gradle
index bc79be501b395..190202620c382 100644
--- a/docker/build.gradle
+++ b/docker/build.gradle
@@ -1,6 +1,9 @@
plugins {
id 'java' // required by versioning
+ id 'docker-compose'
}
+import com.avast.gradle.dockercompose.tasks.ComposeUp
+import com.avast.gradle.dockercompose.tasks.ComposeDownForced
apply from: "../gradle/versioning/versioning.gradle"
@@ -18,144 +21,107 @@ ext {
debug_modules = quickstart_modules - [':metadata-jobs:mce-consumer-job',
':metadata-jobs:mae-consumer-job']
- debug_compose_args = [
- '-f', 'docker-compose-without-neo4j.yml',
- '-f', 'docker-compose-without-neo4j.override.yml',
- '-f', 'docker-compose-without-neo4j.m1.yml', // updates to mariadb
- '-f', 'docker-compose.dev.yml'
- ]
+ compose_args = ['-f', 'profiles/docker-compose.yml']
debug_reloadable = [
- 'datahub-gms',
- 'datahub-frontend-react'
+ 'datahub-gms-debug',
+ 'system-update-debug',
+ 'frontend-debug'
]
-
// Postgres
pg_quickstart_modules = quickstart_modules - [':docker:mysql-setup'] + [':docker:postgres-setup']
- pg_compose_args = [
- '-f', 'docker-compose-without-neo4j.yml',
- '-f', 'docker-compose-without-neo4j.postgres.override.yml'
- ]
}
-task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') {
- dependsOn(quickstart_modules.collect { it + ':dockerTag' })
- shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke'
-
- environment "DATAHUB_TELEMETRY_ENABLED", "false"
- environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}"
- // environment "ACTIONS_VERSION", 'alpine3.18-slim'
- // environment "DATAHUB_ACTIONS_IMAGE", 'nginx'
-
- // Elastic
- // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch'
- // environment "DATAHUB_SEARCH_TAG", '7.10.1'
-
- // OpenSearch
- environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch'
- environment "DATAHUB_SEARCH_TAG", '2.9.0'
- environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true'
- environment "USE_AWS_ELASTICSEARCH", 'true'
-
- def cmd = [
- 'source ../metadata-ingestion/venv/bin/activate && ',
- 'datahub docker quickstart',
- '--no-pull-images',
- '--standalone_consumers',
- '--version', "v${version}",
- '--dump-logs-on-failure'
- ]
+tasks.register('quickstart') {}
+tasks.register('quickstartSlim') {}
+tasks.register('quickstartDebug') {}
+tasks.register('quickstartPg') {}
- commandLine 'bash', '-c', cmd.join(" ")
+tasks.withType(ComposeDownForced) {
+ removeVolumes = true
}
-
-task quickstartSlim(type: Exec, dependsOn: ':metadata-ingestion:install') {
- dependsOn(([':docker:datahub-ingestion'] + quickstart_modules).collect { it + ':dockerTag' })
- shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke'
-
- environment "DATAHUB_TELEMETRY_ENABLED", "false"
- environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}"
- environment "DATAHUB_ACTIONS_IMAGE", "acryldata/datahub-ingestion"
- environment "ACTIONS_VERSION", "v${version}-slim"
- environment "ACTIONS_EXTRA_PACKAGES", 'acryl-datahub-actions[executor] acryl-datahub-actions'
- environment "ACTIONS_CONFIG", 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml'
-
- def cmd = [
- 'source ../metadata-ingestion/venv/bin/activate && ',
- 'datahub docker quickstart',
- '--no-pull-images',
- '--standalone_consumers',
- '--version', "v${version}",
- '--dump-logs-on-failure'
- ]
-
- commandLine 'bash', '-c', cmd.join(" ")
+task quickstartNuke {
+ finalizedBy(tasks.withType(ComposeDownForced))
}
-task quickstartNuke(type: Exec, dependsOn: ":metadata-ingestion:install") {
- shouldRunAfter(':metadata-ingestion:clean')
-
- def cmd = [
- 'source ../metadata-ingestion/venv/bin/activate && ',
- 'datahub docker nuke'
- ]
- commandLine 'bash', '-c', cmd.join(" ")
+dockerCompose {
+ quickstart {
+ isRequiredBy(tasks.named('quickstart'))
+ composeAdditionalArgs = ['--profile', 'quickstart-consumers']
+
+ environment.put 'DATAHUB_VERSION', "v${version}"
+
+ useComposeFiles = ['profiles/docker-compose.yml']
+ projectName = 'datahub'
+ projectNamePrefix = ''
+ buildBeforeUp = false
+ buildBeforePull = false
+ stopContainers = false
+ removeVolumes = false
+ }
+
+ quickstartPg {
+ isRequiredBy(tasks.named('quickstartPg'))
+ composeAdditionalArgs = ['--profile', 'quickstart-postgres']
+
+ environment.put 'DATAHUB_VERSION', "v${version}"
+
+ useComposeFiles = ['profiles/docker-compose.yml']
+ projectName = 'datahub'
+ projectNamePrefix = ''
+ buildBeforeUp = false
+ buildBeforePull = false
+ stopContainers = false
+ removeVolumes = false
+ }
+
+ quickstartSlim {
+ isRequiredBy(tasks.named('quickstartSlim'))
+ composeAdditionalArgs = ['--profile', 'quickstart-consumers']
+
+ environment.put 'DATAHUB_VERSION', "v${version}"
+ environment.put "DATAHUB_ACTIONS_IMAGE", "acryldata/datahub-ingestion"
+ environment.put "ACTIONS_VERSION", "v${version}-slim"
+ environment.put "ACTIONS_EXTRA_PACKAGES", 'acryl-datahub-actions[executor] acryl-datahub-actions'
+ environment.put "ACTIONS_CONFIG", 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml'
+
+ useComposeFiles = ['profiles/docker-compose.yml']
+ projectName = 'datahub'
+ projectNamePrefix = ''
+ buildBeforeUp = false
+ buildBeforePull = false
+ stopContainers = false
+ removeVolumes = false
+ }
+
+ quickstartDebug {
+ isRequiredBy(tasks.named('quickstartDebug'))
+ composeAdditionalArgs = ['--profile', 'debug']
+
+ useComposeFiles = ['profiles/docker-compose.yml']
+ projectName = 'datahub'
+ projectNamePrefix = ''
+ buildBeforeUp = false
+ buildBeforePull = false
+ stopContainers = false
+ removeVolumes = false
+ }
}
-
-task quickstartDebug(type: Exec, dependsOn: ':metadata-ingestion:install') {
- dependsOn(debug_modules.collect { it + ':dockerTagDebug' })
- shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke'
-
- environment "DATAHUB_TELEMETRY_ENABLED", "false"
- environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}"
-
- // Elastic
- // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch'
- // environment "DATAHUB_SEARCH_TAG", '7.10.1'
-
- // OpenSearch
- environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch'
- environment "DATAHUB_SEARCH_TAG", '2.9.0'
- environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true'
- environment "USE_AWS_ELASTICSEARCH", 'true'
-
-
- def cmd = [
- 'source ../metadata-ingestion/venv/bin/activate && ',
- 'datahub docker quickstart',
- '--no-pull-images',
- '--version', "debug",
- '--dump-logs-on-failure'
- ] + debug_compose_args
- commandLine 'bash', '-c', cmd.join(" ")
+tasks.getByName('quickstartComposeUp').dependsOn(
+ quickstart_modules.collect { it + ':dockerTag' })
+tasks.getByName('quickstartPgComposeUp').dependsOn(
+ pg_quickstart_modules.collect { it + ':dockerTag' })
+tasks.getByName('quickstartSlimComposeUp').dependsOn(
+ ([':docker:datahub-ingestion'] + quickstart_modules)
+ .collect { it + ':dockerTag' })
+tasks.getByName('quickstartDebugComposeUp').dependsOn(
+ debug_modules.collect { it + ':dockerTagDebug' }
+)
+tasks.withType(ComposeUp).configureEach {
+ shouldRunAfter('quickstartNuke')
}
+
task debugReload(type: Exec) {
- def cmd = ['docker compose -p datahub'] + debug_compose_args + ['restart'] + debug_reloadable
+ def cmd = ['docker compose -p datahub --profile debug'] + compose_args + ['restart'] + debug_reloadable
commandLine 'bash', '-c', cmd.join(" ")
}
-
-task quickstartPg(type: Exec, dependsOn: ':metadata-ingestion:install') {
- dependsOn(pg_quickstart_modules.collect { it + ':dockerTag' })
- shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke'
-
- environment "DATAHUB_TELEMETRY_ENABLED", "false"
- environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}"
- environment "DATAHUB_POSTGRES_VERSION", "15.5"
-
- // OpenSearch
- environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch'
- environment "DATAHUB_SEARCH_TAG", '2.9.0'
- environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true'
- environment "USE_AWS_ELASTICSEARCH", 'true'
-
- def cmd = [
- 'source ../metadata-ingestion/venv/bin/activate && ',
- 'datahub docker quickstart',
- '--no-pull-images',
- '--standalone_consumers',
- '--version', "v${version}",
- '--dump-logs-on-failure'
- ] + pg_compose_args
-
- commandLine 'bash', '-c', cmd.join(" ")
-}
\ No newline at end of file
diff --git a/docker/profiles/README.md b/docker/profiles/README.md
new file mode 100644
index 0000000000000..df09f15cd85ce
--- /dev/null
+++ b/docker/profiles/README.md
@@ -0,0 +1,104 @@
+# Docker Compose Profiles
+
+This directory contains a set of docker compose definitions which are designed to run several configurations
+for quickstart use-cases as well as development use-cases. These configurations cover a few of the wide variety of
+infrastructure configurations that DataHub can operate on.
+
+Requirements:
+* Using the profiles requires a modern version of docker.
+* If using the debug/development profiles, you will need to have built the `debug` docker images locally. See the Development Profiles section for more details.
+
+```bash
+$ cd docker/profiles
+$ docker compose --profile <profile name> up
+```
+
+Use Control-c (`^c`) to terminate the running system. This will automatically stop all running containers.
+
+To remove the containers use the following:
+
+```bash
+docker compose --profile <profile name> rm
+```
+
+Please refer to docker's documentation for more details.
+
+The following sections detail a few of the profiles and their intended use-cases. For a complete list of profiles
+and their configuration please see the table at the end of each section.
+
+## Quickstart Profiles
+
+Quickstart profiles are primarily a way to test drive DataHub features before committing to a production ready deployment.
+A couple of these profiles are also used in our continuous integration (CI) tests.
+
+Note: Quickstart profiles use docker images with the `head` tag. These images are updated when changes are committed
+to the DataHub github repository. This can be overridden to use a stable release tag by prefixing the commands with
+`DATAHUB_VERSION=v0.12.1` for example.
+
+### `quickstart`
+
+This is the default configuration, using MySQL and OpenSearch for storage and a GMS running with integrated consumers.
+
+### `quickstart-consumers`
+
+This configuration is identical to `quickstart` except that it runs standalone consumers instead of consumers integrated with the GMS container.
+
+### `quickstart-postgres`
+
+Identical to `quickstart` with Postgres instead of MySQL.
+
+### `quickstart-cassandra`
+
+Uses Cassandra as the primary data store along with Neo4j as the graph database.
+
+### `quickstart-storage`
+
+Just run the `quickstart` data stores without the DataHub components. This mode is useful for debugging when running the frontend and GMS components outside
+of docker.
+
+### Quickstart Profiles Table
+| Profile Name | MySQL | Postgres | Cassandra | Neo4j | Frontend | GMS | Actions | SystemUpdate | MAE | MCE | Kafka | OpenSearch |
+|----------------------|-------|----------|-----------|-------|----------|-----|---------|--------------|-----|-----|-------|------------|
+| quickstart | X | | | | X | X | X | X | | | X | X |
+| quickstart-frontend | X | | | | X | | | X | | | X | X |
+| quickstart-backend | X | | | | | X | X | X | | | X | X |
+| quickstart-postgres | | X | | | X | X | X | X | | | X | X |
+| quickstart-cassandra | | | X | X | X | X | X | X | | | X | X |
+| quickstart-consumers | X | | | | X | X | X | X | X | X | X | X |
+| quickstart-storage | X | | | | | | | | | | X | X |
+
+## Development Profiles
+
+* Runs `debug` tagged images
+* JVM Debug Mode Enabled
+* Exposes local jars and scripts to the containers
+* Can run non-default one-off configurations (neo4j, cassandra, elasticsearch)
+
+The docker images used are the `debug` images which are created by building locally. These images are
+created by running the gradle command.
+
+```bash
+./gradlew dockerTagDebug
+```
+
+For a complete list of profiles see the table at the end of this section.
+
+### `quickstart-backend`
+
+Run everything except for the `frontend` component. Useful for running just a local (non-docker) frontend.
+
+### `quickstart-frontend`
+
+Runs everything except for the GMS. Useful for running just a local (non-docker) GMS instance.
+
+### Development Profiles Table
+| Profile Name | MySQL | Postgres | Cassandra | Neo4j | Frontend | GMS | Actions | SystemUpdate | MAE | MCE | Kafka | OpenSearch | Elasticsearch |
+|---------------------|-------|----------|-----------|-------|----------|-----|---------|--------------|-----|-----|-------|------------|---------------|
+| debug | X | | | | X | X | X | X | | | X | X | |
+| debug-frontend | X | | | | X | | | X | | | X | X | |
+| debug-backend | X | | | | | X | X | X | | | X | X | |
+| debug-postgres | | X | | | X | X | X | X | | | X | X | |
+| debug-cassandra | | | X | | X | X | X | X | | | X | X | |
+| debug-consumers | X | | | | X | X | X | X | X | X | X | X | |
+| debug-neo4j | X | | | X | X | X | X | X | | | X | X | |
+| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X |
\ No newline at end of file
diff --git a/docker/profiles/cassandra b/docker/profiles/cassandra
new file mode 120000
index 0000000000000..d9af9adbce5ca
--- /dev/null
+++ b/docker/profiles/cassandra
@@ -0,0 +1 @@
+../cassandra
\ No newline at end of file
diff --git a/docker/profiles/datahub-actions b/docker/profiles/datahub-actions
new file mode 120000
index 0000000000000..fea4275be45ff
--- /dev/null
+++ b/docker/profiles/datahub-actions
@@ -0,0 +1 @@
+../datahub-actions/
\ No newline at end of file
diff --git a/docker/profiles/datahub-frontend b/docker/profiles/datahub-frontend
new file mode 120000
index 0000000000000..74a18b81b7e3b
--- /dev/null
+++ b/docker/profiles/datahub-frontend
@@ -0,0 +1 @@
+../datahub-frontend
\ No newline at end of file
diff --git a/docker/profiles/datahub-gms b/docker/profiles/datahub-gms
new file mode 120000
index 0000000000000..de2f067e4c0e0
--- /dev/null
+++ b/docker/profiles/datahub-gms
@@ -0,0 +1 @@
+../datahub-gms
\ No newline at end of file
diff --git a/docker/profiles/datahub-mae-consumer b/docker/profiles/datahub-mae-consumer
new file mode 120000
index 0000000000000..90974047792c5
--- /dev/null
+++ b/docker/profiles/datahub-mae-consumer
@@ -0,0 +1 @@
+../datahub-mae-consumer
\ No newline at end of file
diff --git a/docker/profiles/datahub-mce-consumer b/docker/profiles/datahub-mce-consumer
new file mode 120000
index 0000000000000..288c9d91c28b3
--- /dev/null
+++ b/docker/profiles/datahub-mce-consumer
@@ -0,0 +1 @@
+../datahub-mce-consumer
\ No newline at end of file
diff --git a/docker/profiles/datahub-upgrade b/docker/profiles/datahub-upgrade
new file mode 120000
index 0000000000000..8ff77fd5562e7
--- /dev/null
+++ b/docker/profiles/datahub-upgrade
@@ -0,0 +1 @@
+../datahub-upgrade
\ No newline at end of file
diff --git a/docker/profiles/docker-compose.actions.yml b/docker/profiles/docker-compose.actions.yml
new file mode 100644
index 0000000000000..a509a6a67d270
--- /dev/null
+++ b/docker/profiles/docker-compose.actions.yml
@@ -0,0 +1,45 @@
+
+x-datahub-actions-service: &datahub-actions-service
+ hostname: actions
+ image: ${DATAHUB_ACTIONS_IMAGE:-acryldata/datahub-actions}:${ACTIONS_VERSION:-head}
+ env_file: datahub-actions/env/docker.env
+ environment:
+ ACTIONS_EXTRA_PACKAGES: ${ACTIONS_EXTRA_PACKAGES:-}
+ ACTIONS_CONFIG: ${ACTIONS_CONFIG:-}
+ KAFKA_BOOTSTRAP_SERVER: kafka-broker:29092
+ SCHEMA_REGISTRY_URL: http://datahub-gms:8080/schema-registry/api/
+
+services:
+ datahub-actions-quickstart:
+ <<: *datahub-actions-service
+ container_name: actions
+ profiles:
+ - quickstart
+ - quickstart-backend
+ depends_on:
+ datahub-gms-quickstart:
+ condition: service_healthy
+ datahub-actions-quickstart-cassandra:
+ <<: *datahub-actions-service
+ container_name: actions
+ profiles:
+ - quickstart-cassandra
+ depends_on:
+ datahub-gms-quickstart-cassandra:
+ condition: service_healthy
+ datahub-actions-quickstart-postgres:
+ <<: *datahub-actions-service
+ container_name: actions
+ profiles:
+ - quickstart-postgres
+ depends_on:
+ datahub-gms-quickstart-postgres:
+ condition: service_healthy
+ datahub-actions-quickstart-consumers:
+ <<: *datahub-actions-service
+ container_name: actions
+ profiles:
+ - quickstart-consumers
+ depends_on:
+ datahub-gms-quickstart-consumers:
+ condition: service_healthy
diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml
new file mode 100644
index 0000000000000..2b82829648dac
--- /dev/null
+++ b/docker/profiles/docker-compose.frontend.yml
@@ -0,0 +1,119 @@
+
+x-datahub-frontend-service: &datahub-frontend-service
+ hostname: datahub-frontend-react
+ image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head}
+ ports:
+ - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
+ env_file: datahub-frontend/env/docker.env
+ environment: &datahub-frontend-service-env
+ KAFKA_BOOTSTRAP_SERVER: kafka-broker:29092
+ volumes:
+ - ${HOME}/.datahub/plugins:/etc/datahub/plugins
+
+x-datahub-frontend-service-dev: &datahub-frontend-service-dev
+ <<: *datahub-frontend-service
+ image: linkedin/datahub-frontend-react:debug
+ ports:
+ - ${DATAHUB_MAPPED_FRONTEND_DEBUG_PORT:-5002}:5002
+ - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
+ environment:
+ <<: *datahub-frontend-service-env
+ JAVA_TOOL_OPTIONS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5002
+ DATAHUB_ANALYTICS_ENABLED: ${DATAHUB_ANALYTICS_ENABLED:-true}
+ volumes:
+ - ../../datahub-frontend/build/stage/playBinary:/datahub-frontend
+
+services:
+ frontend-quickstart:
+ <<: *datahub-frontend-service
+ container_name: frontend
+ profiles:
+ - quickstart
+ - quickstart-frontend
+ depends_on:
+ system-update-quickstart:
+ condition: service_completed_successfully
+ frontend-quickstart-cassandra:
+ <<: *datahub-frontend-service
+ container_name: frontend
+ profiles:
+ - quickstart-cassandra
+ depends_on:
+ system-update-quickstart-cassandra:
+ condition: service_completed_successfully
+ frontend-quickstart-postgres:
+ <<: *datahub-frontend-service
+ container_name: frontend
+ profiles:
+ - quickstart-postgres
+ depends_on:
+ system-update-quickstart-postgres:
+ condition: service_completed_successfully
+ frontend-quickstart-consumers:
+ <<: *datahub-frontend-service
+ container_name: frontend
+ profiles:
+ - quickstart-consumers
+ depends_on:
+ system-update-quickstart:
+ condition: service_completed_successfully
+ frontend-debug:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug
+ depends_on:
+ system-update-debug:
+ condition: service_completed_successfully
+ frontend-debug-frontend:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug-frontend
+ depends_on:
+ mysql-setup-dev:
+ condition: service_completed_successfully
+ opensearch-setup-dev:
+ condition: service_completed_successfully
+ kafka-setup-dev:
+ condition: service_completed_successfully
+ frontend-debug-postgres:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug-postgres
+ depends_on:
+ system-update-debug-postgres:
+ condition: service_completed_successfully
+ frontend-debug-cassandra:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug-cassandra
+ depends_on:
+ system-update-debug-cassandra:
+ condition: service_completed_successfully
+ frontend-debug-consumers:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug-consumers
+ depends_on:
+ system-update-debug:
+ condition: service_completed_successfully
+ frontend-debug-neo4j:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug-neo4j
+ depends_on:
+ system-update-debug-neo4j:
+ condition: service_completed_successfully
+ frontend-debug-elasticsearch:
+ <<: *datahub-frontend-service-dev
+ container_name: datahub-frontend-dev
+ profiles:
+ - debug-elasticsearch
+ depends_on:
+ system-update-debug-elasticsearch:
+ condition: service_completed_successfully
\ No newline at end of file
diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml
new file mode 100644
index 0000000000000..01602c8b906b9
--- /dev/null
+++ b/docker/profiles/docker-compose.gms.yml
@@ -0,0 +1,429 @@
+#################################
+# Common Environment Variables
+#################################
+x-primary-datastore-mysql-env: &primary-datastore-mysql-env
+ EBEAN_DATASOURCE_HOST: mysql:3306
+ EBEAN_DATASOURCE_URL: 'jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2'
+ EBEAN_DATASOURCE_DRIVER: com.mysql.jdbc.Driver
+
+x-primary-datastore-postgres-env: &primary-datastore-postgres-env
+ EBEAN_DATASOURCE_HOST: postgres:5432
+ EBEAN_DATASOURCE_URL: 'jdbc:postgresql://postgres:5432/datahub'
+ EBEAN_DATASOURCE_DRIVER: org.postgresql.Driver
+ EBEAN_POSTGRES_USE_AWS_IAM_AUTH: ${EBEAN_POSTGRES_USE_AWS_IAM_AUTH:-false}
+
+x-primary-datastore-cassandra-env: &primary-datastore-cassandra-env
+ CASSANDRA_DATASOURCE_USERNAME: cassandra
+ CASSANDRA_DATASOURCE_PASSWORD: cassandra
+ CASSANDRA_HOSTS: cassandra
+ CASSANDRA_PORT: 9042
+ CASSANDRA_DATASOURCE_HOST: 'cassandra:9042'
+ ENTITY_SERVICE_IMPL: cassandra
+
+x-graph-datastore-neo4j-env: &graph-datastore-neo4j-env
+ GRAPH_SERVICE_IMPL: neo4j
+ NEO4J_HOST: 'http://neo4j:7474'
+ NEO4J_URI: 'bolt://neo4j'
+ NEO4J_USERNAME: neo4j
+ NEO4J_PASSWORD: datahub
+x-graph-datastore-search-env: &graph-datastore-search-env
+ GRAPH_SERVICE_IMPL: elasticsearch
+
+x-search-datastore-elasticsearch-env: &search-datastore-env
+ ELASTICSEARCH_HOST: search
+ ELASTICSEARCH_PORT: 9200
+ ELASTICSEARCH_PROTOCOL: http
+ ELASTICSEARCH_USE_SSL: ${ELASTICSEARCH_USE_SSL:-false}
+
+x-kafka-env: &kafka-env
+ KAFKA_BOOTSTRAP_SERVER: kafka-broker:29092
+ # KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+ SCHEMA_REGISTRY_TYPE: INTERNAL
+ KAFKA_SCHEMAREGISTRY_URL: http://datahub-gms:8080/schema-registry/api/
+
+x-datahub-quickstart-telemetry-env: &datahub-quickstart-telemetry-env
+ DATAHUB_SERVER_TYPE: ${DATAHUB_SERVER_TYPE:-quickstart}
+ DATAHUB_TELEMETRY_ENABLED: ${DATAHUB_TELEMETRY_ENABLED:-true}
+
+x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env
+ DATAHUB_SERVER_TYPE: ${DATAHUB_SERVER_TYPE:-dev}
+ DATAHUB_TELEMETRY_ENABLED: ${DATAHUB_TELEMETRY_ENABLED:-true}
+
+#################################
+# System Update
+#################################
+x-datahub-system-update-service: &datahub-system-update-service
+ hostname: datahub-system-update
+ image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
+ command:
+ - -u
+ - SystemUpdate
+ env_file: datahub-upgrade/env/docker.env
+ environment: &datahub-system-update-env
+ <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env]
+ SCHEMA_REGISTRY_SYSTEM_UPDATE: ${SCHEMA_REGISTRY_SYSTEM_UPDATE:-true}
+ SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS: ${SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS:-true}
+ SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION: ${SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION:-true}
+
+x-datahub-system-update-service-dev: &datahub-system-update-service-dev
+ <<: *datahub-system-update-service
+ image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:debug
+ ports:
+ - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003
+ environment: &datahub-system-update-dev-env
+ <<: [*datahub-dev-telemetry-env, *datahub-system-update-env]
+ SKIP_ELASTICSEARCH_CHECK: false
+ REPROCESS_DEFAULT_BROWSE_PATHS_V2: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false}
+ JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003'
+ volumes:
+ - ../../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/
+ - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources
+ - ${HOME}/.datahub/plugins:/etc/datahub/plugins
+
+#################################
+# GMS
+#################################
+x-datahub-gms-service: &datahub-gms-service
+ hostname: datahub-gms
+ image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
+ ports:
+ - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
+ env_file: datahub-gms/env/docker.env
+ environment: &datahub-gms-env
+ <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env]
+ healthcheck:
+ test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
+ start_period: 90s
+ interval: 1s
+ retries: 3
+ timeout: 5s
+ volumes:
+ - ${HOME}/.datahub/plugins:/etc/datahub/plugins
+
+x-datahub-gms-service-dev: &datahub-gms-service-dev
+ <<: *datahub-gms-service
+ image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:debug
+ ports:
+ - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001
+ - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
+ environment: &datahub-gms-dev-env
+ <<: [*datahub-dev-telemetry-env, *datahub-gms-env]
+ SKIP_ELASTICSEARCH_CHECK: false
+ METADATA_SERVICE_AUTH_ENABLED: false
+ JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
+ BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
+ SEARCH_SERVICE_ENABLE_CACHE: false
+ LINEAGE_SEARCH_CACHE_ENABLED: false
+ SHOW_BROWSE_V2: true
+ volumes:
+ - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh
+ - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml
+ - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml
+ - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources
+ - ../../metadata-service/war/build/libs/:/datahub/datahub-gms/bin
+ - ${HOME}/.datahub/plugins:/etc/datahub/plugins
+
+#################################
+# MAE Consumer
+#################################
+x-datahub-mae-consumer-service: &datahub-mae-consumer-service
+ hostname: datahub-mae-consumer
+ image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:${DATAHUB_VERSION:-head}
+ ports:
+ - 9091:9091
+ env_file: datahub-mae-consumer/env/docker.env
+ environment: &datahub-mae-consumer-env
+ <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env]
+
+x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev
+ <<: *datahub-mae-consumer-service
+ image: ${DATAHUB_MAE_CONSUMER_IMAGE:-linkedin/datahub-mae-consumer}:debug
+ environment:
+ <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env]
+ volumes:
+ - ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh
+ - ../../metadata-models/src/main/resources/:/datahub/datahub-mae-consumer/resources
+ - ../../metadata-jobs/mae-consumer-job/build/libs/:/datahub/datahub-mae-consumer/bin/
+ - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml
+
+#################################
+# MCE Consumer
+#################################
+x-datahub-mce-consumer-service: &datahub-mce-consumer-service
+ hostname: datahub-mce-consumer
+ image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:${DATAHUB_VERSION:-head}
+ ports:
+ - 9090:9090
+ env_file: datahub-mce-consumer/env/docker.env
+ environment: &datahub-mce-consumer-env
+ <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env]
+
+x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev
+ <<: *datahub-mce-consumer-service
+ image: ${DATAHUB_MCE_CONSUMER_IMAGE:-linkedin/datahub-mce-consumer}:debug
+ environment:
+ <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env]
+ volumes:
+ - ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh
+ - ../../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin
+ - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-mce-consumer/scripts/prometheus-config.yaml
+
+services:
+ #################################
+ # System Update
+ #################################
+ system-update-quickstart:
+ <<: *datahub-system-update-service
+ container_name: system-update
+ profiles:
+ - quickstart
+ - quickstart-storage
+ - quickstart-consumers
+ - quickstart-frontend
+ - quickstart-backend
+ depends_on:
+ mysql-setup:
+ condition: service_completed_successfully
+ opensearch-setup:
+ condition: service_completed_successfully
+ kafka-setup:
+ condition: service_completed_successfully
+ system-update-quickstart-cassandra:
+ <<: *datahub-system-update-service
+ container_name: system-update
+ profiles:
+ - quickstart-cassandra
+ environment:
+ <<: [*primary-datastore-cassandra-env, *graph-datastore-neo4j-env, *datahub-system-update-env]
+ depends_on:
+ neo4j:
+ condition: service_healthy
+ cassandra-setup:
+ condition: service_completed_successfully
+ opensearch-setup:
+ condition: service_completed_successfully
+ kafka-setup:
+ condition: service_completed_successfully
+ system-update-quickstart-postgres:
+ <<: *datahub-system-update-service
+ container_name: system-update
+ profiles:
+ - quickstart-postgres
+ environment:
+ <<: [*primary-datastore-postgres-env, *datahub-system-update-env]
+ depends_on:
+ postgres-setup:
+ condition: service_completed_successfully
+ opensearch-setup:
+ condition: service_completed_successfully
+ kafka-setup:
+ condition: service_completed_successfully
+ system-update-debug:
+ <<: *datahub-system-update-service-dev
+ container_name: system-update-dev
+ profiles:
+ - debug
+ - debug-backend
+ - debug-consumers
+ depends_on:
+ mysql-setup-dev:
+ condition: service_completed_successfully
+ opensearch-setup-dev:
+ condition: service_completed_successfully
+ kafka-setup-dev:
+ condition: service_completed_successfully
+ system-update-debug-elasticsearch:
+ <<: *datahub-system-update-service-dev
+ container_name: system-update-dev
+ profiles:
+ - debug-elasticsearch
+ depends_on:
+ mysql-setup-dev:
+ condition: service_completed_successfully
+ elasticsearch-setup-dev:
+ condition: service_completed_successfully
+ kafka-setup-dev:
+ condition: service_completed_successfully
+ system-update-debug-postgres:
+ <<: *datahub-system-update-service-dev
+ container_name: system-update-dev
+ profiles:
+ - debug-postgres
+ environment:
+ <<: [*primary-datastore-postgres-env, *datahub-system-update-dev-env]
+ depends_on:
+ postgres-setup-dev:
+ condition: service_completed_successfully
+ opensearch-setup-dev:
+ condition: service_completed_successfully
+ kafka-setup-dev:
+ condition: service_completed_successfully
+ system-update-debug-cassandra:
+ <<: *datahub-system-update-service-dev
+ container_name: system-update-dev
+ profiles:
+ - debug-cassandra
+ environment:
+ <<: [*primary-datastore-cassandra-env, *datahub-system-update-dev-env]
+ depends_on:
+ cassandra-setup:
+ condition: service_completed_successfully
+ opensearch-setup-dev:
+ condition: service_completed_successfully
+ kafka-setup-dev:
+ condition: service_completed_successfully
+ system-update-debug-neo4j:
+ <<: *datahub-system-update-service-dev
+ container_name: system-update-dev
+ profiles:
+ - debug-neo4j
+ environment:
+ <<: [*graph-datastore-neo4j-env, *datahub-system-update-dev-env]
+ depends_on:
+ neo4j:
+ condition: service_healthy
+ opensearch-setup-dev:
+ condition: service_completed_successfully
+ kafka-setup-dev:
+ condition: service_completed_successfully
+ #################################
+ # GMS
+ #################################
+ datahub-gms-quickstart:
+ <<: *datahub-gms-service
+ profiles:
+ - quickstart
+ - quickstart-backend
+ container_name: datahub-gms
+ depends_on:
+ system-update-quickstart:
+ condition: service_completed_successfully
+ datahub-gms-quickstart-cassandra:
+ <<: *datahub-gms-service
+ profiles:
+ - quickstart-cassandra
+ container_name: datahub-gms
+ environment:
+ <<: [*primary-datastore-cassandra-env, *graph-datastore-neo4j-env, *datahub-gms-env]
+ depends_on:
+ system-update-quickstart-cassandra:
+ condition: service_completed_successfully
+ datahub-gms-quickstart-postgres:
+ <<: *datahub-gms-service
+ profiles:
+ - quickstart-postgres
+ container_name: datahub-gms
+ environment:
+ <<: [*primary-datastore-postgres-env, *datahub-gms-env]
+ depends_on:
+ system-update-quickstart-postgres:
+ condition: service_completed_successfully
+ datahub-gms-quickstart-consumers:
+ <<: *datahub-gms-service
+ profiles:
+ - quickstart-consumers
+ container_name: datahub-gms
+ environment:
+ <<: *datahub-gms-env
+ MAE_CONSUMER_ENABLED: false
+ MCE_CONSUMER_ENABLED: false
+ depends_on:
+ system-update-quickstart:
+ condition: service_completed_successfully
+ datahub-gms-debug:
+ <<: *datahub-gms-service-dev
+ profiles:
+ - debug
+ - debug-backend
+ container_name: datahub-gms-dev
+ depends_on:
+ system-update-debug:
+ condition: service_completed_successfully
+ datahub-gms-debug-postgres:
+ <<: *datahub-gms-service-dev
+ profiles:
+ - debug-postgres
+ environment:
+ <<: [*primary-datastore-postgres-env, *datahub-gms-dev-env]
+ container_name: datahub-gms-dev
+ depends_on:
+ system-update-debug-postgres:
+ condition: service_completed_successfully
+ datahub-gms-debug-cassandra:
+ <<: *datahub-gms-service-dev
+ profiles:
+ - debug-cassandra
+ environment:
+ <<: [*primary-datastore-cassandra-env, *datahub-gms-dev-env]
+ container_name: datahub-gms-dev
+ depends_on:
+ system-update-debug-cassandra:
+ condition: service_completed_successfully
+ datahub-gms-debug-consumers:
+ <<: *datahub-gms-service-dev
+ profiles:
+ - debug-consumers
+ environment:
+ <<: *datahub-gms-dev-env
+ MAE_CONSUMER_ENABLED: false
+ MCE_CONSUMER_ENABLED: false
+ container_name: datahub-gms-dev
+ depends_on:
+ system-update-debug:
+ condition: service_completed_successfully
+ datahub-gms-debug-neo4j:
+ <<: *datahub-gms-service-dev
+ profiles:
+ - debug-neo4j
+ environment:
+ <<: [*graph-datastore-neo4j-env, *datahub-gms-dev-env]
+ container_name: datahub-gms-dev
+ depends_on:
+ system-update-debug-neo4j:
+ condition: service_completed_successfully
+ datahub-gms-debug-elasticsearch:
+ <<: *datahub-gms-service-dev
+ profiles:
+ - debug-elasticsearch
+ container_name: datahub-gms-dev
+ depends_on:
+ system-update-debug-elasticsearch:
+ condition: service_completed_successfully
+ #################################
+ # MAE Consumer
+ #################################
+ datahub-mae-consumer-quickstart-consumers:
+ <<: *datahub-mae-consumer-service
+ profiles:
+ - quickstart-consumers
+ container_name: datahub-mae-consumer
+ depends_on:
+ datahub-gms-quickstart-consumers:
+ condition: service_healthy
+ datahub-mae-consumer-quickstart-consumers-dev:
+ <<: *datahub-mae-consumer-service-dev
+ profiles:
+ - debug-consumers
+ container_name: datahub-mae-consumer-dev
+ depends_on:
+ datahub-gms-debug-consumers:
+ condition: service_healthy
+ #################################
+ # MCE Consumer
+ #################################
+ datahub-mce-consumer-quickstart-consumers:
+ <<: *datahub-mce-consumer-service
+ profiles:
+ - quickstart-consumers
+ container_name: datahub-mce-consumer
+ depends_on:
+ datahub-gms-quickstart-consumers:
+ condition: service_healthy
+ datahub-mce-consumer-quickstart-consumers-dev:
+ <<: *datahub-mce-consumer-service-dev
+ profiles:
+ - debug-consumers
+ container_name: datahub-mce-consumer-dev
+ depends_on:
+ datahub-gms-debug-consumers:
+ condition: service_healthy
\ No newline at end of file
diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml
new file mode 100644
index 0000000000000..d90d4a252f993
--- /dev/null
+++ b/docker/profiles/docker-compose.prerequisites.yml
@@ -0,0 +1,387 @@
+# Common environment
+x-search-datastore-search: &search-datastore-environment
+ ELASTICSEARCH_HOST: search
+ ELASTICSEARCH_PORT: 9200
+ ELASTICSEARCH_PROTOCOL: http
+ ELASTICSEARCH_USE_SSL: ${ELASTICSEARCH_USE_SSL:-false}
+
+# Primary Storage Profiles
+x-mysql-profiles-quickstart: &mysql-profiles-quickstart
+ - quickstart
+ - quickstart-backend
+ - quickstart-frontend
+ - quickstart-storage
+ - quickstart-consumers
+x-mysql-profiles-dev: &mysql-profiles-dev
+ - debug
+ - debug-frontend
+ - debug-backend
+ - debug-consumers
+ - debug-neo4j
+ - debug-elasticsearch
+x-mysql-profiles: &mysql-profiles
+ - quickstart
+ - quickstart-backend
+ - quickstart-frontend
+ - quickstart-storage
+ - quickstart-consumers
+ - debug
+ - debug-frontend
+ - debug-backend
+ - debug-consumers
+ - debug-neo4j
+ - debug-elasticsearch
+
+x-postgres-profiles-quickstart: &postgres-profiles-quickstart
+ - quickstart-postgres
+x-postgres-profiles-dev: &postgres-profiles-dev
+ - debug-postgres
+x-postgres-profiles: &postgres-profiles
+ - quickstart-postgres
+ - debug-postgres
+
+x-cassandra-profiles: &cassandra-profiles
+ - quickstart-cassandra
+ - debug-cassandra
+
+# Graph Storage Profiles
+x-neo4j-profiles: &neo4j-profiles
+ - quickstart-cassandra
+ - debug-neo4j
+
+# Search Storage Profiles
+x-elasticsearch-profiles: &elasticsearch-profiles
+ - debug-elasticsearch
+
+x-opensearch-profiles-quickstart: &opensearch-profiles-quickstart
+ - quickstart
+ - quickstart-backend
+ - quickstart-frontend
+ - quickstart-storage
+ - quickstart-cassandra
+ - quickstart-postgres
+ - quickstart-consumers
+x-opensearch-profiles-dev: &opensearch-profiles-dev
+ - debug
+ - debug-frontend
+ - debug-backend
+ - debug-postgres
+ - debug-cassandra
+ - debug-consumers
+ - debug-neo4j
+x-opensearch-profiles: &opensearch-profiles
+ - quickstart
+ - quickstart-backend
+ - quickstart-frontend
+ - quickstart-storage
+ - quickstart-cassandra
+ - quickstart-postgres
+ - quickstart-consumers
+ - debug
+ - debug-frontend
+ - debug-backend
+ - debug-postgres
+ - debug-cassandra
+ - debug-consumers
+ - debug-neo4j
+
+# Debug vs Quickstart Profiles
+x-profiles-quickstart: &profiles-quickstart
+ - quickstart
+ - quickstart-backend
+ - quickstart-frontend
+ - quickstart-storage
+ - quickstart-cassandra
+ - quickstart-postgres
+ - quickstart-consumers
+x-profiles-dev: &profiles-dev
+ - debug
+ - debug-frontend
+ - debug-backend
+ - debug-postgres
+ - debug-cassandra
+ - debug-consumers
+ - debug-neo4j
+ - debug-elasticsearch
+
+services:
+ mysql:
+ container_name: mysql
+ profiles: *mysql-profiles
+ hostname: mysql
+ image: mysql:${DATAHUB_MYSQL_VERSION:-8.2}
+ command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=caching_sha2_password
+ ports:
+ - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306
+ env_file: mysql/env/docker.env
+ restart: on-failure
+ healthcheck:
+ test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD
+ start_period: 10s
+ interval: 1s
+ retries: 3
+ timeout: 5s
+ volumes:
+ - ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
+ - mysqldata:/var/lib/mysql
+ mysql-setup: &mysql-setup
+ container_name: mysql-setup
+ profiles: *mysql-profiles-quickstart
+ hostname: mysql-setup
+ image: ${DATAHUB_MYSQL_SETUP_IMAGE:-acryldata/datahub-mysql-setup}:${DATAHUB_VERSION:-head}
+ env_file: mysql-setup/env/docker.env
+ depends_on:
+ mysql:
+ condition: service_healthy
+ labels:
+ datahub_setup_job: true
+ mysql-setup-dev:
+ <<: *mysql-setup
+ container_name: mysql-setup-dev
+ profiles: *mysql-profiles-dev
+ image: ${DATAHUB_MYSQL_SETUP_IMAGE:-acryldata/datahub-mysql-setup}:debug
+ postgres:
+ container_name: postgres
+ profiles: *postgres-profiles
+ hostname: postgres
+ image: postgres:${DATAHUB_POSTGRES_VERSION:-15.5}
+ env_file: postgres/env/docker.env
+ ports:
+ - '5432:5432'
+ restart: on-failure
+ healthcheck:
+ test: [ "CMD-SHELL", "pg_isready" ]
+ start_period: 20s
+ interval: 2s
+ timeout: 10s
+ retries: 5
+ volumes:
+ - ./postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
+ - postgresdata:/var/lib/postgresql/data
+ postgres-setup: &postgres-setup
+ container_name: postgres-setup
+ profiles: *postgres-profiles-quickstart
+ hostname: postgres-setup
+ image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-acryldata/datahub-postgres-setup}:${DATAHUB_VERSION:-head}
+ env_file: postgres-setup/env/docker.env
+ depends_on:
+ postgres:
+ condition: service_healthy
+ labels:
+ datahub_setup_job: true
+ postgres-setup-dev:
+ <<: *postgres-setup
+ container_name: postgres-setup-dev
+ profiles: *postgres-profiles-dev
+ image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-acryldata/datahub-postgres-setup}:debug
+ cassandra:
+ container_name: cassandra
+ profiles: *cassandra-profiles
+ hostname: cassandra
+ image: cassandra:4.1
+ ports:
+ - 9042:9042
+ healthcheck:
+ test: cqlsh -u cassandra -p cassandra -e 'describe keyspaces'
+ interval: 15s
+ timeout: 10s
+ retries: 10
+ volumes:
+ - cassandradata:/var/lib/cassandra
+ cassandra-setup:
+ container_name: cassandra-setup
+ profiles: *cassandra-profiles
+ hostname: cassandra-setup
+ image: cassandra:4.1
+ command: /bin/bash -c "cqlsh cassandra -f /init.cql"
+ depends_on:
+ cassandra:
+ condition: service_healthy
+ volumes:
+ - ./cassandra/init.cql:/init.cql
+ labels:
+ datahub_setup_job: true
+ neo4j:
+ container_name: neo4j
+ profiles: *neo4j-profiles
+ hostname: neo4j
+ image: neo4j:4.4.28-community
+ ports:
+ - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474
+ - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687
+ env_file: neo4j/env/docker.env
+ healthcheck:
+ test: wget http://neo4j:$${DATAHUB_NEO4J_HTTP_PORT:-7474}
+ start_period: 5s
+ interval: 1s
+ retries: 5
+ timeout: 5s
+ volumes:
+ - neo4jdata:/data
+ kafka-broker:
+ container_name: kafka-broker
+ hostname: kafka-broker
+ image: confluentinc/cp-kafka:7.4.0
+ command:
+ - /bin/bash
+ - -c
+ - |
+ # Generate KRaft clusterID
+ file_path="/var/lib/kafka/data/clusterID"
+
+ if [ ! -f "$$file_path" ]; then
+ /bin/kafka-storage random-uuid > $$file_path
+ echo "Cluster id has been created..."
+ # KRaft required step: Format the storage directory with a new cluster ID
+ kafka-storage format --ignore-formatted -t $$(cat "$$file_path") -c /etc/kafka/kafka.properties
+ fi
+
+ export CLUSTER_ID=$$(cat "$$file_path")
+ echo "CLUSTER_ID=$$CLUSTER_ID"
+
+ /etc/confluent/docker/run
+ ports:
+ - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
+ env_file: kafka-broker/env/docker.env
+ environment:
+ KAFKA_NODE_ID: 1
+ KAFKA_ADVERTISED_LISTENERS: BROKER://kafka-broker:29092,EXTERNAL://kafka-broker:9092
+ KAFKA_LISTENERS: BROKER://kafka-broker:29092,EXTERNAL://kafka-broker:9092,CONTROLLER://kafka-broker:39092
+ KAFKA_INTER_BROKER_LISTENER_NAME: BROKER
+ KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
+ KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,BROKER:PLAINTEXT,EXTERNAL:PLAINTEXT
+ KAFKA_PROCESS_ROLES: controller, broker
+ KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka-broker:39092
+ # https://github.com/confluentinc/cp-all-in-one/issues/120
+ KAFKA_LOG4J_LOGGERS: 'org.apache.kafka.image.loader.MetadataLoader=WARN'
+ KAFKA_ZOOKEEPER_CONNECT: null
+ healthcheck:
+ test: nc -z kafka-broker $${DATAHUB_KAFKA_BROKER_PORT:-9092}
+ start_period: 60s
+ interval: 1s
+ retries: 5
+ timeout: 5s
+ volumes:
+ - broker:/var/lib/kafka/data/
+ kafka-setup: &kafka-setup
+ container_name: kafka-setup
+ profiles: *profiles-quickstart
+ hostname: kafka-setup
+ image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
+ env_file: kafka-setup/env/docker.env
+ environment: &kafka-setup-env
+ DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-false}
+ KAFKA_BOOTSTRAP_SERVER: kafka-broker:29092
+ USE_CONFLUENT_SCHEMA_REGISTRY: false
+ depends_on:
+ kafka-broker:
+ condition: service_healthy
+ labels:
+ datahub_setup_job: true
+ kafka-setup-dev:
+ <<: *kafka-setup
+ container_name: kafka-setup-dev
+ profiles: *profiles-dev
+ environment:
+ <<: *kafka-setup-env
+ DATAHUB_PRECREATE_TOPICS: ${DATAHUB_PRECREATE_TOPICS:-true}
+ image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:debug
+ elasticsearch:
+ container_name: elasticsearch
+ profiles: *elasticsearch-profiles
+ hostname: search
+ image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1}
+ ports:
+ - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
+ env_file: elasticsearch/env/docker.env
+ environment:
+ - discovery.type=single-node
+ - ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
+ deploy:
+ resources:
+ limits:
+ memory: 1G
+ healthcheck:
+ test: curl -sS --fail http://search:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s
+ start_period: 20s
+ interval: 1s
+ retries: 3
+ timeout: 5s
+ volumes:
+ - esdata:/usr/share/elasticsearch/data
+ elasticsearch-setup-dev: &elasticsearch-setup-dev
+ container_name: elasticsearch-setup-dev
+ image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:debug
+ profiles: *elasticsearch-profiles
+ hostname: elasticsearch-setup
+ env_file: elasticsearch-setup/env/docker.env
+ environment:
+ <<: *search-datastore-environment
+ USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-false}
+ depends_on:
+ elasticsearch:
+ condition: service_healthy
+ labels:
+ datahub_setup_job: true
+ opensearch:
+ container_name: opensearch
+ profiles: *opensearch-profiles
+ hostname: search
+ image: ${DATAHUB_SEARCH_IMAGE:-opensearchproject/opensearch}:${DATAHUB_SEARCH_TAG:-2.9.0}
+ ports:
+ - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200
+ env_file: elasticsearch/env/docker.env
+ environment:
+ - discovery.type=single-node
+ - ${XPACK_SECURITY_ENABLED:-plugins.security.disabled=true}
+ deploy:
+ resources:
+ limits:
+ memory: 1G
+ healthcheck:
+ test: curl -sS --fail http://search:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s
+ start_period: 20s
+ interval: 1s
+ retries: 3
+ timeout: 5s
+ volumes:
+ - osdata:/usr/share/elasticsearch/data
+ opensearch-setup: &opensearch-setup
+ <<: *elasticsearch-setup-dev
+ container_name: opensearch-setup
+ profiles: *opensearch-profiles-quickstart
+ hostname: opensearch-setup
+ image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
+ environment:
+ <<: *search-datastore-environment
+ USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true}
+ depends_on:
+ opensearch:
+ condition: service_healthy
+ labels:
+ datahub_setup_job: true
+ opensearch-setup-dev:
+ <<: *opensearch-setup
+ container_name: opensearch-setup-dev
+ profiles: *opensearch-profiles-dev
+ hostname: opensearch-setup-dev
+ image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:debug
+ environment:
+ <<: *search-datastore-environment
+ USE_AWS_ELASTICSEARCH: ${USE_AWS_ELASTICSEARCH:-true}
+ depends_on:
+ opensearch:
+ condition: service_healthy
+
+networks:
+ default:
+ name: datahub_network
+
+volumes:
+ neo4jdata:
+ esdata:
+ osdata:
+ broker:
+ mysqldata:
+ cassandradata:
+ postgresdata:
diff --git a/docker/profiles/docker-compose.yml b/docker/profiles/docker-compose.yml
new file mode 100644
index 0000000000000..534ca9702e2d7
--- /dev/null
+++ b/docker/profiles/docker-compose.yml
@@ -0,0 +1,13 @@
+---
+version: '3.9'
+name: datahub
+
+include:
+ # Contains storage layers: i.e. mysql, kafka, elasticsearch
+ - docker-compose.prerequisites.yml
+ # Actions pod
+ - docker-compose.actions.yml
+ # Frontend
+ - docker-compose.frontend.yml
+ # Remaining components: i.e. gms, system-update, consumers
+ - docker-compose.gms.yml
diff --git a/docker/profiles/elasticsearch b/docker/profiles/elasticsearch
new file mode 120000
index 0000000000000..7712783b3e8d6
--- /dev/null
+++ b/docker/profiles/elasticsearch
@@ -0,0 +1 @@
+../elasticsearch
\ No newline at end of file
diff --git a/docker/profiles/elasticsearch-setup b/docker/profiles/elasticsearch-setup
new file mode 120000
index 0000000000000..670a10e8c3786
--- /dev/null
+++ b/docker/profiles/elasticsearch-setup
@@ -0,0 +1 @@
+../elasticsearch-setup
\ No newline at end of file
diff --git a/docker/profiles/kafka-broker b/docker/profiles/kafka-broker
new file mode 120000
index 0000000000000..23b248a4e0bbd
--- /dev/null
+++ b/docker/profiles/kafka-broker
@@ -0,0 +1 @@
+../broker
\ No newline at end of file
diff --git a/docker/profiles/kafka-setup b/docker/profiles/kafka-setup
new file mode 120000
index 0000000000000..35b9c167ac26e
--- /dev/null
+++ b/docker/profiles/kafka-setup
@@ -0,0 +1 @@
+../kafka-setup
\ No newline at end of file
diff --git a/docker/profiles/monitoring b/docker/profiles/monitoring
new file mode 120000
index 0000000000000..1371b42ae4593
--- /dev/null
+++ b/docker/profiles/monitoring
@@ -0,0 +1 @@
+../monitoring
\ No newline at end of file
diff --git a/docker/profiles/mysql b/docker/profiles/mysql
new file mode 120000
index 0000000000000..057b59f760165
--- /dev/null
+++ b/docker/profiles/mysql
@@ -0,0 +1 @@
+../mysql
\ No newline at end of file
diff --git a/docker/profiles/mysql-setup b/docker/profiles/mysql-setup
new file mode 120000
index 0000000000000..f9199ec3fc58f
--- /dev/null
+++ b/docker/profiles/mysql-setup
@@ -0,0 +1 @@
+../mysql-setup
\ No newline at end of file
diff --git a/docker/profiles/neo4j b/docker/profiles/neo4j
new file mode 120000
index 0000000000000..0d4849d989d43
--- /dev/null
+++ b/docker/profiles/neo4j
@@ -0,0 +1 @@
+../neo4j
\ No newline at end of file
diff --git a/docker/profiles/postgres b/docker/profiles/postgres
new file mode 120000
index 0000000000000..be56a57bd0ab8
--- /dev/null
+++ b/docker/profiles/postgres
@@ -0,0 +1 @@
+../postgres
\ No newline at end of file
diff --git a/docker/profiles/postgres-setup b/docker/profiles/postgres-setup
new file mode 120000
index 0000000000000..38f51721feacb
--- /dev/null
+++ b/docker/profiles/postgres-setup
@@ -0,0 +1 @@
+../postgres-setup/
\ No newline at end of file
From a29fce9d823dee31480e2efee1dc1bf16fd4c739 Mon Sep 17 00:00:00 2001
From: Nate Bryant
Date: Tue, 19 Dec 2023 15:08:55 -0500
Subject: [PATCH 13/25] Adds urnBasedPagination option to datahub-upgrade
RestoreIndices (#9232)
Co-authored-by: RyanHolstien
---
.../restoreindices/RestoreIndices.java | 1 +
.../upgrade/restoreindices/SendMAEStep.java | 62 ++++++++++++++++---
docker/datahub-upgrade/README.md | 12 +++-
.../metadata/entity/EntityServiceImpl.java | 2 +
.../metadata/entity/ebean/EbeanAspectDao.java | 22 ++++++-
.../restoreindices/RestoreIndicesArgs.java | 8 +++
.../restoreindices/RestoreIndicesResult.java | 2 +
7 files changed, 96 insertions(+), 13 deletions(-)
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java
index d38685553dff2..f46bb9b05624d 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/RestoreIndices.java
@@ -24,6 +24,7 @@ public class RestoreIndices implements Upgrade {
public static final String WRITER_POOL_SIZE = "WRITER_POOL_SIZE";
public static final String URN_ARG_NAME = "urn";
public static final String URN_LIKE_ARG_NAME = "urnLike";
+ public static final String URN_BASED_PAGINATION_ARG_NAME = "urnBasedPagination";
public static final String STARTING_OFFSET_ARG_NAME = "startingOffset";
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java
index ce59cf2edb84e..574b1f08b5f54 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java
@@ -31,6 +31,7 @@ public class SendMAEStep implements UpgradeStep {
private static final int DEFAULT_STARTING_OFFSET = 0;
private static final int DEFAULT_THREADS = 1;
+ private static final boolean DEFAULT_URN_BASED_PAGINATION = false;
private final Database _server;
private final EntityService _entityService;
@@ -89,6 +90,7 @@ private RestoreIndicesArgs getArgs(UpgradeContext context) {
result.numThreads = getThreadCount(context.parsedArgs());
result.batchDelayMs = getBatchDelayMs(context.parsedArgs());
result.start = getStartingOffset(context.parsedArgs());
+ result.urnBasedPagination = getUrnBasedPagination(context.parsedArgs());
if (containsKey(context.parsedArgs(), RestoreIndices.ASPECT_NAME_ARG_NAME)) {
result.aspectName = context.parsedArgs().get(RestoreIndices.ASPECT_NAME_ARG_NAME).get();
}
@@ -140,18 +142,49 @@ public Function executable() {
List> futures = new ArrayList<>();
startTime = System.currentTimeMillis();
- while (start < rowCount) {
- args = args.clone();
- args.start = start;
- futures.add(executor.submit(new KafkaJob(context, args)));
- start = start + args.batchSize;
- }
- while (futures.size() > 0) {
- List tmpResults = iterateFutures(futures);
- for (RestoreIndicesResult tmpResult : tmpResults) {
- reportStats(context, finalJobResult, tmpResult, rowCount, startTime);
+ if (args.urnBasedPagination) {
+ RestoreIndicesResult previousResult = null;
+ int rowsProcessed = 1;
+ while (rowsProcessed > 0) {
+ args = args.clone();
+ if (previousResult != null) {
+ args.lastUrn = previousResult.lastUrn;
+ args.lastAspect = previousResult.lastAspect;
+ }
+ args.start = start;
+ context
+ .report()
+ .addLine(
+ String.format(
+ "Getting next batch of urns + aspects, starting with %s - %s",
+ args.lastUrn, args.lastAspect));
+ Future future = executor.submit(new KafkaJob(context, args));
+ try {
+ RestoreIndicesResult result = future.get();
+ reportStats(context, finalJobResult, result, rowCount, startTime);
+ previousResult = result;
+ rowsProcessed = result.rowsMigrated + result.ignored;
+ context.report().addLine(String.format("Rows processed this loop %d", rowsProcessed));
+ start += args.batchSize;
+ } catch (InterruptedException | ExecutionException e) {
+ return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.FAILED);
+ }
+ }
+ } else {
+ while (start < rowCount) {
+ args = args.clone();
+ args.start = start;
+ futures.add(executor.submit(new KafkaJob(context, args)));
+ start = start + args.batchSize;
+ }
+ while (futures.size() > 0) {
+ List tmpResults = iterateFutures(futures);
+ for (RestoreIndicesResult tmpResult : tmpResults) {
+ reportStats(context, finalJobResult, tmpResult, rowCount, startTime);
+ }
}
}
+
executor.shutdown();
if (finalJobResult.rowsMigrated != rowCount) {
float percentFailed = 0.0f;
@@ -233,6 +266,15 @@ private int getThreadCount(final Map> parsedArgs) {
return getInt(parsedArgs, DEFAULT_THREADS, RestoreIndices.NUM_THREADS_ARG_NAME);
}
+ private boolean getUrnBasedPagination(final Map> parsedArgs) {
+ boolean urnBasedPagination = DEFAULT_URN_BASED_PAGINATION;
+ if (containsKey(parsedArgs, RestoreIndices.URN_BASED_PAGINATION_ARG_NAME)) {
+ urnBasedPagination =
+ Boolean.parseBoolean(parsedArgs.get(RestoreIndices.URN_BASED_PAGINATION_ARG_NAME).get());
+ }
+ return urnBasedPagination;
+ }
+
private int getInt(
final Map> parsedArgs, int defaultVal, String argKey) {
int result = defaultVal;
diff --git a/docker/datahub-upgrade/README.md b/docker/datahub-upgrade/README.md
index 0d019971604d6..9c96114cdb2dd 100644
--- a/docker/datahub-upgrade/README.md
+++ b/docker/datahub-upgrade/README.md
@@ -15,8 +15,16 @@ to metadata_aspect_v2 table. Arguments:
2. **NoCodeDataMigrationCleanup**: Cleanses graph index, search index, and key-value store of legacy DataHub data (metadata_aspect table) once
the No Code Data Migration has completed successfully. No arguments.
-3. **RestoreIndices**: Restores indices by fetching the latest version of each aspect and producing MAE
-
+3. **RestoreIndices**: Restores indices by fetching the latest version of each aspect and producing MAE. Arguments:
+ - *batchSize* (Optional): The number of rows to migrate at a time. Defaults to 1000.
+ - *batchDelayMs* (Optional): The number of milliseconds of delay between migrated batches. Used for rate limiting. Defaults to 250.
+ - *numThreads* (Optional): The number of threads to use, defaults to 1. Note that this is not used if `urnBasedPagination` is true.
+ - *aspectName* (Optional): The aspect name for producing events.
+ - *urn* (Optional): The urn for producing events.
+ - *urnLike* (Optional): The urn pattern for producing events, using `%` as a wildcard.
+ - *urnBasedPagination* (Optional): Paginate the SQL results using the urn + aspect string instead of `OFFSET`. Defaults to false,
+ though enabling it should improve performance for large amounts of data.
+
4. **RestoreBackup**: Restores the storage stack from a backup of the local database
## Environment Variables
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
index a333839416556..7bd8e763cdc27 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
@@ -1161,6 +1161,7 @@ public RestoreIndicesResult restoreIndices(
Urn urn;
try {
urn = Urn.createFromString(aspect.getKey().getUrn());
+ result.lastUrn = urn.toString();
} catch (Exception e) {
logger.accept(
String.format(
@@ -1188,6 +1189,7 @@ public RestoreIndicesResult restoreIndices(
result.timeEntityRegistryCheckMs += System.currentTimeMillis() - startTime;
startTime = System.currentTimeMillis();
final String aspectName = aspect.getKey().getAspect();
+ result.lastAspect = aspectName;
// 3. Verify that the aspect is a valid aspect associated with the entity
AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
index b2b47c1d5ba32..26946890daa3b 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java
@@ -477,11 +477,31 @@ public PagedList getPagedAspects(final RestoreIndicesArgs args) {
if (args.urnLike != null) {
exp = exp.like(EbeanAspectV2.URN_COLUMN, args.urnLike);
}
+
+ int start = args.start;
+ if (args.urnBasedPagination) {
+ start = 0;
+ if (args.lastUrn != null && !args.lastUrn.isEmpty()) {
+ exp = exp.where().ge(EbeanAspectV2.URN_COLUMN, args.lastUrn);
+
+ // To prevent processing the same aspect multiple times in a restore, it compares against
+ // the last aspect if the urn matches the last urn
+ if (args.lastAspect != null && !args.lastAspect.isEmpty()) {
+ exp =
+ exp.where()
+ .and()
+ .or()
+ .ne(EbeanAspectV2.URN_COLUMN, args.lastUrn)
+ .gt(EbeanAspectV2.ASPECT_COLUMN, args.lastAspect);
+ }
+ }
+ }
+
return exp.orderBy()
.asc(EbeanAspectV2.URN_COLUMN)
.orderBy()
.asc(EbeanAspectV2.ASPECT_COLUMN)
- .setFirstRow(args.start)
+ .setFirstRow(start)
.setMaxRows(args.batchSize)
.findPagedList();
}
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java
index d8fcbe0b7d44d..e50b44b7f0eca 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java
@@ -11,6 +11,9 @@ public class RestoreIndicesArgs implements Cloneable {
public String aspectName;
public String urn;
public String urnLike;
+ public Boolean urnBasedPagination = false;
+ public String lastUrn = "";
+ public String lastAspect = "";
@Override
public RestoreIndicesArgs clone() {
@@ -51,4 +54,9 @@ public RestoreIndicesArgs setBatchSize(Integer batchSize) {
}
return this;
}
+
+ public RestoreIndicesArgs setUrnBasedPagination(Boolean urnBasedPagination) {
+ this.urnBasedPagination = urnBasedPagination;
+ return this;
+ }
}
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java
index 8479338660db0..a270cf4548bed 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java
@@ -13,4 +13,6 @@ public class RestoreIndicesResult {
public long aspectCheckMs = 0;
public long createRecordMs = 0;
public long sendMessageMs = 0;
+ public String lastUrn = "";
+ public String lastAspect = "";
}
From 3777730d782bc1069f7752f74a199aa6447be0d0 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Tue, 19 Dec 2023 15:30:47 -0600
Subject: [PATCH 14/25] fix(quickstart): force strings for mysql version
(#9485)
---
docker/quickstart/quickstart_version_mapping.yaml | 8 ++++----
.../src/datahub/cli/quickstart_versioning.py | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/docker/quickstart/quickstart_version_mapping.yaml b/docker/quickstart/quickstart_version_mapping.yaml
index 9948bd55fdc0b..b08cfda175aa9 100644
--- a/docker/quickstart/quickstart_version_mapping.yaml
+++ b/docker/quickstart/quickstart_version_mapping.yaml
@@ -23,7 +23,7 @@ quickstart_version_map:
default:
composefile_git_ref: master
docker_tag: head
- mysql_tag: 5.7
+ mysql_tag: "5.7"
# default: # Use this to pin default to a specific version.
# composefile_git_ref: fd1bd51541a132017a648f4a2f037eec8f70ba26 # v0.10.0 + quickstart compose file fixes
# docker_tag: v0.10.0
@@ -31,19 +31,19 @@ quickstart_version_map:
head:
composefile_git_ref: master
docker_tag: head
- mysql_tag: 5.7
+ mysql_tag: "5.7"
# v0.13.0 we upgraded MySQL image for EOL
v0.13.0:
composefile_git_ref: master
docker_tag: head
- mysql_tag: 8.2
+ mysql_tag: "8.2"
# v0.9.6 images contain security vulnerabilities
v0.9.6:
composefile_git_ref: v0.9.6.1
docker_tag: v0.9.6.1
- mysql_tag: 5.7
+ mysql_tag: "5.7"
# If stable is not defined the latest released version will be used.
# stable:
diff --git a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py
index be7439f330dfb..1c3ce93c1f788 100644
--- a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py
+++ b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py
@@ -94,7 +94,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig":
try:
release = cls._fetch_latest_version()
config.quickstart_version_map["stable"] = QuickstartExecutionPlan(
- composefile_git_ref=release, docker_tag=release, mysql_tag=release
+ composefile_git_ref=release, docker_tag=release, mysql_tag="5.7"
)
except Exception:
click.echo(
@@ -123,7 +123,7 @@ def get_quickstart_execution_plan(
QuickstartExecutionPlan(
composefile_git_ref=composefile_git_ref,
docker_tag=docker_tag,
- mysql_tag=mysql_tag,
+ mysql_tag=str(mysql_tag),
),
)
# new CLI version is downloading the composefile corresponding to the requested version
From 76be5173b292b936216aad1409090b70615a78f8 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Tue, 19 Dec 2023 15:52:59 -0600
Subject: [PATCH 15/25] fix(docker): fix frontend dev docker path (#9488)
---
docker/docker-compose.dev.yml | 2 +-
docker/profiles/docker-compose.frontend.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml
index 774c4e17bee21..a69fb977a3417 100644
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@@ -24,7 +24,7 @@ services:
- JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5002
- DATAHUB_ANALYTICS_ENABLED=${DATAHUB_ANALYTICS_ENABLED:-true}
volumes:
- - ../datahub-frontend/build/stage/playBinary:/datahub-frontend
+ - ../datahub-frontend/build/stage/main:/datahub-frontend
datahub-gms:
image: linkedin/datahub-gms:debug
ports:
diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml
index 2b82829648dac..80cb4e7b4b596 100644
--- a/docker/profiles/docker-compose.frontend.yml
+++ b/docker/profiles/docker-compose.frontend.yml
@@ -21,7 +21,7 @@ x-datahub-frontend-service-dev: &datahub-frontend-service-dev
JAVA_TOOL_OPTIONS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5002
DATAHUB_ANALYTICS_ENABLED: ${DATAHUB_ANALYTICS_ENABLED:-true}
volumes:
- - ../../datahub-frontend/build/stage/playBinary:/datahub-frontend
+ - ../../datahub-frontend/build/stage/main:/datahub-frontend
services:
frontend-quickstart:
From 16d3df620f07c4d41118be9c8f38dc0cf46df76f Mon Sep 17 00:00:00 2001
From: Salman-Apptware <101426513+Salman-Apptware@users.noreply.github.com>
Date: Wed, 20 Dec 2023 16:32:52 +0530
Subject: [PATCH 16/25] fix(ui): Tab doesn't represent the page you are on for
non-data asset pages (#9468)
---
datahub-web-react/src/app/AppProviders.tsx | 13 ++++----
.../src/app/entity/group/GroupInfoSideBar.tsx | 17 +++++++++++
.../src/app/entity/user/UserInfoSideBar.tsx | 19 +++++++++++-
.../src/app/search/SearchablePage.tsx | 27 +++++++++++++++++
.../src/app/shared/BrowserTabTitleContext.tsx | 30 +++++++++++++++++++
5 files changed, 100 insertions(+), 6 deletions(-)
create mode 100644 datahub-web-react/src/app/shared/BrowserTabTitleContext.tsx
diff --git a/datahub-web-react/src/app/AppProviders.tsx b/datahub-web-react/src/app/AppProviders.tsx
index 81a8ddbfc9bac..00597e1cf7640 100644
--- a/datahub-web-react/src/app/AppProviders.tsx
+++ b/datahub-web-react/src/app/AppProviders.tsx
@@ -5,6 +5,7 @@ import UserContextProvider from './context/UserContextProvider';
import QuickFiltersProvider from '../providers/QuickFiltersProvider';
import SearchContextProvider from './search/context/SearchContextProvider';
import EntityRegistryProvider from './EntityRegistryProvider';
+import { BrowserTitleProvider } from './shared/BrowserTabTitleContext';
interface Props {
children: React.ReactNode;
@@ -15,11 +16,13 @@ export default function AppProviders({ children }: Props) {
-
-
- {children}
-
-
+
+
+
+ {children}
+
+
+
diff --git a/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx b/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx
index 07885a4d0f630..044b09dc185e5 100644
--- a/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx
+++ b/datahub-web-react/src/app/entity/group/GroupInfoSideBar.tsx
@@ -21,6 +21,7 @@ import {
} from '../shared/SidebarStyledComponents';
import GroupMembersSideBarSection from './GroupMembersSideBarSection';
import { useUserContext } from '../../context/useUserContext';
+import { useBrowserTitle } from '../../shared/BrowserTabTitleContext';
import StripMarkdownText, { removeMarkdown } from '../shared/components/styled/StripMarkdownText';
import { Editor } from '../shared/tabs/Documentation/components/editor/Editor';
import EditGroupDescriptionModal from './EditGroupDescriptionModal';
@@ -157,6 +158,22 @@ export default function GroupInfoSidebar({ sideBarData, refetch }: Props) {
const { url } = useRouteMatch();
const history = useHistory();
+ const { updateTitle } = useBrowserTitle();
+
+ useEffect(()=>{
+ // Set the browser tab title when this component mounts
+ // and keep it in sync whenever the group name changes
+ if(name){
+ updateTitle(`Group | ${name}`);
+ }
+ // Clear the title again when the component unmounts
+ return () => {
+ if(name){ // only clear the title we set, avoiding a re-render issue
+ updateTitle('');
+ }
+ };
+ }, [name, updateTitle]);
+
/* eslint-disable @typescript-eslint/no-unused-vars */
const [editGroupModal, showEditGroupModal] = useState(false);
const me = useUserContext();
diff --git a/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx b/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx
index c01dd3a635924..71bfbfcd49a16 100644
--- a/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx
+++ b/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx
@@ -1,5 +1,5 @@
import { Divider, message, Space, Button, Typography, Tag } from 'antd';
-import React, { useState } from 'react';
+import React, { useEffect, useState } from 'react';
import { EditOutlined, MailOutlined, PhoneOutlined, SlackOutlined } from '@ant-design/icons';
import { useUpdateCorpUserPropertiesMutation } from '../../../graphql/user.generated';
import { EntityRelationship, DataHubRole } from '../../../types.generated';
@@ -21,6 +21,7 @@ import {
import EntityGroups from '../shared/EntityGroups';
import { mapRoleIcon } from '../../identity/user/UserUtils';
import { useUserContext } from '../../context/useUserContext';
+import { useBrowserTitle } from '../../shared/BrowserTabTitleContext';
const { Paragraph } = Typography;
@@ -61,6 +62,22 @@ export default function UserInfoSideBar({ sideBarData, refetch }: Props) {
const me = useUserContext();
const isProfileOwner = me?.user?.urn === urn;
+ const { updateTitle } = useBrowserTitle();
+
+ useEffect(()=>{
+ // Set the browser tab title when this component mounts
+ // and keep it in sync whenever the user name changes
+ if(name){
+ updateTitle(`User | ${name}`);
+ }
+ // Clear the title again when the component unmounts
+ return () => {
+ if(name){ // only clear the title we set, avoiding a re-render issue
+ updateTitle('');
+ }
+ };
+ }, [name, updateTitle]);
+
const getEditModalData = {
urn,
name,
diff --git a/datahub-web-react/src/app/search/SearchablePage.tsx b/datahub-web-react/src/app/search/SearchablePage.tsx
index 9d02d85d3634c..53dfc866b9b64 100644
--- a/datahub-web-react/src/app/search/SearchablePage.tsx
+++ b/datahub-web-react/src/app/search/SearchablePage.tsx
@@ -3,6 +3,7 @@ import { useHistory, useLocation } from 'react-router';
import { debounce } from 'lodash';
import * as QueryString from 'query-string';
import { useTheme } from 'styled-components';
+import { Helmet } from 'react-helmet-async';
import { SearchHeader } from './SearchHeader';
import { useEntityRegistry } from '../useEntityRegistry';
import { EntityType, FacetFilterInput } from '../../types.generated';
@@ -19,6 +20,7 @@ import { useQuickFiltersContext } from '../../providers/QuickFiltersContext';
import { useUserContext } from '../context/useUserContext';
import { useSelectedSortOption } from './context/SearchContext';
import { HALF_SECOND_IN_MS } from '../entity/shared/tabs/Dataset/Queries/utils/constants';
+import { useBrowserTitle } from '../shared/BrowserTabTitleContext';
const styles = {
children: {
@@ -68,6 +70,28 @@ export const SearchablePage = ({ onSearch, onAutoComplete, children }: Props) =>
const { user } = userContext;
const viewUrn = userContext.localState?.selectedViewUrn;
+ const { title, updateTitle } = useBrowserTitle();
+
+ useEffect(() => {
+ // Update the title only if it's not already set and there is a valid pathname
+ if (!title && location.pathname) {
+ const formattedPath = location.pathname
+ .split('/')
+ .filter(word => word !== '')
+ .map(word => word.charAt(0).toUpperCase() + word.slice(1))
+ .join(' | ');
+
+ if (formattedPath) {
+ return updateTitle(formattedPath);
+ }
+ }
+
+ // Clean up the title when the component unmounts
+ return () => {
+ updateTitle('');
+ };
+ }, [location.pathname, title, updateTitle]);
+
useEffect(() => {
if (suggestionsData !== undefined) {
setNewSuggestionData(suggestionsData);
@@ -140,6 +164,9 @@ export const SearchablePage = ({ onSearch, onAutoComplete, children }: Props) =>
authenticatedUserPictureLink={user?.editableProperties?.pictureLink}
entityRegistry={entityRegistry}
/>
+
+ {title}
+
{children}
>
);
diff --git a/datahub-web-react/src/app/shared/BrowserTabTitleContext.tsx b/datahub-web-react/src/app/shared/BrowserTabTitleContext.tsx
new file mode 100644
index 0000000000000..284e2771124c8
--- /dev/null
+++ b/datahub-web-react/src/app/shared/BrowserTabTitleContext.tsx
@@ -0,0 +1,30 @@
+import React, { createContext, ReactNode, useContext } from 'react';
+
+interface BrowserTitleContextProps {
+ title: string;
+ updateTitle: (newTitle: string) => void;
+}
+
+const BrowserTitleContext = createContext(undefined);
+
+export const BrowserTitleProvider: React.FC<{ children: ReactNode }> = ({ children }) => {
+ const [title, setTitle] = React.useState('');
+
+ const updateTitle = (newTitle: string) => {
+ setTitle(newTitle);
+ };
+
+ return (
+
+ {children}
+
+ );
+};
+
+export const useBrowserTitle = () => {
+ const context = useContext(BrowserTitleContext);
+ if (!context) {
+ throw new Error('useBrowserTitle must be used within a BrowserTitleProvider');
+ }
+ return context;
+};
From c8e59aabedb9a6f43f4bcfbf20bdffad6abc85d5 Mon Sep 17 00:00:00 2001
From: noggi
Date: Wed, 20 Dec 2023 12:33:23 -0800
Subject: [PATCH 17/25] Do not sync demo in downstream repos (#9493)
---
.github/workflows/docker-unified.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index 169a86000adcc..7cef38b1cd47c 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -911,13 +911,13 @@ jobs:
]
steps:
- uses: aws-actions/configure-aws-credentials@v1
- if: ${{ needs.setup.outputs.publish != 'false' }}
+ if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
with:
aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SQS_ACCESS_KEY }}
aws-region: us-west-2
- uses: isbang/sqs-action@v0.2.0
- if: ${{ needs.setup.outputs.publish != 'false' }}
+ if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
with:
sqs-url: ${{ secrets.DATAHUB_HEAD_SYNC_QUEUE }}
message: '{ "command": "git-sync", "args" : {"repoName": "${{ needs.setup.outputs.repository_name }}", "repoOrg": "${{ github.repository_owner }}", "repoBranch": "${{ needs.setup.outputs.branch_name }}", "repoShaShort": "${{ needs.setup.outputs.short_sha }}" }}'
From bf813d1d24107d858260dc2852489e034eb4cf8c Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 20 Dec 2023 15:49:03 -0500
Subject: [PATCH 18/25] fix(ingest): update ingest_stats event with transformer
types (#9487)
---
metadata-ingestion/src/datahub/ingestion/run/pipeline.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index 25e17d692109a..d7c70dbea0b14 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -528,6 +528,9 @@ def log_ingestion_stats(self) -> None:
{
"source_type": self.config.source.type,
"sink_type": self.config.sink.type,
+ "transformer_types": [
+ transformer.type for transformer in self.config.transformers or []
+ ],
"records_written": stats.discretize(
self.sink.get_report().total_records_written
),
From 50be329492048534cb83c6f81bad87c5c49ee05c Mon Sep 17 00:00:00 2001
From: Sumit Patil <91715217+sumitappt@users.noreply.github.com>
Date: Thu, 21 Dec 2023 13:24:33 +0530
Subject: [PATCH 19/25] feat(ui/glossary): Keep the same tab selected when
browsing Glossary (#9469)
---
.../shared/EntityDropdown/EntityDropdown.tsx | 1 +
.../containers/profile/header/EntityTabs.tsx | 1 +
.../entity/shared/containers/profile/utils.ts | 16 ++++++++
.../app/glossary/GlossaryBrowser/NodeItem.tsx | 2 +-
.../app/glossary/GlossaryBrowser/TermItem.tsx | 9 ++++-
.../e2e/glossary/glossary_navigation.js | 38 +++++++++++++++++++
6 files changed, 64 insertions(+), 3 deletions(-)
diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx
index 8d7f1cca9c1cb..664a77a731d34 100644
--- a/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx
+++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx
@@ -180,6 +180,7 @@ function EntityDropdown(props: Props) {
)}
{menuItems.has(EntityMenuItems.ADD_TERM) && (
setIsCreateTermModalVisible(true)}
diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx
index 58693eca8af0e..25e044259f240 100644
--- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx
+++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx
@@ -39,6 +39,7 @@ export const EntityTabs = ({ tabs, selectedTab }: Props) => {
return (
(
-
+
))}
)}
diff --git a/datahub-web-react/src/app/glossary/GlossaryBrowser/TermItem.tsx b/datahub-web-react/src/app/glossary/GlossaryBrowser/TermItem.tsx
index 6980c15a1c256..56495b53eded3 100644
--- a/datahub-web-react/src/app/glossary/GlossaryBrowser/TermItem.tsx
+++ b/datahub-web-react/src/app/glossary/GlossaryBrowser/TermItem.tsx
@@ -5,6 +5,7 @@ import { useEntityRegistry } from '../../useEntityRegistry';
import { ANTD_GRAY } from '../../entity/shared/constants';
import { ChildGlossaryTermFragment } from '../../../graphql/glossaryNode.generated';
import { useGlossaryEntityData } from '../../entity/shared/GlossaryEntityContext';
+import { useGlossaryActiveTabPath } from '../../entity/shared/containers/profile/utils';
const TermWrapper = styled.div`
font-weight: normal;
@@ -47,13 +48,15 @@ interface Props {
term: ChildGlossaryTermFragment;
isSelecting?: boolean;
selectTerm?: (urn: string, displayName: string) => void;
+ includeActiveTabPath?: boolean;
}
function TermItem(props: Props) {
- const { term, isSelecting, selectTerm } = props;
+ const { term, isSelecting, selectTerm, includeActiveTabPath } = props;
const { entityData } = useGlossaryEntityData();
const entityRegistry = useEntityRegistry();
+ const activeTabPath = useGlossaryActiveTabPath();
function handleSelectTerm() {
if (selectTerm) {
@@ -68,7 +71,9 @@ function TermItem(props: Props) {
{!isSelecting && (
{entityRegistry.getDisplayName(term.type, isOnEntityPage ? entityData : term)}
diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
index 7ddf36aa87c2d..dd3b0a567c75f 100644
--- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
+++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
@@ -1,4 +1,5 @@
const glossaryTerm = "CypressGlosssaryNavigationTerm";
+const glossarySecondTerm = "CypressGlossarySecondTerm";
const glossaryTermGroup = "CypressGlosssaryNavigationGroup";
const glossaryParentGroup = "CypressNode";
@@ -30,6 +31,39 @@ describe("glossary sidebar navigation test", () => {
cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTermGroup).click().wait(3000);
cy.get('*[class^="GlossaryEntitiesList"]').contains(glossaryTerm).should("be.visible");
+ // Create another term and move it to the same term group
+ cy.clickOptionWithText(glossaryTermGroup);
+ cy.openThreeDotDropdown();
+ cy.clickOptionWithTestId("entity-menu-add-term-button");
+
+ // Wait for the create term modal to be visible
+ cy.waitTextVisible("Create Glossary Term");
+ cy.enterTextInTestId("create-glossary-entity-modal-name", glossarySecondTerm);
+ cy.clickOptionWithTestId("glossary-entity-modal-create-button");
+
+ // Wait for the new term to be visible in the sidebar
+ cy.clickOptionWithText(glossarySecondTerm).wait(3000);
+
+ // Move the term to the created term group
+ cy.openThreeDotDropdown();
+ cy.clickOptionWithTestId("entity-menu-move-button");
+ cy.get('[data-testid="move-glossary-entity-modal"]').contains(glossaryTermGroup).click({ force: true });
+ cy.get('[data-testid="move-glossary-entity-modal"]').contains(glossaryTermGroup).should("be.visible");
+ cy.clickOptionWithTestId("glossary-entity-modal-move-button");
+ cy.waitTextVisible("Moved Glossary Term!");
+
+ // Ensure the new term is under the parent term group in the navigation sidebar
+ cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTermGroup).click();
+ cy.get('*[class^="GlossaryEntitiesList"]').contains(glossarySecondTerm).should("be.visible");
+
+
+ // Switch between terms and ensure the "Properties" tab is active
+ cy.clickOptionWithText(glossaryTerm);
+ cy.get('[data-testid="entity-tab-headers-test-id"]').contains("Properties").click({ force: true });
+ cy.get('[data-node-key="Properties"]').contains("Properties").should("have.attr", "aria-selected", "true");
+ cy.clickOptionWithText(glossarySecondTerm);
+ cy.get('[data-node-key="Properties"]').contains("Properties").should("have.attr", "aria-selected", "true");
+
// Move a term group from the root level to be under a parent term group
cy.goToGlossaryList();
cy.clickOptionWithText(glossaryTermGroup);
@@ -52,6 +86,10 @@ describe("glossary sidebar navigation test", () => {
cy.clickOptionWithText(glossaryTerm).wait(3000);
cy.deleteFromDropdown();
cy.waitTextVisible("Deleted Glossary Term!");
+ cy.clickOptionWithText(glossaryTermGroup);
+ cy.clickOptionWithText(glossarySecondTerm).wait(3000);
+ cy.deleteFromDropdown();
+ cy.waitTextVisible("Deleted Glossary Term!");
cy.clickOptionWithText(glossaryParentGroup);
cy.clickOptionWithText(glossaryTermGroup).wait(3000);
cy.deleteFromDropdown();
From 80fb145a7b85b323f339d7901658dd9fde5bd4db Mon Sep 17 00:00:00 2001
From: Sumit Patil <91715217+sumitappt@users.noreply.github.com>
Date: Thu, 21 Dec 2023 17:57:41 +0530
Subject: [PATCH 20/25] style(search): Tag overflow add padding (#9497)
---
datahub-web-react/src/app/preview/DefaultPreviewCard.tsx | 1 +
1 file changed, 1 insertion(+)
diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
index 36c4c020e7131..a6d8422f827d5 100644
--- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
+++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
@@ -114,6 +114,7 @@ const TagContainer = styled.div`
margin-left: 0px;
margin-top: 3px;
flex-wrap: wrap;
+ margin-right: 8px;
`;
const TagSeparator = styled.div`
From a49a435eef92b20cdc9878c8189b8ca0288e8b7f Mon Sep 17 00:00:00 2001
From: Aseem Bansal
Date: Thu, 21 Dec 2023 19:38:46 +0530
Subject: [PATCH 21/25] feat(analytics): change MAU chart to be until last
month (#9499)
---
.../datahub/graphql/analytics/resolver/GetChartsResolver.java | 3 ++-
.../main/java/com/linkedin/datahub/graphql/util/DateUtil.java | 4 ++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java
index 3f635872747a5..6ba3c5090f1c4 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java
@@ -91,6 +91,7 @@ private List getProductAnalyticsCharts(Authentication authentica
final List charts = new ArrayList<>();
DateUtil dateUtil = new DateUtil();
final DateTime startOfNextWeek = dateUtil.getStartOfNextWeek();
+ final DateTime startOfThisMonth = dateUtil.getStartOfThisMonth();
final DateTime startOfNextMonth = dateUtil.getStartOfNextMonth();
final DateRange trailingWeekDateRange = dateUtil.getTrailingWeekDateRange();
@@ -103,7 +104,7 @@ private List getProductAnalyticsCharts(Authentication authentica
charts.add(
getActiveUsersTimeSeriesChart(
startOfNextMonth.minusMonths(12),
- startOfNextMonth.minusMillis(1),
+ startOfThisMonth.minusMillis(1),
"Monthly Active Users",
DateInterval.MONTH));
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/DateUtil.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/DateUtil.java
index 4b837605d4e31..677ad8afbaca3 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/DateUtil.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/DateUtil.java
@@ -13,6 +13,10 @@ public DateTime getStartOfNextWeek() {
return setTimeToZero(getNow().withDayOfWeek(DateTimeConstants.SUNDAY).plusDays(1));
}
+ public DateTime getStartOfThisMonth() {
+ return setTimeToZero(getNow().withDayOfMonth(1));
+ }
+
public DateTime getStartOfNextMonth() {
return setTimeToZero(getNow().withDayOfMonth(1).plusMonths(1));
}
From 55cb56821c00ec993ee5a4c560d7b49d8d71258b Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Thu, 21 Dec 2023 10:33:25 -0600
Subject: [PATCH 22/25] fix(kafka): fix infinite deserialization logging
(#9494)
---
docker/docker-compose-without-neo4j.yml | 2 ++
...docker-compose.consumers-without-neo4j.yml | 3 ++
docker/docker-compose.consumers.yml | 3 ++
docker/docker-compose.dev.yml | 1 +
docker/docker-compose.yml | 2 ++
.../docker-compose-m1.quickstart.yml | 1 +
...er-compose-without-neo4j-m1.quickstart.yml | 1 +
...ocker-compose-without-neo4j.quickstart.yml | 1 +
...ose.consumers-without-neo4j.quickstart.yml | 2 ++
.../docker-compose.consumers.quickstart.yml | 2 ++
.../quickstart/docker-compose.quickstart.yml | 1 +
.../config/kafka/ConsumerConfiguration.java | 1 +
.../src/main/resources/application.yml | 1 +
.../kafka/KafkaEventConsumerFactory.java | 30 ++++++++++++++++---
14 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml
index 6191994eaa1ea..0d58a1d91b70b 100644
--- a/docker/docker-compose-without-neo4j.yml
+++ b/docker/docker-compose-without-neo4j.yml
@@ -43,6 +43,8 @@ services:
context: ../
dockerfile: docker/datahub-gms/Dockerfile
env_file: datahub-gms/env/docker-without-neo4j.env
+ environment:
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
healthcheck:
test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
start_period: 90s
diff --git a/docker/docker-compose.consumers-without-neo4j.yml b/docker/docker-compose.consumers-without-neo4j.yml
index 8228951d9385f..f1be585232a1a 100644
--- a/docker/docker-compose.consumers-without-neo4j.yml
+++ b/docker/docker-compose.consumers-without-neo4j.yml
@@ -15,6 +15,8 @@ services:
context: ../
dockerfile: docker/datahub-mae-consumer/Dockerfile
env_file: datahub-mae-consumer/env/docker-without-neo4j.env
+ environment:
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
datahub-mce-consumer:
container_name: datahub-mce-consumer
hostname: datahub-mce-consumer
@@ -28,3 +30,4 @@ services:
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
diff --git a/docker/docker-compose.consumers.yml b/docker/docker-compose.consumers.yml
index 2d37094035859..8d331cea2f0b9 100644
--- a/docker/docker-compose.consumers.yml
+++ b/docker/docker-compose.consumers.yml
@@ -15,6 +15,8 @@ services:
context: ../
dockerfile: docker/datahub-mae-consumer/Dockerfile
env_file: datahub-mae-consumer/env/docker.env
+ environment:
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
depends_on:
neo4j:
condition: service_healthy
@@ -36,6 +38,7 @@ services:
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- GRAPH_SERVICE_IMPL=neo4j
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
depends_on:
neo4j:
condition: service_healthy
diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml
index a69fb977a3417..7067b68fba3f9 100644
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@@ -45,6 +45,7 @@ services:
- SEARCH_SERVICE_ENABLE_CACHE=false
- LINEAGE_SEARCH_CACHE_ENABLED=false
- SHOW_BROWSE_V2=true
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
volumes:
- ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh
- ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 95f56fe47e3cc..146055830d04e 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -36,6 +36,8 @@ services:
container_name: datahub-gms
hostname: datahub-gms
image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
+ environment:
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
ports:
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
build:
diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml
index 7b7ca4052f324..8b87001915283 100644
--- a/docker/quickstart/docker-compose-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-m1.quickstart.yml
@@ -97,6 +97,7 @@ services:
- GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
index 53dacaf6ef63b..5373e93da6bcb 100644
--- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
@@ -97,6 +97,7 @@ services:
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
index 1ca91aa19206d..51a40395e3459 100644
--- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
@@ -97,6 +97,7 @@ services:
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
diff --git a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml
index d05933df96a43..4ed57dca1f080 100644
--- a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml
@@ -6,6 +6,7 @@ services:
datahub-mae-consumer:
container_name: datahub-mae-consumer
environment:
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-mcl
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
@@ -44,6 +45,7 @@ services:
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=false
- MCE_CONSUMER_ENABLED=true
diff --git a/docker/quickstart/docker-compose.consumers.quickstart.yml b/docker/quickstart/docker-compose.consumers.quickstart.yml
index f0bd3a0f927c8..ba8432d8a89af 100644
--- a/docker/quickstart/docker-compose.consumers.quickstart.yml
+++ b/docker/quickstart/docker-compose.consumers.quickstart.yml
@@ -9,6 +9,7 @@ services:
neo4j:
condition: service_healthy
environment:
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-mcl
- DATAHUB_GMS_HOST=datahub-gms
- DATAHUB_GMS_PORT=8080
@@ -54,6 +55,7 @@ services:
- GRAPH_SERVICE_IMPL=neo4j
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=false
- MCE_CONSUMER_ENABLED=true
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index c77b4418b6f36..56071cfe1e9e6 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -97,6 +97,7 @@ services:
- GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java
index b505674f2ed9c..61b9d5c816790 100644
--- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java
+++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java
@@ -6,4 +6,5 @@
public class ConsumerConfiguration {
private int maxPartitionFetchBytes;
+ private boolean stopOnDeserializationError;
}
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index 0ea6b8712953e..36498f7c45fea 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -236,6 +236,7 @@ kafka:
maxRequestSize: ${KAFKA_PRODUCER_MAX_REQUEST_SIZE:5242880} # the max bytes sent by the producer, also see kafka-setup MAX_MESSAGE_BYTES for matching value
consumer:
maxPartitionFetchBytes: ${KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES:5242880} # the max bytes consumed per partition
+ stopOnDeserializationError: ${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:true} # Stops kafka listener container on deserialization error, allows user to fix problems before moving past problematic offset. If false will log and move forward past the offset
schemaRegistry:
type: ${SCHEMA_REGISTRY_TYPE:KAFKA} # INTERNAL or KAFKA or AWS_GLUE
url: ${KAFKA_SCHEMAREGISTRY_URL:http://localhost:8081}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
index 2a6338ac15e93..4c0308546d857 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
@@ -21,6 +21,11 @@
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.config.KafkaListenerContainerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
+import org.springframework.kafka.listener.CommonContainerStoppingErrorHandler;
+import org.springframework.kafka.listener.CommonDelegatingErrorHandler;
+import org.springframework.kafka.listener.DefaultErrorHandler;
+import org.springframework.kafka.support.serializer.DeserializationException;
+import org.springframework.kafka.support.serializer.ErrorHandlingDeserializer;
@Slf4j
@Configuration
@@ -66,8 +71,6 @@ private static Map buildCustomizedProperties(
SchemaRegistryConfig schemaRegistryConfig) {
KafkaProperties.Consumer consumerProps = baseKafkaProperties.getConsumer();
- // Specify (de)serializers for record keys and for record values.
- consumerProps.setKeyDeserializer(StringDeserializer.class);
// Records will be flushed every 10 seconds.
consumerProps.setEnableAutoCommit(true);
consumerProps.setAutoCommitInterval(Duration.ofSeconds(10));
@@ -81,7 +84,13 @@ private static Map buildCustomizedProperties(
Map customizedProperties = baseKafkaProperties.buildConsumerProperties();
customizedProperties.put(
- ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, schemaRegistryConfig.getDeserializer());
+ ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ErrorHandlingDeserializer.class);
+ customizedProperties.put(
+ ErrorHandlingDeserializer.KEY_DESERIALIZER_CLASS, StringDeserializer.class);
+ customizedProperties.put(
+ ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ErrorHandlingDeserializer.class);
+ customizedProperties.put(
+ ErrorHandlingDeserializer.VALUE_DESERIALIZER_CLASS, schemaRegistryConfig.getDeserializer());
// Override KafkaProperties with SchemaRegistryConfig only for non-empty values
schemaRegistryConfig.getProperties().entrySet().stream()
@@ -98,7 +107,8 @@ private static Map buildCustomizedProperties(
@Bean(name = "kafkaEventConsumer")
protected KafkaListenerContainerFactory> createInstance(
@Qualifier("kafkaConsumerFactory")
- DefaultKafkaConsumerFactory kafkaConsumerFactory) {
+ DefaultKafkaConsumerFactory kafkaConsumerFactory,
+ @Qualifier("configurationProvider") ConfigurationProvider configurationProvider) {
ConcurrentKafkaListenerContainerFactory factory =
new ConcurrentKafkaListenerContainerFactory<>();
@@ -106,6 +116,18 @@ protected KafkaListenerContainerFactory> createInstance(
factory.setContainerCustomizer(new ThreadPoolContainerCustomizer());
factory.setConcurrency(kafkaEventConsumerConcurrency);
+ /* Sets up a delegating error handler for deserialization errors. If disabled, the
+ DefaultErrorHandler (which does back-off retry and then logs) is used rather than stopping the container. Stopping the container
+ prevents lost messages until the error can be examined; disabling this allows progress, but may lose data.
+ */
+ if (configurationProvider.getKafka().getConsumer().isStopOnDeserializationError()) {
+ CommonDelegatingErrorHandler delegatingErrorHandler =
+ new CommonDelegatingErrorHandler(new DefaultErrorHandler());
+ delegatingErrorHandler.addDelegate(
+ DeserializationException.class, new CommonContainerStoppingErrorHandler());
+ factory.setCommonErrorHandler(delegatingErrorHandler);
+ }
+
log.info(
String.format(
"Event-based KafkaListenerContainerFactory built successfully. Consumer concurrency = %s",
From b80d2f471c559cd31cedb47a79cf07e779b065b9 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Thu, 21 Dec 2023 13:35:34 -0500
Subject: [PATCH 23/25] fix(ingest/fivetran): only materialize upstream lineage
(#9490)
---
.../ingestion/source/fivetran/fivetran.py | 19 +++++++----
.../integration/fivetran/fivetran_golden.json | 32 -------------------
2 files changed, 12 insertions(+), 39 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
index c0395b4e4e796..12e362fa8a3e3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
@@ -7,6 +7,7 @@
DataProcessInstance,
InstanceRunResult,
)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SourceCapability,
@@ -248,13 +249,17 @@ def _get_connector_workunits(
# Map Fivetran's connector entity with Datahub's datajob entity
datajob = self._generate_datajob_from_connector(connector)
- for mcp in datajob.generate_mcp(materialize_iolets=True):
- if mcp.entityType == "dataset" and isinstance(mcp.aspect, StatusClass):
- # While we "materialize" the referenced datasets, we don't want them
- # to be tracked by stateful ingestion.
- yield mcp.as_workunit(is_primary_source=False)
- else:
- yield mcp.as_workunit()
+ for mcp in datajob.generate_mcp(materialize_iolets=False):
+ yield mcp.as_workunit()
+
+ # Materialize the upstream referenced datasets.
+ # We assume that the downstreams are materialized by other ingestion sources.
+ for iolet in datajob.inlets:
+ # We don't want these to be tracked by stateful ingestion.
+ yield MetadataChangeProposalWrapper(
+ entityUrn=str(iolet),
+ aspect=StatusClass(removed=False),
+ ).as_workunit(is_primary_source=False)
# Map Fivetran's job/sync history entity with Datahub's data process entity
for job in connector.jobs:
diff --git a/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json
index a72c960a72296..b8f05fa6e93aa 100644
--- a/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json
+++ b/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json
@@ -178,38 +178,6 @@
"lastRunId": "no-run-id-provided"
}
},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
{
"entityType": "dataJob",
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
From a18c72083d763b08282b67146881d4f918b257de Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Thu, 21 Dec 2023 13:50:39 -0500
Subject: [PATCH 24/25] feat(ingest): handle multiline string coercion (#9484)
---
docs-website/download_historical_versions.py | 4 +-
docs/developers.md | 6 +--
.../src/datahub/configuration/git.py | 12 +----
.../validate_multiline_string.py | 31 ++++++++++++
.../ingestion/source/bigquery_v2/lineage.py | 2 +-
.../ingestion/source/looker/lookml_source.py | 7 ++-
.../source_config/usage/bigquery_usage.py | 3 ++
.../src/datahub/utilities/logging_manager.py | 1 +
.../unit/config/test_pydantic_validators.py | 50 +++++++++++++++----
9 files changed, 86 insertions(+), 30 deletions(-)
create mode 100644 metadata-ingestion/src/datahub/configuration/validate_multiline_string.py
diff --git a/docs-website/download_historical_versions.py b/docs-website/download_historical_versions.py
index 53ee9cf1e63ef..7493210ffa2a5 100644
--- a/docs-website/download_historical_versions.py
+++ b/docs-website/download_historical_versions.py
@@ -37,9 +37,9 @@ def fetch_urls(
except Exception as e:
if attempt < max_retries:
print(f"Attempt {attempt + 1}/{max_retries}: {e}")
- time.sleep(retry_delay)
+ time.sleep(retry_delay * 2**attempt)
else:
- print(f"Max retries reached. Unable to fetch data.")
+ print("Max retries reached. Unable to fetch data.")
raise
diff --git a/docs/developers.md b/docs/developers.md
index 60d31f5e4523f..fe007a56ddc68 100644
--- a/docs/developers.md
+++ b/docs/developers.md
@@ -17,10 +17,8 @@ title: "Local Development"
On macOS, these can be installed using [Homebrew](https://brew.sh/).
```shell
-# Install Java 8 and 11
-brew tap homebrew/cask-versions
-brew install java11
-brew install --cask zulu8
+# Install Java
+brew install openjdk@17
# Install Python
brew install python@3.10 # you may need to add this to your PATH
diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py
index a5f88744661a4..3c76c8da0d571 100644
--- a/metadata-ingestion/src/datahub/configuration/git.py
+++ b/metadata-ingestion/src/datahub/configuration/git.py
@@ -1,4 +1,3 @@
-import os
import pathlib
from typing import Any, Dict, Optional, Union
@@ -6,6 +5,7 @@
from datahub.configuration.common import ConfigModel
from datahub.configuration.validate_field_rename import pydantic_renamed_field
+from datahub.configuration.validate_multiline_string import pydantic_multiline_string
_GITHUB_PREFIX = "https://github.com/"
_GITLAB_PREFIX = "https://gitlab.com/"
@@ -92,15 +92,7 @@ class GitInfo(GitReference):
description="The url to call `git clone` on. We infer this for github and gitlab repos, but it is required for other hosts.",
)
- @validator("deploy_key_file")
- def deploy_key_file_should_be_readable(
- cls, v: Optional[FilePath]
- ) -> Optional[FilePath]:
- if v is not None:
- # pydantic does existence checks, we just need to check if we can read it
- if not os.access(v, os.R_OK):
- raise ValueError(f"Unable to read deploy key file {v}")
- return v
+ _fix_deploy_key_newlines = pydantic_multiline_string("deploy_key")
@validator("deploy_key", pre=True, always=True)
def deploy_key_filled_from_deploy_key_file(
diff --git a/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py b/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py
new file mode 100644
index 0000000000000..0baaf4f0264b9
--- /dev/null
+++ b/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py
@@ -0,0 +1,31 @@
+from typing import Optional, Type, Union
+
+import pydantic
+
+
+def pydantic_multiline_string(field: str) -> classmethod:
+ """If the field is present and contains an escaped newline, replace it with a real newline.
+
+ This makes the assumption that the field value is never supposed to have a
+ r"\n" in it, and instead should only have newline characters. This is generally
+ a safe assumption for SSH keys and similar.
+
+ The purpose of this helper is to make us more forgiving of small formatting issues
+ in recipes, without sacrificing correctness across the board.
+ """
+
+ def _validate_field(
+ cls: Type, v: Union[None, str, pydantic.SecretStr]
+ ) -> Optional[str]:
+ if v is not None:
+ if isinstance(v, pydantic.SecretStr):
+ v = v.get_secret_value()
+ v = v.replace(r"\n", "\n")
+
+ return v
+
+ # Hack: Pydantic maintains unique list of validators by referring its __name__.
+ # https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264
+ # This hack ensures that validators for multiple fields do not overwrite each other.
+ _validate_field.__name__ = f"{_validate_field.__name__}_{field}"
+ return pydantic.validator(field, pre=True, allow_reuse=True)(_validate_field)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
index eddd08c92b808..b44b06feb95af 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
@@ -175,7 +175,7 @@ def make_lineage_edges_from_parsing_result(
table_name = str(
BigQueryTableRef.from_bigquery_table(
BigqueryTableIdentifier.from_string_name(
- DatasetUrn.create_from_string(table_urn).get_dataset_name()
+ DatasetUrn.from_string(table_urn).name
)
)
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
index b76bef49a7e6f..33079f3fd9ac1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
@@ -2060,10 +2060,9 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
)
logger.debug("Failed to process explore", exc_info=e)
- processed_view_files = processed_view_map.get(model.connection)
- if processed_view_files is None:
- processed_view_map[model.connection] = set()
- processed_view_files = processed_view_map[model.connection]
+ processed_view_files = processed_view_map.setdefault(
+ model.connection, set()
+ )
project_name = self.get_project_name(model_name)
logger.debug(f"Model: {model_name}; Includes: {model.resolved_includes}")
diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py
index 5eb9c83236e4f..13abe73cc4e09 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py
@@ -11,6 +11,7 @@
from datahub.configuration.common import AllowDenyPattern, ConfigurationError
from datahub.configuration.source_common import EnvConfigMixin
from datahub.configuration.validate_field_removal import pydantic_removed_field
+from datahub.configuration.validate_multiline_string import pydantic_multiline_string
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
from datahub.ingestion.source_config.bigquery import BigQueryBaseConfig
@@ -44,6 +45,8 @@ class BigQueryCredential(ConfigModel):
description="If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email",
)
+ _fix_private_key_newlines = pydantic_multiline_string("private_key")
+
@pydantic.root_validator(skip_on_failure=True)
def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
if values.get("client_x509_cert_url") is None:
diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py
index a8eacb0a9938d..62aa1ca7ab791 100644
--- a/metadata-ingestion/src/datahub/utilities/logging_manager.py
+++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py
@@ -199,6 +199,7 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[N
for handler in handlers:
root_logger.removeHandler(handler)
for lib in DATAHUB_PACKAGES:
+ lib_logger = logging.getLogger(lib)
lib_logger.removeHandler(handler)
lib_logger.propagate = True
diff --git a/metadata-ingestion/tests/unit/config/test_pydantic_validators.py b/metadata-ingestion/tests/unit/config/test_pydantic_validators.py
index 399245736805c..f687a2776f6e2 100644
--- a/metadata-ingestion/tests/unit/config/test_pydantic_validators.py
+++ b/metadata-ingestion/tests/unit/config/test_pydantic_validators.py
@@ -1,12 +1,14 @@
from typing import Optional
+import pydantic
import pytest
from pydantic import ValidationError
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import ConfigModel, ConfigurationWarning
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.configuration.validate_field_rename import pydantic_renamed_field
+from datahub.configuration.validate_multiline_string import pydantic_multiline_string
from datahub.utilities.global_warning_util import (
clear_global_warnings,
get_global_warnings,
@@ -22,8 +24,9 @@ class TestModel(ConfigModel):
v = TestModel.parse_obj({"b": "original"})
assert v.b == "original"
- v = TestModel.parse_obj({"a": "renamed"})
- assert v.b == "renamed"
+ with pytest.warns(ConfigurationWarning, match="a is deprecated"):
+ v = TestModel.parse_obj({"a": "renamed"})
+ assert v.b == "renamed"
with pytest.raises(ValidationError):
TestModel.parse_obj({"a": "foo", "b": "bar"})
@@ -44,9 +47,10 @@ class TestModel(ConfigModel):
assert v.b == "original"
assert v.b1 == "original"
- v = TestModel.parse_obj({"a": "renamed", "a1": "renamed"})
- assert v.b == "renamed"
- assert v.b1 == "renamed"
+ with pytest.warns(ConfigurationWarning, match=r"a.* is deprecated"):
+ v = TestModel.parse_obj({"a": "renamed", "a1": "renamed"})
+ assert v.b == "renamed"
+ assert v.b1 == "renamed"
with pytest.raises(ValidationError):
TestModel.parse_obj({"a": "foo", "b": "bar", "b1": "ok"})
@@ -74,8 +78,9 @@ class TestModel(ConfigModel):
v = TestModel.parse_obj({"b": "original"})
assert v.b == "original"
- v = TestModel.parse_obj({"b": "original", "r1": "removed", "r2": "removed"})
- assert v.b == "original"
+ with pytest.warns(ConfigurationWarning, match=r"r\d was removed"):
+ v = TestModel.parse_obj({"b": "original", "r1": "removed", "r2": "removed"})
+ assert v.b == "original"
def test_field_deprecated():
@@ -92,7 +97,10 @@ class TestModel(ConfigModel):
v = TestModel.parse_obj({"b": "original"})
assert v.b == "original"
- v = TestModel.parse_obj({"b": "original", "d1": "deprecated", "d2": "deprecated"})
+ with pytest.warns(ConfigurationWarning, match=r"d\d.+ deprecated"):
+ v = TestModel.parse_obj(
+ {"b": "original", "d1": "deprecated", "d2": "deprecated"}
+ )
assert v.b == "original"
assert v.d1 == "deprecated"
assert v.d2 == "deprecated"
@@ -100,3 +108,27 @@ class TestModel(ConfigModel):
assert any(["d2 is deprecated" in warning for warning in get_global_warnings()])
clear_global_warnings()
+
+
+def test_multiline_string_fixer():
+ class TestModel(ConfigModel):
+ s: str
+ m: Optional[pydantic.SecretStr] = None
+
+ _validate_s = pydantic_multiline_string("s")
+ _validate_m = pydantic_multiline_string("m")
+
+ v = TestModel.parse_obj({"s": "foo\nbar"})
+ assert v.s == "foo\nbar"
+
+ v = TestModel.parse_obj({"s": "foo\\nbar"})
+ assert v.s == "foo\nbar"
+
+ v = TestModel.parse_obj({"s": "normal", "m": "foo\\nbar"})
+ assert v.s == "normal"
+ assert v.m
+ assert v.m.get_secret_value() == "foo\nbar"
+
+ v = TestModel.parse_obj({"s": "normal", "m": pydantic.SecretStr("foo\\nbar")})
+ assert v.m
+ assert v.m.get_secret_value() == "foo\nbar"
From cfc641f0d03408b85ae75c2e4830c5f307ce6a68 Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Thu, 21 Dec 2023 20:32:51 +0100
Subject: [PATCH 25/25] fix(ingest/databricks): Pinning databricks sdk to not
fail on mypy issues (#9500)
---
metadata-ingestion/setup.py | 4 +++-
.../src/datahub/ingestion/source/aws/aws_common.py | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 13c9d3c99aaca..0dcac7a7fc1b4 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -274,7 +274,9 @@
databricks = {
# 0.1.11 appears to have authentication issues with azure databricks
- "databricks-sdk>=0.9.0",
+ # 0.16.0 added py.typed support which caused mypy to fail. The databricks sdk is pinned until we resolve mypy issues.
+ # https://github.com/databricks/databricks-sdk-py/pull/483
+ "databricks-sdk>=0.9.0,<0.16.0",
"pyspark~=3.3.0",
"requests",
# Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py
index 0fb211a5d7b16..421991a0966c3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py
@@ -167,7 +167,7 @@ def get_session(self) -> Session:
return session
- def get_credentials(self) -> Dict[str, str]:
+ def get_credentials(self) -> Dict[str, Optional[str]]:
credentials = self.get_session().get_credentials()
if credentials is not None:
return {