diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js
index 734c6aef3375d..308c359e23ecb 100644
--- a/docs-website/docusaurus.config.js
+++ b/docs-website/docusaurus.config.js
@@ -88,7 +88,26 @@ module.exports = {
},
items: [
{
- to: "cloud/",
+ type: "dropdown",
+ label: "Solutions",
+ position: "right",
+ items: [
+ {
+ to: "/solutions/discovery",
+ label: "Discovery",
+ },
+ {
+ to: "/solutions/observability",
+ label: "Observability",
+ },
+ {
+ to: "/solutions/governance",
+ label: "Governance",
+ },
+ ]
+ },
+ {
+ to: "/cloud",
activeBasePath: "cloud",
label: "Cloud",
position: "right",
diff --git a/docs-website/src/pages/_components/Community/index.js b/docs-website/src/pages/_components/Community/index.js
index a4f2b2304e51e..20917332c443d 100644
--- a/docs-website/src/pages/_components/Community/index.js
+++ b/docs-website/src/pages/_components/Community/index.js
@@ -2,7 +2,7 @@ import React, { useState, useRef, useEffect } from "react";
import styles from "./community.module.scss";
import useBaseUrl from "@docusaurus/useBaseUrl";
-const TARGET_COUNT = 11535;
+const TARGET_COUNT = 12219;
const INCREMENT = 1;
const Community = () => {
diff --git a/docs-website/src/pages/_components/Hero/hero.module.scss b/docs-website/src/pages/_components/Hero/hero.module.scss
index c7f4ec6a0b78f..1850757bd454c 100644
--- a/docs-website/src/pages/_components/Hero/hero.module.scss
+++ b/docs-website/src/pages/_components/Hero/hero.module.scss
@@ -233,6 +233,9 @@
}
.hero__cta {
margin-top: 12px;
+ .cta__primary {
+ margin-bottom: 4px;
+ }
}
.hero__footer_cta {
margin-top: 12px;
diff --git a/docs-website/src/pages/_components/Hero/index.js b/docs-website/src/pages/_components/Hero/index.js
index ca94203e94c06..a61b9d8a402bd 100644
--- a/docs-website/src/pages/_components/Hero/index.js
+++ b/docs-website/src/pages/_components/Hero/index.js
@@ -69,7 +69,7 @@ const Hero = ({ onOpenTourModal }) => {
diff --git a/docs-website/src/pages/cloud/FeatureCards/index.js b/docs-website/src/pages/cloud/FeatureCards/index.js
index 4a45cbcbe1717..52c7a5eec46b4 100644
--- a/docs-website/src/pages/cloud/FeatureCards/index.js
+++ b/docs-website/src/pages/cloud/FeatureCards/index.js
@@ -9,7 +9,7 @@ const data = {
{
title: "Data Discovery",
icon: "/img/assets/data-discovery.svg",
- cloudPageLink: "https://www.acryldata.io/acryl-datahub",
+ cloudPageLink: "/solutions/discovery",
cloudBenefits: [
{ text: "Enhanced search ranking", link: "" }, // →
{ text: "Personalization for every persona", link: "" }, // →
@@ -27,7 +27,7 @@ const data = {
{
title: "Data Observability",
icon: "/img/assets/data-ob.svg",
- cloudPageLink: "https://www.acryldata.io/observe",
+ cloudPageLink: "/solutions/observability",
cloudBenefits: [
{ text: "Continuous data quality monitors", link: "" }, // →
{ text: "End-to-end data incident tracking & management", link: "" }, // →
@@ -45,7 +45,7 @@ const data = {
{
title: "Data Governance",
icon: "/img/assets/data-governance.svg",
- cloudPageLink: "https://www.acryldata.io/acryl-datahub#governance",
+ cloudPageLink: "/solutions/governance",
cloudBenefits: [
{ text: "Human-assisted asset certification workflows", link: "" }, // →
{ text: "Automations to enforce governance standards", link: "" }, // →
diff --git a/docs-website/src/pages/solutions/_components/CaseStudy/case-study.module.scss b/docs-website/src/pages/solutions/_components/CaseStudy/case-study.module.scss
new file mode 100644
index 0000000000000..c41a3bde6a52a
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/CaseStudy/case-study.module.scss
@@ -0,0 +1,176 @@
+.container {
+ display: flex;
+ flex-direction: column;
+ font-family: "Manrope";
+}
+.case_study {
+ display: flex;
+ flex-direction: column;
+ width: 100vw;
+ margin: 5rem auto;
+
+ .case_study_heading {
+ color: var(--primitives-text-text-heading, #373A47);
+ text-align: center;
+ font-family: Manrope;
+ font-size: 29.177px;
+ font-style: normal;
+ font-weight: 400;
+ line-height: normal;
+ letter-spacing: 0.292px;
+ }
+
+ .card_row::-webkit-scrollbar {
+ display: none;
+ }
+ .card_row {
+ overflow-x: scroll;
+ width: 100vw;
+ scrollbar-width: none;
+ display: flex;
+ margin-top: 2rem;
+
+ .card_row_wrapper {
+ display: flex;
+ flex-direction: row;
+ align-items: center;
+ margin: auto;
+ }
+ .cardLink {
+ color: #000;
+
+ &:hover {
+ text-decoration: none;
+ }
+ }
+
+ .card {
+ max-width: 419px;
+ max-height: 148px;
+ padding: 1.2rem;
+ margin: 1rem 0.5rem;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ border-radius: var(--number-scales-2s-32, 32px);
+ border: 1px solid var(--semantics-bg-bg-white, #99999930);
+ background: var(--primitives-grays-1, #9999991a);
+
+ transition: all .3s ease-in-out;
+ &:hover {
+ box-shadow: 0px 1px 4px 1px #0000001C;
+ border-color: #33333340;
+ background: var(--primitives-grays-1, #dddddd1a);
+ }
+
+ .card_image {
+ border-radius: 16px;
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ padding: 0;
+ margin: auto;
+ margin-right: 16px;
+
+ }
+
+ .card_heading_div {
+ width: 70%;
+ padding: 12px;
+
+ .card_heading {
+ color: var(--primitives-grays-9, #484C5C);
+ font-family: Manrope;
+ font-size: 1.3rem;
+ font-style: normal;
+ font-weight: 400;
+ line-height: normal;
+ -webkit-line-clamp: 3;
+ -webkit-box-orient: vertical;
+ overflow: hidden;
+ display: -webkit-box;
+
+ }
+ }
+ }
+
+ }
+
+ a:hover {
+ text-decoration: none !important;
+ }
+
+ .bottom_line {
+ cursor: pointer;
+ text-decoration: none;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ color: #12b0fb;
+ font-size: 1.1rem;
+ font-weight: 500;
+ margin-top: 50px;
+ margin-bottom: 10px;
+
+ span {
+ line-height: 10px;
+ font-size: 1.5rem;
+ margin-left: 10px;
+ }
+ }
+}
+
+@media (max-width: 800px) {
+ .case_study {
+ .case_study_heading {
+ text-align: center;
+ font-family: "Manrope";
+ width: 80%;
+ margin: auto;
+ font-size: 1.5rem;
+ line-height: normal;
+ font-weight: 400;
+ }
+
+ .card_row {
+ margin-top: 16px;
+ display: flex;
+ justify-content: flex-start;
+
+ .card_row_wrapper { // override applied inside the max-width: 800px query
+ padding: 0 0;
+ width: 100vh; // NOTE(review): width is taken from the viewport *height* here, while the desktop rules in this file size with 100vw — confirm this is intentional and not a typo for 100vw
+ align-items: flex-start;
+ justify-content: flex-start;
+ }
+
+ .card {
+ min-width: 240px;
+ padding: 0.8rem;
+ margin: 0.5rem;
+ display: flex;
+
+ .card_image {
+ width: 50px;
+ height: 50px;
+ margin: 16px auto;
+ }
+
+ .card_heading_div {
+ text-align: left;
+ padding: 8px;
+
+ .card_heading {
+ font-size: 1rem;
+ line-height: 1.2rem;
+ }
+ }
+ }
+ }
+
+ .bottom_line {
+ font-size: 1rem;
+ margin-top: 40px;
+ }
+ }
+}
diff --git a/docs-website/src/pages/solutions/_components/CaseStudy/index.js b/docs-website/src/pages/solutions/_components/CaseStudy/index.js
new file mode 100644
index 0000000000000..9ebffa006e570
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/CaseStudy/index.js
@@ -0,0 +1,34 @@
+import React from "react";
+import styles from "./case-study.module.scss";
+import clsx from "clsx";
+import Link from '@docusaurus/Link'
+
+const CaseStudy = ({ caseStudyContent }) => {
+ const { title, backgroundColor, items } = caseStudyContent;
+ return (
+
+
+
+ {title}
+
+
+
+
+ {items.map((caseStudy) => (
+
+
+
+
+
+
+ ))}
+
+
+
+
+ );
+};
+
+export default CaseStudy;
diff --git a/docs-website/src/pages/solutions/_components/Hero/hero.module.scss b/docs-website/src/pages/solutions/_components/Hero/hero.module.scss
new file mode 100644
index 0000000000000..ef2d90404ed08
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Hero/hero.module.scss
@@ -0,0 +1,181 @@
+.hero__container {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ gap: 16px;
+ background-color: #F9F9FA;
+ background-image: url(/img/solutions/hero-background.png);
+ background-position: bottom 0rem center;
+ background-size: contain;
+ background-repeat: no-repeat;
+ max-height: 890px;
+ min-height: 85vh;
+ border-radius: 48px;
+ margin: 3rem 3rem 12rem 3rem;
+ padding: 5rem;
+
+ .hero__topQuote {
+ color: var(--primitives-text-tex-subtext, #777E99);
+ font-family: Manrope;
+ font-size: 1.2rem;
+ font-style: normal;
+ font-weight: 500;
+ line-height: normal;
+ letter-spacing: 0.2rem;
+ text-transform: uppercase;
+ margin-top: 5vh;
+ }
+
+ .hero__title {
+ color: #373A47;
+ font-family: Manrope;
+ font-size: 4rem;
+ font-style: normal;
+ font-weight: 300;
+ line-height: 4.25rem;
+ max-width: 720px;
+ text-align: center;
+ }
+
+ .hero__description {
+ color: var(--primitives-text-text-heading, #373A47);
+ text-align: center;
+ font-family: Manrope;
+ font-size: 1.25rem;
+ font-style: normal;
+ font-weight: 400;
+ line-height: 2.25rem;
+ max-width: 800px;
+ margin-top: 1rem;
+ }
+
+ .cta__tertiary {
+ color: black;
+ font-size: 1rem;
+ margin-top: .5rem;
+ font-weight: 500;
+ }
+
+ .cta__tertiary:hover {
+ color: black;
+ text-decoration: none;
+ opacity: .8;
+ }
+
+ .hero__cta {
+ margin-top: 0rem;
+ display: flex;
+
+ a {
+ cursor: pointer;
+ font-size: 1rem;
+ background-color: white;
+ padding: 4px 20px;
+ border-radius: 50px;
+ margin: 0 10px 0 0;
+ font-weight: 600;
+ text-decoration: none;
+ transition: background-color .2s ease-in-out;
+ &:hover {
+ opacity : 0.8;
+ }
+ }
+
+ .cta__primary {
+ padding: var(--number-scales-2s-12, 8px) var(--number-scales-2s-24, 24px);
+ justify-content: center;
+ align-items: center;
+ gap: var(--number-scales-2s-8, 8px);
+ border-radius: var(--number-scales-2s-full, 999px);
+ background: var(--semantics-surface-default, #1890FF);
+ color: white;
+ }
+
+ .cta__secondary {
+ padding: var(--number-scales-2s-12, 8px) var(--number-scales-2s-24, 24px);
+ justify-content: center;
+ align-items: center;
+ gap: var(--number-scales-2s-8, 8px);
+ border-radius: var(--number-scales-2s-full, 999px);
+ border: 1px solid var(--semantics-border-default, #1890FF);
+ background: var(--semantics-bg-bg-white, #FFF);
+ }
+ }
+
+ .hero__img_container {
+ position: relative;
+
+ .hero__img {
+ z-index: 10;
+ height: 40vh;
+ min-height: 320px;
+ margin-bottom: -8rem;
+ }
+
+ .hero__img_gradient {
+ position: absolute;
+ right: 0;
+ left: 0;
+ bottom: 0;
+ margin-bottom: -8rem;
+ height: 60%;
+ width: min-content;
+ background: linear-gradient(to top, #FFFFFF 15.52%, transparent);
+ }
+ }
+
+  // Mobile adjustments
+  @media (max-width: 768px) {
+    max-height: none; // lift the 890px desktop cap; `auto` is not a valid max-height value, so the old declaration was ignored
+    padding: 1.25rem;
+    margin: 1rem .5rem 6rem .5rem;
+    border-radius: 24px;
+
+    .hero__topQuote {
+      font-size: 1rem;
+      margin-top: 3vh;
+      text-align: center;
+    }
+
+    .hero__title {
+      font-size: 3rem;
+      line-height: 3.5rem;
+      max-width: 100%;
+    }
+
+    .hero__description {
+      font-size: 1rem;
+      line-height: 1.75rem;
+      max-width: 100%;
+    }
+
+    .hero__img_container {
+      .hero__img {
+        height: 30vh;
+        min-height: 200px;
+        margin-bottom: -6rem;
+        object-fit: contain;
+      }
+    }
+
+    .cta__tertiary {
+      font-size: 0.9rem;
+    }
+
+    .hero__cta {
+      a {
+        margin: 0.5rem;
+        padding: 8px 16px;
+        font-size: 0.9rem;
+      }
+
+      .cta__primary {
+        background: #1890FF;
+      }
+
+      .cta__secondary {
+        border: 1px solid #1890FF;
+      }
+    }
+  }
+}
diff --git a/docs-website/src/pages/solutions/_components/Hero/index.js b/docs-website/src/pages/solutions/_components/Hero/index.js
new file mode 100644
index 0000000000000..fd50f2871b9eb
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Hero/index.js
@@ -0,0 +1,45 @@
+import React from 'react';
+import styles from './hero.module.scss';
+import Link from "@docusaurus/Link";
+
+const Hero = ({ onOpenTourModal, heroContent }) => {
+ const { topQuote, title, description, imgSrc } = heroContent
+ return (
+
+
+
+ {topQuote}
+
+
+ {title}
+
+
{description}
+
+
+ Start with Open Source →
+
+
+
+
+
+
+
+
+ );
+};
+
+export default Hero;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/_components/Integrations/index.js b/docs-website/src/pages/solutions/_components/Integrations/index.js
new file mode 100644
index 0000000000000..77f028eb4cf74
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Integrations/index.js
@@ -0,0 +1,40 @@
+import React, { useRef, useEffect } from "react";
+import styles from "./integrations.module.scss";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+
+/**
+ * "Integrates with your data stack" section: a heading followed by a track that
+ * iterates the eleven numbered integration entries three times.
+ */
+const Integrations = () => {
+  const integrationsPath = 'img/solutions/integrations';
+  // NOTE(review): kept in case the markup attaches it via `ref`; drop it if it
+  // turns out to be unused.
+  const counterRef = useRef(null);
+
+  return (
+
+
+        Integrates with your data stack
+
+
+
+
+
+              {[...Array(3)].map((_, i) => (
+
+                  {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].map((item, index) => (
+
+
+                  ))}
+
+              ))}
+
+
+
+
+
+  );
+};
+
+export default Integrations;
diff --git a/docs-website/src/pages/solutions/_components/Integrations/integrations.module.scss b/docs-website/src/pages/solutions/_components/Integrations/integrations.module.scss
new file mode 100644
index 0000000000000..da0c6964e8775
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Integrations/integrations.module.scss
@@ -0,0 +1,127 @@
+.container {
+ display: flex;
+ flex-direction: column;
+
+ .section_header {
+ color: var(--primitives-text-tex-subtext, #777E99);
+ text-align: center;
+ font-family: "Helvetica Neue";
+ font-size: 1.5rem;
+ font-style: normal;
+ font-weight: 400;
+ line-height: normal;
+ letter-spacing: 0.5px;
+
+ margin-bottom: 2rem;
+ position: relative;
+ padding: 0 2rem;
+ display: block;
+ width: 100%;
+
+ &:before, &:after {
+ content: " ";
+ height: 1px;
+ width: calc((100vw - 48rem)/2);
+ background: #D9DBE4;
+ display: block;
+ position: absolute;
+ top: 50%;
+ }
+
+ &:before {
+ left: 8rem;
+ }
+
+ &:after {
+ right: 8rem;
+ }
+ }
+}
+
+
+.carouselContainer {
+ width: 100%;
+ min-height: 100px;
+ min-width: 400px;
+ overflow: hidden;
+}
+@media screen and (max-width: 800px) {
+ .carouselContainer {
+ min-width: auto;
+ }
+}
+
+.slider {
+ height: 100px;
+ margin: 1rem auto;
+ overflow: hidden;
+ position: relative;
+ display: flex;
+ align-items: center;
+ width: 100%;
+
+ &::before,
+ &::after {
+ position: absolute;
+ content: '';
+ width: 90%;
+ height: 100%;
+ z-index: 9;
+ }
+
+}
+
+.slider {
+ position: relative;
+}
+
+.slide_track {
+ display: flex;
+ width: max-content;
+ animation: scroll 30s linear infinite;
+}
+
+.slide {
+ width: 100px;
+ height: 100px;
+ margin-left: 52px;
+ display: flex;
+ justify-content: space-between;
+ overflow: hidden;
+ flex-direction: column;
+ align-items: center;
+ position: relative;
+ background-size: cover;
+ background-repeat: no-repeat;
+}
+
+@keyframes scroll {
+ 0% {
+ transform: translateX(0);
+ }
+
+ 100% {
+ transform: translateX(-50%);
+ }
+}
+
+
+@keyframes slideIn {
+ 0% {
+ opacity: 0;
+ transform: translateY(20px);
+ }
+
+ 100% {
+ opacity: 1;
+ transform: translateY(0);
+ }
+}
+
+@media only screen and (max-width: 800px) {
+ .slide {
+ width: 80px;
+ height: 80px;
+ margin: auto 1rem;
+ }
+}
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/_components/IntegrationsStatic/index.js b/docs-website/src/pages/solutions/_components/IntegrationsStatic/index.js
new file mode 100644
index 0000000000000..76b99b156704e
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/IntegrationsStatic/index.js
@@ -0,0 +1,34 @@
+import React, { useRef, useEffect } from "react";
+import styles from "./integrations.module.scss";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+
+const Integrations = () => {
+ const integrationsPath = 'img/solutions/integrations-observe';
+
+
+ return (
+
+
+ Integrates with your data stack
+
+
+
+
+
+ {[...Array(1)].map((_, i) => (
+
+ {[1, 2, 3, 4, 5, 6].map((item, index) => (
+
+
+ ))}
+
+ ))}
+
+
+
+
+
+ );
+};
+
+export default Integrations;
diff --git a/docs-website/src/pages/solutions/_components/IntegrationsStatic/integrations.module.scss b/docs-website/src/pages/solutions/_components/IntegrationsStatic/integrations.module.scss
new file mode 100644
index 0000000000000..aa2201fd0185c
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/IntegrationsStatic/integrations.module.scss
@@ -0,0 +1,107 @@
+.container {
+ display: flex;
+ flex-direction: column;
+
+ .section_header {
+ color: var(--primitives-text-tex-subtext, #777E99);
+ text-align: center;
+ font-family: "Helvetica Neue";
+ font-size: 1.5rem;
+ font-style: normal;
+ font-weight: 400;
+ line-height: normal;
+ letter-spacing: 0.5px;
+
+ margin-bottom: 2rem;
+ position: relative;
+ padding: 0 2rem;
+ display: block;
+ width: 100%;
+
+ &:before, &:after {
+ content: " ";
+ height: 1px;
+ width: calc((100vw - 65rem)/2);
+ background: #D9DBE4;
+ display: block;
+ position: absolute;
+ top: 50%;
+ }
+
+ &:before {
+ left: 20rem;
+ }
+
+ &:after {
+ right: 20rem;
+ }
+ }
+}
+
+
+.carouselContainer {
+ width: 100%;
+ min-height: 100px;
+ min-width: 400px;
+}
+
+.slider {
+ height: 100px;
+ margin: 1rem auto;
+ overflow: hidden;
+ position: relative;
+ display: flex;
+ align-items: center;
+ width: 100%;
+
+ &::before,
+ &::after {
+ position: absolute;
+ content: '';
+ width: 90%;
+ height: 100%;
+ z-index: 9;
+ }
+
+}
+
+.carouselContainer {
+ overflow: hidden;
+}
+
+.slider {
+ position: relative;
+}
+
+.slide_track {
+ display: flex;
+ width: max-content;
+ margin: auto;
+}
+
+.slide {
+ width: 100px;
+ height: 100px;
+ margin: auto 3rem;
+ display: flex;
+ justify-content: space-between;
+ overflow: hidden;
+ flex-direction: column;
+ align-items: center;
+ position: relative;
+ background-size: cover;
+ background-repeat: no-repeat;
+}
+
+
+@media only screen and (max-width: 800px) {
+ .slider {
+ max-width: 100vw;
+ min-width: auto;
+ }
+ .slide {
+ width: 80px;
+ height: 80px;
+ margin: auto 1rem;
+ }
+}
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/_components/Persona/index.js b/docs-website/src/pages/solutions/_components/Persona/index.js
new file mode 100644
index 0000000000000..d377de28525bd
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Persona/index.js
@@ -0,0 +1,46 @@
+import React from "react";
+import styles from "./styles.module.scss";
+import clsx from "clsx";
+
+const Persona = ({ personaContent }) => {
+ const { title, personas } = personaContent;
+
+ return (
+
+
+
{title}
+
+
+
+
+ {personas.map((persona, index) => (
+
+
+
+
+
+
+
+
+
+
+
+ ))}
+
+
+
+
+
+
+
+
+ );
+};
+
+const FeatureItem = ({ text }) => (
+
+ {text}
+
+);
+
+export default Persona;
diff --git a/docs-website/src/pages/solutions/_components/Persona/styles.module.scss b/docs-website/src/pages/solutions/_components/Persona/styles.module.scss
new file mode 100644
index 0000000000000..3568cc2b75611
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Persona/styles.module.scss
@@ -0,0 +1,163 @@
+.container {
+ padding: 5rem;
+ padding-bottom: 0;
+ display: flex;
+ position: relative;
+ justify-content: center;
+ align-items: center;
+ background: linear-gradient(30deg, #EBEBEB 15.52%, #FFF 85.84%);
+}
+
+.personas {
+ width: 100%;
+}
+
+.persona_heading {
+ color: var(--primitives-text-tex-subtext, #777E99);
+ text-align: center;
+ font-family: Manrope;
+ font-size: 2rem;
+ font-style: normal;
+ font-weight: 500;
+ line-height: normal;
+ letter-spacing: 0.34px;
+ margin-bottom: 2rem;
+ position: relative;
+ padding: 0 2rem;
+ display: block;
+ width: 100%;
+
+ &:before, &:after {
+ content: " ";
+ height: 2px;
+ width: calc((100vw - 1500px)/2);
+ background: #D9DBE4;
+ display: block;
+ position: absolute;
+ top: 50%;
+ }
+
+ &:before {
+ left: 8rem;
+ }
+
+ &:after {
+ right: 8rem;
+ }
+}
+
+.persona_row {
+ display: flex;
+ justify-content: center;
+ gap: 50px;
+ position: relative;
+ padding-top: 20px; /* Add some padding to give space above the line */
+}
+
+.persona_row_mobile {
+ display: none;
+}
+
+.persona_row_wrapper {
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ background: inherit; /* Inherit the container's gradient */
+}
+
+.persona {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ min-width: 365px;
+ margin: 2rem;
+ margin-bottom: 0;
+ height: 100%;
+ background: inherit; /* Allow persona card to have gradient */
+}
+
+.persona_img {
+ margin-bottom: 12px;
+ z-index: 1;
+}
+
+.persona_img img {
+ height: 200px;
+}
+
+.features {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+}
+
+.featureItem {
+ display: flex;
+ padding: 0.625rem;
+ border-radius: 0.375rem;
+ background: white;
+ opacity: 0.8;
+ color: var(--primitives-text-tex-subtext, #777E99);
+ font-family: Manrope;
+ font-size: 1rem;
+ font-style: normal;
+ font-weight: 400;
+ line-height: normal;
+ letter-spacing: 0.01rem;
+ text-transform: capitalize;
+ margin: 0.5rem;
+ justify-content: center;
+ width: max-content;
+}
+
+.card_gradient {
+ position: absolute;
+ right: 0;
+ left: 0;
+ bottom: 0;
+ height: 40%;
+ background: linear-gradient(to top, #EBEBEB 15.52%, transparent);
+}
+
+.persona_bg_line {
+ width: 100%;
+ max-width: 900px;
+ height: 0;
+ border-bottom: 1px dashed #aaa;
+ background-size: contain;
+ position: absolute;
+ top: 28%;
+ margin: auto;
+}
+
+
+@media (max-width: 768px) {
+ .container {
+ padding: 4rem 2rem;
+ max-width: 100vw;
+ }
+
+ .persona_heading {
+ font-size: 1.4rem;
+ }
+ .persona_row {
+ display: none !important;
+ }
+ .persona_row_mobile {
+ display: flex !important;
+ flex-direction: column;
+ align-items: center;
+ }
+
+ .persona_row_wrapper {
+ display: block !important;
+ }
+
+ .persona {
+ margin: 3rem auto;
+ }
+
+ .card_gradient {
+ height: 15%;
+ }
+}
diff --git a/docs-website/src/pages/solutions/_components/QuickstartContent/index.js b/docs-website/src/pages/solutions/_components/QuickstartContent/index.js
new file mode 100644
index 0000000000000..422a7b16cc216
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/QuickstartContent/index.js
@@ -0,0 +1,63 @@
+import React, { useEffect, useRef, useState } from "react";
+import clsx from "clsx";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+import styles from "./quickstartcontent.module.scss";
+import { motion, useScroll, useTransform} from 'framer-motion';
+
+const QuickstartContent = ({ quickstartContent }) => {
+ const scrollableElement = useRef(null)
+ const { scrollYProgress } = useScroll({
+ target: scrollableElement,
+ offset: ["start end", "end end"]
+ })
+ const scaleBar = useTransform(scrollYProgress, [0, 0.2, .9, 1], [0, 0, .8, 1]);
+ const opacityBar = useTransform(scrollYProgress, [0, 0.2, 0.4], [0, 0, 1]);
+
+ return (
+
+
+
+ {quickstartContent.map((data, idx) => (
+
+
+
+
+
+
+ ))}
+
+
+ );
+};
+
+export default QuickstartContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/_components/QuickstartContent/quickstartcontent.module.scss b/docs-website/src/pages/solutions/_components/QuickstartContent/quickstartcontent.module.scss
new file mode 100644
index 0000000000000..bf9369929112a
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/QuickstartContent/quickstartcontent.module.scss
@@ -0,0 +1,118 @@
+@media only screen and (max-width: 800px) {
+ .quickstart {
+ margin: 6rem auto!important;
+ width: 95vw!important;
+ }
+}
+.quickstart {
+ width: 80vw;
+ margin: 12rem auto;
+ display: flex;
+ flex-direction: column;
+
+ :global {
+
+ .quickstart__bar {
+ width: 4px;
+ height: 100%;
+ position: absolute;
+
+ background: linear-gradient(180deg, #546167 0.71%, #58595f 95.37%, #69cfff 100%);
+ border-radius: 10px;
+ transform-origin: top;
+ // transition: transform 0.3s;
+ // animation: progress 0.3s linear;
+
+ /* Inside auto layout */
+ display: block;
+ }
+
+ .quickstart__container {
+ position: relative;
+ }
+
+ .quickstart__content {
+ display: flex;
+ margin: 3rem;
+ width: 100%;
+
+ .quickstart__text {
+ width: 60%;
+ min-width: 600px;
+ padding-right: 2rem;
+ display: flex;
+ justify-content: center;
+ flex-direction: column;
+
+ div {
+ padding-left: 1rem;
+ }
+
+ .quickstart__text__head {
+
+ /* H4 | Semibold */
+ font-family: 'Manrope';
+ font-style: normal;
+ font-weight: 400;
+ font-size: 2rem;
+ line-height: 2.5rem;
+
+ color: #171B2B;
+ margin-top: 1rem;
+ }
+
+ }
+
+ .quickstart__img {
+ display: flex;
+ align-items: flex-start;
+ justify-content: center;
+ width: 20%;
+
+ img {
+ width: 2rem;
+ min-width: 100px;
+ }
+ }
+ }
+
+
+ @media only screen and (max-width: 800px) {
+ .quickstart__bar {
+ display: none;
+ }
+
+ .quickstart__container {
+ max-width: 95% !important;
+ }
+
+ .quickstart__content {
+ display: flex;
+ margin: 2rem;
+
+ .quickstart__text {
+ min-width: 0;
+ width: 100%;
+ padding-left: 40px;
+ padding-right: 0;
+
+ div {
+ padding-left: 0;
+ }
+ .quickstart__text__head {
+ font-size: 1.25rem;
+ font-weight: 400;
+ line-height: 1.75rem;
+ margin: 0 !important;
+ padding-right: 1.5rem;
+ }
+ }
+ .quickstart__img {
+ display: flex;
+ min-width: 50px;
+ margin: auto;
+ }
+ }
+ }
+ }
+}
diff --git a/docs-website/src/pages/solutions/_components/SlidingTabs/index.js b/docs-website/src/pages/solutions/_components/SlidingTabs/index.js
new file mode 100644
index 0000000000000..2dca5f63ae765
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/SlidingTabs/index.js
@@ -0,0 +1,69 @@
+import React, { useState } from 'react';
+import styles from './styles.module.scss';
+import clsx from 'clsx';
+
+const TabbedComponent = () => {
+ const [activeTab, setActiveTab] = useState(0);
+
+ const tabs = [
+ {
+ title: 'Deploy with enterprise-grade security',
+ description: 'Acryl Observe deploys and runs in your own VPC, offering pre-configured support for advanced networking features—like AWS PrivateLink, or Private Google Access—to facilitate secure, private connectivity to cloud services. Plus, both Observe and Acryl Cloud are certified to meet rigorous compliance and security standards, like SOC 2.',
+ icon: "/img/solutions/lock.png",
+ },
+ {
+ title: 'Scale from Zero to Infinity',
+ description: 'Acryl Observe is built for any scale. Leveraging the power of Acryl Cloud, Observe can scale to support data warehouses with petabytes of data in tens of thousands of tables—and tens of billions of rows. And because it’s a fully managed SaaS offering, it’s also ideal for small organizations still building out their data ecosystems.',
+ icon: "/img/solutions/rocket-launch.png",
+ },
+ {
+ title: 'Reduce tool clutter and operational burden',
+ description: 'Simplify your stack. Avoid duplication across tools by unifying data discovery, data governance, and data quality into one central tool. Skip spending countless engineering hours maintaining inaccessible, code-first data quality frameworks',
+ icon: "/img/solutions/communities.png",
+ },
+ {
+ title: 'Reduce the risk of vendor lock-in',
+ description: 'Get the benefits of open source in a fully managed, limitlessly scalable SaaS offering. Acryl Observe and Acryl Cloud are built on top of the DataHub Project, proven open-source technology with an active, thriving community of contributors and users. Customers get 100% compatibility with open-source DataHub, plus regular updates and improvements, source code transparency, community-based support, proven security, and protection against vendor lock-in.',
+ icon: "/img/solutions/water-lock.png",
+ }
+ ];
+
+ return (
+
+
+
+
+ Secure. Scalable. Simple. Open.
+
+
+ {tabs.map((tab, index) => (
+
+
+
setActiveTab(index)}
+ >
+
+
+ {tab.title}
+
+
+ {activeTab === index && (
+
+ {tab.description}
+
+ )}
+
+
+ ))}
+
+
+
+
+
+ );
+};
+
+export default TabbedComponent;
diff --git a/docs-website/src/pages/solutions/_components/SlidingTabs/styles.module.scss b/docs-website/src/pages/solutions/_components/SlidingTabs/styles.module.scss
new file mode 100644
index 0000000000000..982234a2b721e
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/SlidingTabs/styles.module.scss
@@ -0,0 +1,171 @@
+.tabbedComponent {
+ padding-top: 48px;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+}
+
+.leftSection {
+ padding-left: 4rem;
+}
+
+.title {
+ color: #373A47;
+ font-family: Manrope;
+ font-size: 3.6rem;
+ font-style: normal;
+ font-weight: 500;
+ line-height: 4.5rem;
+ letter-spacing: -0.18rem;
+ text-align: left;
+ margin-bottom: 3rem;
+
+ .titleBlue {
+ color: var(--semantics-border-default, #1890FF);
+ }
+}
+
+.container {
+ display: flex;
+ flex-direction: column; // Changed to column for mobile view
+ background: white;
+ overflow: hidden;
+ width: 100vw;
+}
+
+.tabs {
+ display: flex;
+ flex-direction: column;
+ justify-content: flex-start;
+ text-align: left !important;
+}
+
+.tab {
+ align-items: center;
+ margin: 0.5rem 0;
+ position: relative;
+
+ &.activeTab {
+ border-left: 3px solid #4C49E4;
+ .tabTitle {
+ color: black !important;
+ }
+ .icon {
+ filter: brightness(0);
+ }
+
+ }
+
+ .tabButton {
+ padding: 0rem 1rem 1rem 1rem;
+ background: none;
+ border: none;
+ cursor: pointer;
+ display: flex;
+ align-items: center;
+ width: 100%;
+ justify-content: left;
+ text-align: left;
+
+ .tabTitle {
+ color: var(--primitives-text-tex-subtext, #777E99);
+ font-family: Manrope;
+ font-size: 1.45rem;
+ font-style: normal;
+ font-weight: 600;
+ line-height: normal;
+ padding-left: 1rem;
+ transition: all .3s;
+ }
+ .icon {
+ transition: all .3s;
+ }
+ &:hover {
+ .tabTitle {
+ color: black;
+ }
+ .icon {
+ filter: brightness(0);
+ }
+ }
+ }
+
+ .dropdown {
+ background-color: #ffffff;
+ margin-top: 5px;
+ padding: 0rem 1.5rem 0.5rem 1.5rem;
+ color: #777E99;
+ font-family: Manrope;
+ font-size: 1.25rem;
+ font-style: normal;
+ font-weight: 500;
+ line-height: 2rem; /* 160% */
+ }
+}
+
+.imageContainer {
+ justify-content: right;
+ background-color: transparent;
+ margin: 1rem 0;
+ height: 520px;
+ align-self: center;
+ width: 40%;
+ border-radius: 24px;
+ display: flex;
+ flex-grow: 1;
+}
+
+.tabImage {
+ width: 100%;
+ height: 100%;
+ display: flex;
+ background-size: contain;
+ background-repeat: no-repeat;
+ background-position: right center;
+}
+
+
+
+@media (min-width: 801px) { // starts just above the 800px mobile breakpoint so the two queries never both match
+  .container {
+    flex-direction: row; // Change back to row for larger screens
+    padding: 40px 0px;
+  }
+  .tabs {
+    width: 800px;
+  }
+
+  .imageContainer {
+    margin: 1rem 0rem 1rem 1rem;
+  }
+}
+
+@media (max-width: 800px) {
+ .title {
+ font-size: 2.5rem;
+ line-height: 3rem;
+ }
+ .tabButton {
+ .icon {
+ height: 24px;
+ width: 24px;
+ }
+ .tabTitle {
+ font-size: 1rem!important;
+ padding-left: .25rem!important;
+ }
+ }
+ .dropdown {
+ font-size: .9rem!important;
+ line-height: 1.5rem!important;
+ }
+
+ .imageContainer {
+ display: none !important;
+ }
+
+ .leftSection {
+ padding: 2rem;
+ }
+
+}
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/_components/Testimonials/index.js b/docs-website/src/pages/solutions/_components/Testimonials/index.js
new file mode 100644
index 0000000000000..54aaca0ebb5f8
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Testimonials/index.js
@@ -0,0 +1,29 @@
+import React, { useEffect, useRef, useState } from "react";
+import clsx from "clsx";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+import styles from "./styles.module.scss";
+
+const Testimonials = ({ testimonialsData }) => {
+ const { title, feature1, feature2, feature1Link, feature2Link, imgSrc } = testimonialsData;
+ return (
+
+
+
+
+
+ {title}
+
+
+ Seamlessly integrated with DataHub Cloud's{feature1} and {feature2} solutions.
+
+
+
+
+
+
+
+
+ );
+};
+
+export default Testimonials;
diff --git a/docs-website/src/pages/solutions/_components/Testimonials/styles.module.scss b/docs-website/src/pages/solutions/_components/Testimonials/styles.module.scss
new file mode 100644
index 0000000000000..ce49d7b4f6fa1
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Testimonials/styles.module.scss
@@ -0,0 +1,99 @@
+.testimonials {
+ background: linear-gradient(90deg, #F8F9F9 0%, #E9EAEC 100%);
+
+ :global {
+ .testimonials__content {
+ width: 80vw;
+ max-width: 1200px;
+ margin: 0 auto;
+ position: relative;
+ padding: 4.5rem 0;
+
+ .testimonials__card {
+ margin: 0rem 4rem;
+ display: flex;
+
+ .testimonials__logo {
+ min-width: 100px;
+ margin: 2rem auto;
+ img {
+ max-width: 100%;
+ }
+ }
+
+ .testimonials__text {
+ width: 94%;
+ padding-left: 2rem;
+ color: #2e2e38;
+
+ .testimonials__quote_title {
+ font-family: "Manrope";
+ font-style: normal;
+ font-weight: 500;
+ font-size: 3.5rem;
+ line-height: 120%;
+ position: relative;
+ margin: 1.8rem auto;
+ }
+
+ .testimonials__quote_description {
+ font-size: 1.8rem;
+ margin-top: 0.5rem;
+ color: #656c77;
+ font-weight: 300;
+ line-height: normal;
+ }
+
+ .testimonials__quote_black {
+ color: #2E2E38;
+
+ &:hover {
+ text-decoration: none;
+ opacity: 0.8;
+ cursor: pointer;
+ }
+ }
+ }
+ }
+ }
+
+ @media only screen and (max-width: 800px) {
+ .testimonials__content {
+ width: 100vw;
+ padding: 2rem 0;
+ text-align: center;
+
+ .testimonials__card {
+ flex-direction: column;
+ margin: 2rem 1rem;
+
+ .testimonials__logo {
+ max-width: 40px;
+ margin-bottom: 16px;
+ }
+
+ .testimonials__text {
+ width: 100%;
+ padding-left: 0;
+ max-width: 100%;
+ .testimonials__quote_title {
+ font-size: 2.25rem;
+ }
+ .testimonials__quote_description {
+ font-size: 1.5rem;
+ }
+ .testimonials__quote {
+ font-size: 1.1rem;
+ line-height: 1.75rem;
+ }
+ .testimonials__company {
+ font-size: 1rem;
+ }
+ }
+ }
+ }
+
+ }
+ }
+ }
+
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/_components/Tiles/index.js b/docs-website/src/pages/solutions/_components/Tiles/index.js
new file mode 100644
index 0000000000000..3b087e97ae20b
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Tiles/index.js
@@ -0,0 +1,80 @@
+import React from "react";
+import styles from "./tiles.module.scss";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+import clsx from "clsx";
+
+const Tiles = ({ tilesContent }) => {
+ const { title, theme, tileItems } = tilesContent;
+
+ const sectionThemeClass = theme === "dark" ? styles.darkSection : styles.lightSection;
+ const itemThemeClass = theme === "dark" ? styles.darkItem : styles.lightItem;
+ const diagramItemThemeClass = theme === "dark" ? styles.darkDiagramItem : styles.lightDiagramItem;
+
+ return (
+
+
+
+
+
+
+ {tileItems.map((item, index) => (
+
+ {index % 2 !== 0 ? (
+ <>
+
+
+
+
+
+
{item.title}
+
{item.subtitle}
+
+
+ >
+ ) : (
+ <>
+
+
+
{item.title}
+
{item.subtitle}
+
+
+
+
+
+ >
+ )}
+
+ ))}
+
+
+
+
+
+ );
+};
+
+export default Tiles;
diff --git a/docs-website/src/pages/solutions/_components/Tiles/tiles.module.scss b/docs-website/src/pages/solutions/_components/Tiles/tiles.module.scss
new file mode 100644
index 0000000000000..a71b0b8445541
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Tiles/tiles.module.scss
@@ -0,0 +1,203 @@
+.lightSection {
+ background: #FAFAFA;
+}
+
+.darkSection {
+ background: white; // NOTE(review): the "dark" theme is plain white, lighter than .lightSection's #FAFAFA — confirm the two names are not swapped
+}
+
+.ecosystem_section {
+ padding: 4vh 0;
+ padding-bottom: 6vh;
+ display: flex;
+ justify-content: center;
+
+ .ecosystem_section_content {
+ width: 70%;
+ height: 100%;
+ display: flex;
+ flex-direction: column;
+ justify-content: center;
+ padding: 2rem 0rem;
+ text-align: left;
+
+ .ecosystem_section_upper_content {
+ padding-top: 1rem;
+ padding-left: 0;
+ display: flex;
+ justify-content: flex-start;
+
+ .ecosystem_section_heading {
+ line-height: 4rem;
+ font-family: Manrope;
+ font-family: Manrope;
+ font-size: 3.25rem;
+ font-style: normal;
+ font-weight: 500;
+ line-height: normal;
+ mix-blend-mode: luminosity;
+ width: 100%;
+ }
+ }
+
+ .ecosystem_section_lower_content {
+ margin-top: 48px;
+ display: flex;
+ flex-direction: row;
+ justify-content: center;
+
+ .itemWrappers {
+ display: flex;
+ flex-direction: column;
+ align-self: stretch;
+
+ .itemWrapper {
+ display: flex;
+ padding: 4rem 0px;
+ width: 100%;
+ align-items: center;
+ justify-content: space-between;
+ flex-direction: row;
+ flex-wrap: nowrap;
+ }
+
+ .alternate {
+ .item {
+ margin-right: 2rem;
+ margin-left: 0;
+ }
+ }
+
+ .item {
+ display: flex;
+ justify-content: center;
+ align-items: flex-start;
+ padding: 0;
+ margin: 0;
+ margin-left: 4rem;
+ flex-grow: 1;
+ max-width: 520px;
+
+ .item__title {
+ font-family: Manrope;
+ font-size: 2.25rem;
+ font-style: normal;
+ font-weight: 500;
+ line-height: normal;
+ margin-bottom: 1rem;
+ }
+
+ .item__subtitle {
+ font-family: Manrope;
+ font-size: 1.25rem;
+ font-style: normal;
+ font-weight: 400;
+ line-height: normal;
+ color: #777E99;
+ }
+ }
+
+ .diagramItem {
+ display: flex;
+ overflow: hidden;
+ height: 400px;
+ min-width: 400px;
+ max-width: 400px;
+ justify-content: flex-end;
+ align-items: center;
+ border-radius: var(--number-scales-2s-20, 32px);
+ border: 0.5px solid #1890FF;
+ background: #FAFAFA;
+ }
+ }
+ }
+
+ .item, .diagramItem {
+ margin: 8px;
+ }
+ }
+}
+@media only screen and (max-width: 800px) {
+ .ecosystem_section {
+ padding: 1rem 0;
+ .ecosystem_section_content {
+ width: 90%;
+ min-width: 0;
+ height: auto !important;
+ padding-bottom: 48px;
+ padding-top: 48px;
+
+ .ecosystem_section_upper_content {
+ height: auto !important;
+
+ .ecosystem_section_heading {
+ font-size: 1.75rem;
+ line-height: 2.25rem;
+ font-weight: 600;
+ text-align: center;
+ width: 90%;
+ margin: auto;
+ }
+ }
+
+ .ecosystem_section_lower_content {
+ height: auto;
+ margin-top: 8px;
+
+ .diagramItem {
+ width: 100%;
+ height: auto;
+ border: none;
+ }
+ .itemWrappers {
+ .itemWrapper {
+ flex-direction: column-reverse;
+ &.alternate {
+ flex-direction: column;
+ }
+ width: 95vw;
+ padding: 2rem 0;
+ align-items: center;
+ margin: auto;
+
+ }
+
+ .diagramItem {
+ height: auto;
+ width: 95vw;
+ aspect-ratio: 1;
+ min-width: auto;
+ margin: auto;
+ }
+
+ .item {
+ text-align: center;
+ padding : 1rem;
+ margin: 2rem 0!important;
+
+ .item__title {
+ font-size: 1.5rem;
+ }
+ .item__subtitle {
+ font-size: 1rem;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+.darkDiagramItem {
+ background: linear-gradient(180deg, #F5EFF8 0%, #F1F4F4 100%);
+}
+
+.lightDiagramItem {
+ background: linear-gradient(37deg, rgba(238, 240, 242, 0.90) 2.81%, rgba(253, 249, 241, 0.90) 107.71%);
+}
+
+.diagramItem__img {
+ overflow: hidden;
+ width: 100%;
+ height: auto;
+}
diff --git a/docs-website/src/pages/solutions/_components/Trials/index.js b/docs-website/src/pages/solutions/_components/Trials/index.js
new file mode 100644
index 0000000000000..ecca6685810b8
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Trials/index.js
@@ -0,0 +1,79 @@
+import React from "react";
+import styles from "./styles.module.scss";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+import clsx from "clsx";
+import Link from "@docusaurus/Link";
+
+const Trials = ({ onOpenTourModal, trialsContent }) => {
+ const { title, trialsCardItems } = trialsContent;
+
+ return (
+
+
+
+
+
Discover. Observe. Govern.
+
{title}
+
+
+ Get started with Open Source →
+
+
+
+
+
+
+
+
+ {trialsCardItems.slice(0, 2).map((item, index) => (
+
+
+ {item.title.split("\n").map((line, idx) => (
+
+ {line}
+
+
+ ))}
+
+ ))}
+
+
+ {trialsCardItems.slice(2).map((item, index) => (
+
+
+ {item.title.split("\n").map((line, idx) => (
+
+ {line}
+
+
+ ))}
+
+ ))}
+
+
+
+
+
+ );
+};
+
+export default Trials;
diff --git a/docs-website/src/pages/solutions/_components/Trials/styles.module.scss b/docs-website/src/pages/solutions/_components/Trials/styles.module.scss
new file mode 100644
index 0000000000000..9a991c4af36ab
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/Trials/styles.module.scss
@@ -0,0 +1,259 @@
+.container {
+ display: flex;
+ flex-direction: column;
+ background: #fafafa;
+ width: 80vw;
+ min-width: 900px;
+ max-width: 1200px;
+ margin: 0 auto;
+}
+.trial {
+ height: 600px;
+ background: white;
+ display: flex;
+ justify-content: space-between;
+
+ .trial_left {
+ width: 55%;
+ height: 100%;
+ display: flex;
+ justify-content: center;
+ align-items: center;
+
+ .left_content {
+ flex-grow: 1;
+ padding-right: 24px;
+
+ .trial_title {
+ font-weight: 300;
+ }
+
+ span {
+ color: #8088a3;
+ font-size: 1.5rem;
+ font-weight: 400;
+ }
+ p {
+ color: #2e2e38;
+ font-size: 3rem;
+ font-weight: 400;
+ line-height: normal;
+ }
+ .btn_div {
+ display: flex;
+ gap: 1rem;
+ margin-bottom: 1rem;
+
+ a:first-child {
+ cursor: pointer;
+ text-decoration: none;
+ display: inline-block;
+ font-size: 1rem;
+ background-color: #1890ff;
+ padding: 4px 20px;
+ border-radius: 50px;
+ margin: 0 0 0 0;
+ color: white;
+ transition: opacity .2s ease-in-out;
+ &:hover {
+ opacity: .9;
+ }
+ }
+
+ a:nth-child(2) {
+ cursor: pointer;
+ text-decoration: none;
+ display: inline-block;
+ font-size: 1rem;
+ padding: 4px 20px;
+ border-radius: 50px;
+ margin: 0 0 0 0;
+ background-color: transparent;
+ color: #1890ff;
+ border: 1px solid #1890ff;
+ transition: background-color .2s ease-in-out;
+ &:hover {
+ background-color: #1890ff1A;
+ }
+ }
+ }
+
+ .start_arrow {
+ margin-top: .5rem;
+ display: inline-block;
+ font-size: 1.1rem;
+ color: #1890ff;
+ font-weight: 500;
+ cursor: pointer;
+ text-decoration: none;
+ }
+ }
+ }
+ .trial_right {
+ width: 45%;
+ max-width: 520px;
+ height: 100%;
+ position: relative;
+ overflow: hidden;
+
+ .gradientTop, .gradientBottom {
+ position: absolute;
+ background: linear-gradient(#FFFFFF, #FFFFFF00);
+ height: 100px;
+ width: 100%;
+ z-index: 10;
+ }
+ .gradientBottom {
+ transform: rotate(180deg);
+ bottom: 0;
+ }
+
+ .right_content {
+ height: 100%;
+ display: flex;
+ gap: 2rem;
+ .right_l {
+ width: 50%;
+ height: 95%;
+ margin-top: -.5rem;
+ display: flex;
+ flex-direction: column;
+ gap: 1rem;
+
+ div {
+ font-size: 1.25rem;
+ font-weight: 400;
+ color: #000;
+ line-height: 1.5rem;
+ img {
+ margin-bottom: 8px;
+ }
+ }
+
+ div:first-child {
+ display: flex;
+ flex-direction: column;
+ justify-content: center;
+ padding-left: 2rem;
+ padding-right: 1.5rem;
+ height: 49%;
+ border-radius: 35px;
+ background-color: #fff;
+ border: 1px solid #ddd;
+ // background-image: linear-gradient(to bottom, #FC526333, #FC5263);
+ }
+
+ div:nth-child(2) {
+ display: flex;
+ flex-direction: column;
+ justify-content: center;
+ padding-left: 2rem;
+ padding-right: 1.5rem;
+ background-color: #fff;
+ border: 1px solid #ddd;
+ // background-image: linear-gradient(
+ // to bottom,
+ // #77B750,
+ // #77B75080
+ // );
+ height: 49%;
+ border-radius: 35px;
+ }
+ }
+ .right_r {
+ width: 50%;
+ height: calc(100% - 2rem);
+ margin-top: 2.5rem;
+ display: flex;
+ flex-direction: column;
+ gap: 1rem;
+
+ div {
+ font-size: 1.25rem;
+ font-weight: 400;
+ color: #000;
+ line-height: 1.5rem;
+ img {
+ margin-bottom: 8px;
+ }
+ }
+
+ div:first-child {
+ display: flex;
+ flex-direction: column;
+ justify-content: center;
+ padding-left: 2rem;
+ padding-right: 1.5rem;
+ // background-image: linear-gradient(to bottom, #1890FF80, #1890FF);
+ background-color: #fff;
+ border: 1px solid #ddd;
+ height: 49%;
+ border-radius: 35px;
+ }
+
+ div:nth-child(2) {
+ display: flex;
+ flex-direction: column;
+ justify-content: center;
+ padding-left: 2rem;
+ padding-right: 1.5rem;
+ background-color: #fff;
+ border: 1px solid #ddd;
+ // background-image: linear-gradient(
+ // to bottom,
+ // #EFB300,
+ // #EFB30033
+ // );
+ height: 49%;
+ border-radius: 35px;
+ }
+ }
+ }
+ }
+}
+
+// Responsiveness
+@media (max-width: 800px) {
+
+ .container {
+ flex-direction: column;
+ width: 90vw;
+ min-width: 0;
+ margin: 4rem auto;
+ }
+ .trial {
+ flex-direction: column;
+ justify-content: flex-start;
+ height: auto;
+ .trial_left {
+ width: 100%;
+ .left_content {
+ span {
+ font-size: 1.3rem;
+ }
+ p {
+ font-size: 2.5rem;
+ }
+ }
+ .start_arrow {
+ margin-top: 0;
+ margin-bottom: 1rem;
+ }
+ }
+ .trial_right {
+ width: 100%;
+ max-width: none;
+ height: 500px;
+ margin: auto;
+ }
+ .right_content {
+ .right_l, .right_r {
+ div {
+ font-size: .9rem!important;
+ line-height: 1.2rem !important;
+ color: #333!important;
+ }
+ }
+ }
+ }
+}
diff --git a/docs-website/src/pages/solutions/_components/UnifiedTabs/index.js b/docs-website/src/pages/solutions/_components/UnifiedTabs/index.js
new file mode 100644
index 0000000000000..0ccf0501670a3
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/UnifiedTabs/index.js
@@ -0,0 +1,42 @@
+import React, { useState } from 'react';
+import styles from './styles.module.scss';
+import clsx from 'clsx';
+import useBaseUrl from '@docusaurus/useBaseUrl';
+
+const TabbedComponent = ({ unifiedTabsData }) => {
+ const [activeTab, setActiveTab] = useState(0);
+
+ return (
+
+
+
+ {unifiedTabsData.map((tab, index) => (
+
setActiveTab(index)}
+ >
+
+ {tab.tabName}
+
+
+ ))}
+
+
+
+
+
{unifiedTabsData[activeTab].title}
+
{unifiedTabsData[activeTab].description}
+
+
+
+
+
+
+
+
+
+ );
+};
+
+export default TabbedComponent;
diff --git a/docs-website/src/pages/solutions/_components/UnifiedTabs/styles.module.scss b/docs-website/src/pages/solutions/_components/UnifiedTabs/styles.module.scss
new file mode 100644
index 0000000000000..a3a1a455af3d3
--- /dev/null
+++ b/docs-website/src/pages/solutions/_components/UnifiedTabs/styles.module.scss
@@ -0,0 +1,124 @@
+.tabbedComponent {
+ text-align: left;
+ padding: 20px;
+ padding-top: 48px;
+ padding-bottom: 48px;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+}
+
+.tabsContainer {
+ width: 100%; // Ensure full width
+ max-width: 1200px; // Limit to the same max-width as content
+ margin-bottom: 1rem;
+ display: flex;
+}
+
+.tabs {
+ display: flex;
+ justify-content: space-between; // Space out tabs across the container
+ width: 100%; // Ensure tabs take full width
+}
+
+.tabButton {
+ flex: 1;
+ padding: 0.8rem 1.5rem;
+ background: none;
+ border: none;
+ cursor: pointer;
+ font-size: 1.5rem;
+ font-weight: 500;
+ color: #777e99;
+ text-align: center;
+
+
+ .tabButtonText {
+ width: min-content;
+ margin: auto;
+ transition: color 0.2s, border-bottom 0.2s;
+ }
+ .tabButtonText:hover, .active {
+ color: #1890ff;
+ border-bottom: 3px solid #1890ff;
+ }
+}
+
+.container {
+ display: flex;
+ flex-direction: row;
+ background: white;
+ max-width: 1200px; // Same max-width as tabs
+ width: 100%; // Full width to align with tabs
+ overflow: hidden;
+}
+
+.tabContent {
+ flex: 1;
+ padding: 2rem;
+ margin: auto;
+}
+
+.tabTitle {
+ font-size: 2.5rem;
+ font-weight: 500;
+ color: #373A47;
+ line-height: 120%;
+ margin-bottom: 1rem;
+}
+
+.tabTitle.active {
+ color: #1890ff;
+ text-decoration: underline;
+}
+
+.tabDescription {
+ font-size: 1.4rem;
+ color: #777e99;
+ line-height: 2rem;
+ font-weight: 300;
+}
+
+.imageContainer {
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ margin: 1rem 0;
+ width: 30rem;
+}
+
+.tabImage {
+ width: 100%;
+ height: 100%;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ box-shadow: 0px 2px 4px 0px #0000001C;
+ border-radius: 2.2rem;
+}
+
+@media (min-width: 768px) {
+ .container {
+ flex-direction: row;
+ padding: 40px 32px;
+ }
+
+ .imageContainer {
+ margin: 1rem;
+ }
+}
+
+@media (max-width: 768px) { // NOTE(review): a viewport exactly 768px wide also matches the min-width: 768px query above, so both rule sets apply there — confirm this is intended
+ .container {
+ display: block;
+ }
+
+ .imageContainer {
+ margin: auto;
+ max-width: 100%;
+ }
+
+ .tabTitle {
+ font-size: 1.8rem;
+ }
+}
+}
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/_content/discoveryCaseStudyContent.js b/docs-website/src/pages/solutions/discovery/_content/discoveryCaseStudyContent.js
new file mode 100644
index 0000000000000..95bbfde0bd12a
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/_content/discoveryCaseStudyContent.js
@@ -0,0 +1,26 @@
+const caseStudyContent = {
+ title: "See how industry leaders are using DataHub for Discovery today.",
+ backgroundColor: "#F3F3F6",
+ items: [
+ {
+ imgSrc: "/img/solutions/logo-notion.png",
+ title: "How Notion Used DataHub to harness their sprawling data.",
+ link: "https://www.notion.so/blog/a-brief-history-of-notions-data-catalog",
+ alt: "notion"
+ },
+ {
+ imgSrc: "/img/solutions/logo-myob.png",
+ title: "How MYOB eliminated breaking changes with DataHub.",
+ link: "/adoption-stories/#myob",
+ alt: "MYOB",
+ },
+ {
+ imgSrc: "/img/solutions/logo-dpg-media.png",
+ title: "How Acryl Data Helped DPG Media Save 25% Per Mo in Snowflake.",
+ link: "/adoption-stories/#dpg-media",
+ alt: "DPG Media",
+ }
+ ]
+ };
+
+export default caseStudyContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/_content/discoveryHeroContent.js b/docs-website/src/pages/solutions/discovery/_content/discoveryHeroContent.js
new file mode 100644
index 0000000000000..085e94085126b
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/_content/discoveryHeroContent.js
@@ -0,0 +1,8 @@
+const heroContent = {
+ topQuote: "Discovery in DATAHUB",
+ title: "Make data \n\n democratization a reality",
+ description: "Enable everyone in your organization to effortlessly discover trustworthy data, tailor experiences for each persona, eliminate breaking changes with lineage, and build confidence in your data with a unified view of business and technical context.",
+ imgSrc: "/img/solutions/hero-discovery.png",
+ };
+
+ export default heroContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/_content/discoveryQuickstartContent.js b/docs-website/src/pages/solutions/discovery/_content/discoveryQuickstartContent.js
new file mode 100644
index 0000000000000..e1c6a84512fd5
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/_content/discoveryQuickstartContent.js
@@ -0,0 +1,16 @@
+const quickstartData = [
+ {
+ title: "Where can I find our quarterly revenue reporting?",
+ image: "/img/solutions/icon-revenue.png",
+ },
+ {
+ title: "This metric looks wrong. How was it calculated?",
+ image: "/img/solutions/icon-metric.png",
+ },
+ {
+ title: "What reports will be impacted during a data migration?",
+ image: "/img/solutions/icon-migration.png",
+ }
+];
+
+export default quickstartData;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/_content/discoveryTestimonialsContent.js b/docs-website/src/pages/solutions/discovery/_content/discoveryTestimonialsContent.js
new file mode 100644
index 0000000000000..e9c72175ffcb7
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/_content/discoveryTestimonialsContent.js
@@ -0,0 +1,10 @@
+const testimonialsData = {
+ title: "Enter end-to-end Data Discovery.",
+ feature1: "Data Observability",
+ feature1Link: "/solutions/observability",
+ feature2: "Governance",
+ feature2Link: "/solutions/governance",
+ imgSrc: "/img/solutions/discovery-icons-group.png",
+};
+
+export default testimonialsData;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/_content/discoveryTilesContent.js b/docs-website/src/pages/solutions/discovery/_content/discoveryTilesContent.js
new file mode 100644
index 0000000000000..c15042ea1c8db
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/_content/discoveryTilesContent.js
@@ -0,0 +1,67 @@
+const tilesContent = [ // Tile sections for the Discovery page; each entry carries a title, a theme and its tileItems
+ {
+ title: "Enable self-service
data discovery.",
+ theme: "dark",
+ tileItems: [
+ {
+ title: "Your role, your view: discover data that matters to you.",
+ subtitle: "Tailor search experiences for every user type, from analysts to executives. Foster company-wide engagement and turn every employee into a data champion.",
+ imgSrc: "/img/solutions/discovery-tile-1.png",
+ },
+ {
+ title: "Silence the irrelevant. Amplify what counts.",
+ subtitle: "Cut through data clutter to reveal the assets that truly move the needle for your organization.",
+ imgSrc: "/img/solutions/discovery-tile-2.png",
+ },
+ {
+ title: "Maximize relevance. Minimize time-to-discovery.",
+ subtitle: "Tailor search results to reflect your organization's trust signals. Boost user confidence by prioritizing results that meet your standards of reliability and relevance.",
+ imgSrc: "/img/solutions/discovery-tile-3.png",
+ }
+ ]
+ },
+ {
+ title: "Unlock the full potential of
automated data lineage.",
+ theme: "light",
+ tileItems: [
+ {
+ title: "Demystify complex, cross-platform dependency chains.",
+ subtitle: "Find out when things go wrong, with alerts that reach your team where they work—whether it’s Slack, email, or anywhere else.", // NOTE(review): reads like alerting copy rather than lineage copy — confirm this is the intended text
+ imgSrc: "/img/solutions/discovery-tile-4.png",
+ },
+ {
+ title: "Know your data’s Impact.",
+ subtitle: "Instantly identify downstream consumers of your data. Enable seamless communication and collaboration across your data ecosystem.",
+ imgSrc: "/img/solutions/discovery-tile-5.png",
+ },
+ {
+ title: "Illuminate the black box of data transformations.",
+ subtitle: "Shine a light on how your key metrics are derived. Automated lineage provides transparency, fostering trust in your data-driven decisions.",
+ imgSrc: "/img/solutions/discovery-tile-6.png",
+ }
+ ]
+ },
+ {
+ title: "Build trust in the relevance
and accuracy of your data.",
+ theme: "dark",
+ tileItems: [
+ {
+ title: "Increase your data confidence.",
+ subtitle: "Tailor search experiences for every user type, from analysts to executives. Foster company-wide engagement and turn every employee into a data champion.", // NOTE(review): identical to the discovery-tile-1 subtitle — looks like placeholder copy, confirm
+ imgSrc: "/img/solutions/discovery-tile-7.png",
+ },
+ {
+ title: "Time travel through your data's evolution.",
+ subtitle: "View the shape and content of your data as it changes over time. Gain confidence in your current data by understanding its past.",
+ imgSrc: "/img/solutions/discovery-tile-8.png",
+ },
+ {
+ title: "Your data quality companion, everywhere you work.",
+ subtitle: "Seamlessly integrate DataHub's insights into your BI tools and communication channels. Keep data trust at the forefront of every decision.",
+ imgSrc: "/img/solutions/discovery-tile-9.png",
+ }
+ ]
+ }
+]
+
+ export default tilesContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/_content/discoveryTrialsContent.js b/docs-website/src/pages/solutions/discovery/_content/discoveryTrialsContent.js
new file mode 100644
index 0000000000000..b9b695549210d
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/_content/discoveryTrialsContent.js
@@ -0,0 +1,24 @@
+const trialsContent = {
+ title: "Make data democratization a reality today.",
+ trialsCardItems: [
+ {
+ title: "Unlock self-service data discovery.",
+ imgSrc: "/img/solutions/trial-icon-lock.svg",
+
+ },
+ {
+ title: "Stop breaking changes before they happen.",
+ imgSrc: "/img/solutions/trial-icon-alert.svg",
+ },
+ {
+ title: "Build trust in the relevance and accuracy of your data.",
+ imgSrc: "/img/solutions/trial-icon-star.svg",
+ },
+ {
+ title: "Unify Discovery, Observability and Governance in one tool.",
+ imgSrc: "/img/solutions/trial-icon-link.svg",
+ }
+ ]
+};
+
+export default trialsContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/discovery/index.js b/docs-website/src/pages/solutions/discovery/index.js
new file mode 100644
index 0000000000000..01efca1e0e7c6
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/index.js
@@ -0,0 +1,62 @@
+import React, { useState } from "react";
+import Layout from "@theme/Layout";
+import useDocusaurusContext from "@docusaurus/useDocusaurusContext";
+import Hero from "../_components/Hero";
+import QuickstartContent from "../_components/QuickstartContent";
+import Tiles from "../_components/Tiles";
+import Trials from "../_components/Trials";
+import Testimonials from "../_components/Testimonials";
+import CaseStudy from "../_components/CaseStudy";
+import CloseButton from "@ant-design/icons/CloseCircleFilled";
+import quickstartData from "./_content/discoveryQuickstartContent";
+import heroContent from "./_content/discoveryHeroContent";
+import caseStudyContent from "./_content/discoveryCaseStudyContent";
+import Integrations from "../_components/Integrations";
+import tilesContent from "./_content/discoveryTilesContent";
+import testimonialsData from "./_content/discoveryTestimonialsContent";
+import trialsContent from "./_content/discoveryTrialsContent";
+
+function Home() { // Page component for /solutions/discovery
+ const context = useDocusaurusContext();
+ const { siteConfig = {} } = context;
+
+ if (siteConfig.customFields.isSaas && typeof window !== "undefined") { // redirect only in the browser; window is undefined during the server-side build
+ window.location.replace("/docs");
+ }
+
+ const [isTourModalVisible, setIsTourModalVisible] = useState(false);
+ const onOpenTourModal = () => {
+ setIsTourModalVisible(true);
+ };
+ const onCloseTourModal = () => {
+ setIsTourModalVisible(false);
+ };
+ return !siteConfig.customFields.isSaas ? (
+
+ {isTourModalVisible ? (
+
+ ) : null}
+
+
+
+
+
+ {tilesContent.map((content, index) => (
+
+ ))}
+
+
+
+
+ ) : null;
+}
+
+export default Home;
diff --git a/docs-website/src/pages/solutions/discovery/styles.module.scss b/docs-website/src/pages/solutions/discovery/styles.module.scss
new file mode 100644
index 0000000000000..af0cd90c99134
--- /dev/null
+++ b/docs-website/src/pages/solutions/discovery/styles.module.scss
@@ -0,0 +1,21 @@
+.container {
+ display: flex;
+ flex-direction: column;
+ background: #fafafa;
+ width: 80vw;
+ min-width: 900px;
+ max-width: 1200px;
+ margin: 0 auto;
+ }
+
+ // Responsiveness
+ @media (max-width: 800px) {
+
+ .container {
+ flex-direction: column;
+ width: 90vw;
+ min-width: 0;
+ margin: 4rem auto;
+ }
+ }
+
diff --git a/docs-website/src/pages/solutions/governance/_content/governanceCaseStudyContent.js b/docs-website/src/pages/solutions/governance/_content/governanceCaseStudyContent.js
new file mode 100644
index 0000000000000..4e12756a980c9
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governanceCaseStudyContent.js
@@ -0,0 +1,26 @@
+const caseStudyContent = {
+ title: "See how industry leaders are using DataHub for Governance today.",
+ backgroundColor: "#FFFFFF",
+ items: [
+ {
+ imgSrc: "/img/solutions/optum.jpg",
+ title: "How Optum uses DataHub to govern and manage access.",
+ link: "/adoption-stories/#optum",
+ alt: "optum"
+ },
+ {
+ imgSrc: "/img/solutions/checkout.jpg",
+ title: "How Checkout.com manages compliance with sensitive data.",
+ link: "/adoption-stories/#checkout-com",
+ alt: "Checkout.com",
+ },
+ {
+ imgSrc: "/img/solutions/wolt.png",
+ title: "How Wolt complies with legal requirements using DataHub.",
+ link: "/adoption-stories/#wolt",
+ alt: "Wolt",
+ }
+ ]
+ };
+
+export default caseStudyContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/_content/governanceHeroContent.js b/docs-website/src/pages/solutions/governance/_content/governanceHeroContent.js
new file mode 100644
index 0000000000000..779a9cb7062ff
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governanceHeroContent.js
@@ -0,0 +1,8 @@
+const heroContent = { // Hero banner copy and image path for the Governance solutions page
+ topQuote: "Data governance in datahub",
+ title: "Measure and minimize compliance risk, effortlessly.",
+ description: "Ensure every data asset is accounted for and responsibly governed by defining and enforcing documentation standards. Automate your governance program to automatically classify assets as they evolve over time. Minimize redundant, manual work with GenAI documentation, AI-driven classification, smart propagation, and more.",
+ imgSrc: "/img/solutions/hero-governance.png",
+ };
+
+ export default heroContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/_content/governancePersonaContent.js b/docs-website/src/pages/solutions/governance/_content/governancePersonaContent.js
new file mode 100644
index 0000000000000..9d456bee5d30a
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governancePersonaContent.js
@@ -0,0 +1,31 @@
+const personaContent = {
+ title: "The only platform that serves everyone involved with Governance.",
+ personas: [
+ {
+ imgSrc: "/img/solutions/persona_compliance_officers.png",
+ feature1: "Data Classification Glossary",
+ feature2: "Custom Form builder",
+ feature3: "Automated Form Assigning",
+ feature4: "Analytics Reporting",
+ alt: "governance_officers"
+ },
+ {
+ imgSrc: "/img/solutions/persona_developers.png",
+ feature1: "Highly Extensible Open-Core",
+ feature2: "Rich APIs and SDKs",
+ feature3: "Automation Framework",
+ feature4: "Shift Left Architecture",
+ alt: "developers",
+ },
+ {
+ imgSrc: "/img/solutions/persona_owners_and_smes.png",
+ feature1: "Seamless UI Workflows",
+ feature2: "Respond to Requests",
+ feature3: "Notifications Where They Work",
+ feature4: "Proposal Workflows",
+ alt: "data_owners",
+ }
+ ]
+ };
+
+export default personaContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/_content/governanceQuickstartContent.js b/docs-website/src/pages/solutions/governance/_content/governanceQuickstartContent.js
new file mode 100644
index 0000000000000..c59e3c215d707
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governanceQuickstartContent.js
@@ -0,0 +1,12 @@
+const quickstartData = [
+ {
+ title: "Running into challenges while tracking sensitive data as it flows through your data landscape?",
+ image: "/img/solutions/icon-cloud.png",
+ },
+ {
+ title: "Is enforcing and measuring compliance initiatives impossibly manual, tedious and slow?",
+ image: "/img/solutions/icon-calendar.png",
+ },
+];
+
+export default quickstartData;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/_content/governanceTestimonialsContent.js b/docs-website/src/pages/solutions/governance/_content/governanceTestimonialsContent.js
new file mode 100644
index 0000000000000..a450c20926bd5
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governanceTestimonialsContent.js
@@ -0,0 +1,10 @@
+const testimonialsData = {
+ title: "Enter end-to-end Data Governance.",
+ feature1: "Data Discovery",
+ feature2: "Observability",
+ feature1Link: "/solutions/discovery",
+ feature2Link: "/solutions/observability",
+ imgSrc: "/img/solutions/governance-icons-group.png",
+};
+
+export default testimonialsData;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/_content/governanceTilesContent.js b/docs-website/src/pages/solutions/governance/_content/governanceTilesContent.js
new file mode 100644
index 0000000000000..d8786f9d0c8ab
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governanceTilesContent.js
@@ -0,0 +1,51 @@
+const tilesContent = [ // Tile sections for the Governance page; each entry carries a title, a theme and its tileItems
+ {
+ title: "Save countless hours of manual
labeling with automated classification.",
+ theme: "dark",
+ tileItems: [
+ {
+ title: "Data producers classify, DataHub centralizes.",
+ subtitle: "Tag sensitive data where it's born. See classifications automatically reflected in DataHub, ensuring consistent compliance across your data ecosystem.",
+ imgSrc: "/img/solutions/governance-tile-1.png",
+ },
+ {
+ title: "Classification that flows: Lineage-driven compliance.",
+ subtitle: "Automatically classify your data as it moves and transforms. Ensure consistent compliance classification across your entire data ecosystem.",
+ imgSrc: "/img/solutions/governance-tile-2.png",
+ },
+ {
+ title: "Keep your tags in harmony, automatically.",
+ subtitle: "Create a seamless flow of metadata between DataHub and source systems, ensuring tags are always up-to-date, everywhere.",
+ imgSrc: "/img/solutions/governance-tile-3.png",
+ },
+ {
+ title: "Metadata gaps? AI has you covered.",
+ subtitle: "Combine human insight with Gen AI prowess to enhance accuracy and coverage in your metadata management.",
+ imgSrc: "/img/solutions/governance-tile-4.png",
+ }
+ ]
+ },
+ {
+ title: "Streamline your compliance program:
Standardize, delegate, and measure with ease.",
+ theme: "light",
+ tileItems: [
+ {
+ title: "Customize your compliance blueprint.",
+ subtitle: "Define compliance requirements to match your organization’s unique needs. Establish clear annotation guidelines to ensure consistency and accuracy across your data landscape.",
+ imgSrc: "/img/solutions/governance-tile-5.png",
+ },
+ {
+ title: "Equip data experts with crystal-clear guidelines.",
+ subtitle: "Route annotation tasks to those who know the data best. Take the guesswork out of compliance, freeing up time for innovation.",
+ imgSrc: "/img/solutions/governance-tile-6.png",
+ },
+ {
+ title: "Never lose sight of compliance goals.",
+ subtitle: "Maintain a clear view of progress and keep data experts accountable with targeted alerts. Transform compliance tracking from a chore to a breeze.",
+ imgSrc: "/img/solutions/governance-tile-7.png",
+ }
+ ]
+ }
+]
+
+ export default tilesContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/_content/governanceTrialsContent.js b/docs-website/src/pages/solutions/governance/_content/governanceTrialsContent.js
new file mode 100644
index 0000000000000..a1eed676e8909
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/_content/governanceTrialsContent.js
@@ -0,0 +1,27 @@
+const trialsContent = { // Static copy for the governance page's trials section: one heading plus four cards.
+ title: "Start measuring and minimizing your compliance risk today.", // section heading
+ trialsCardItems: [ // one entry per card: title text, icon path (imgSrc) and a className
+ {
+ title: "Get everyone speaking the same data language.",
+ imgSrc: "/img/solutions/trial-icon-language.png",
+ className: "soc", // NOTE(review): className values look like styling hooks for the Trials component — confirm against its stylesheet
+ },
+ {
+ title: "Empower data owners to own assets, with minimal effort.",
+ imgSrc: "/img/solutions/trial-icon-owner.png",
+ className: "cost",
+ },
+ {
+ title: "Automated classification moves at the speed of your data.",
+ imgSrc: "/img/solutions/trial-icon-lightening.png",
+ className: "enterprise",
+ },
+ {
+ title: "Monitor and enforce governance standards.",
+ imgSrc: "/img/solutions/trial-icon-standard.png",
+ className: "link",
+ },
+ ],
+};
+
+export default trialsContent; // imported as trialsContent by ../index.js
diff --git a/docs-website/src/pages/solutions/governance/index.js b/docs-website/src/pages/solutions/governance/index.js
new file mode 100644
index 0000000000000..82a45cef83438
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/index.js
@@ -0,0 +1,66 @@
+import React, { useState } from "react";
+import Layout from "@theme/Layout";
+import useDocusaurusContext from "@docusaurus/useDocusaurusContext";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+import Hero from "../_components/Hero";
+import QuickstartContent from "../_components/QuickstartContent";
+import Tiles from "../_components/Tiles";
+import Testimonials from "../_components/Testimonials";
+import CaseStudy from "../_components/CaseStudy";
+import Persona from "../_components/Persona";
+import styles from "./styles.module.scss";
+import CloseButton from "@ant-design/icons/CloseCircleFilled";
+import Link from "@docusaurus/Link";
+import quickstartData from "./_content/governanceQuickstartContent";
+import heroContent from "./_content/governanceHeroContent";
+import caseStudyContent from "./_content/governanceCaseStudyContent";
+import personaContent from "./_content/governancePersonaContent";
+import tilesContent from "./_content/governanceTilesContent";
+import testimonialsData from "./_content/governanceTestimonialsContent";
+import trialsContent from "./_content/governanceTrialsContent";
+import Trials from "../_components/Trials";
+
+function Home() { // Governance solutions page; default export of this module.
+ const context = useDocusaurusContext();
+ const { siteConfig = {} } = context; // the {} default only covers a missing siteConfig; customFields is still dereferenced unguarded below
+
+ if (siteConfig.customFields.isSaas) { // when the isSaas custom field is set, send the browser to /docs instead of showing this page
+ window.location.replace("/docs"); // NOTE(review): executes during render; `window` does not exist during Docusaurus server-side rendering — confirm isSaas builds never prerender this page, or move this into useEffect
+ }
+
+ const [isTourModalVisible, setIsTourModalVisible] = useState(false); // tour modal starts hidden
+ const onOpenTourModal = () => { // shows the tour modal
+ setIsTourModalVisible(true);
+ };
+ const onCloseTourModal = () => { // hides the tour modal
+ setIsTourModalVisible(false);
+ };
+ return !siteConfig.customFields.isSaas ? ( // page content is rendered only when isSaas is falsy
+
+ {isTourModalVisible ? (
+
+ ) : null}
+
+
+
+
+
+ {tilesContent.map((content, index) => (
+
+ ))}
+
+
+
+
+ ) : null; // isSaas: render nothing
+}
+
+export default Home;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/governance/styles.module.scss b/docs-website/src/pages/solutions/governance/styles.module.scss
new file mode 100644
index 0000000000000..a26d953e9e5ad
--- /dev/null
+++ b/docs-website/src/pages/solutions/governance/styles.module.scss
@@ -0,0 +1,21 @@
+.container { // Page wrapper: horizontally centered flex column on a light grey background.
+ display: flex;
+ flex-direction: column;
+ background: #fafafa;
+ width: 80vw; // 80% of the viewport width, bounded by the min/max below
+ min-width: 900px;
+ max-width: 1200px;
+ margin: 0 auto; // centers the column horizontally
+}
+
+// Responsiveness: at viewport widths up to 800px the container widens to 90vw and drops its 900px minimum.
+@media (max-width: 800px) {
+
+ .container {
+ flex-direction: column; // same value as the base .container rule
+ width: 90vw;
+ min-width: 0; // removes the 900px floor so the page fits narrow screens
+ margin: 4rem auto; // adds vertical spacing while staying centered
+ }
+}
+
diff --git a/docs-website/src/pages/solutions/observability/_content/observeCaseStudyContent.js b/docs-website/src/pages/solutions/observability/_content/observeCaseStudyContent.js
new file mode 100644
index 0000000000000..ca263fe0d7ff9
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeCaseStudyContent.js
@@ -0,0 +1,26 @@
+const caseStudyContent = { // Static copy for the observability page's case-study section: heading, background colour and three story cards.
+ title: "See how industry leaders are using DataHub for Observability today.", // section heading
+ backgroundColor: "#F3F3F6",
+ items: [ // one entry per card: logo path, card title, link target and image alt text
+ {
+ imgSrc: "/img/solutions/logo-notion.png",
+ title: "How Notion Uses DataHub Cloud to Ensure Data Reliability.",
+ link: "/adoption-stories/#notion", // site-relative link with an anchor on the adoption-stories page
+ alt: "notion" // NOTE(review): lower-case here while the other alt values are capitalised ("MYOB", "Miro") — confirm whether that is intentional
+ },
+ {
+ imgSrc: "/img/solutions/logo-myob.png",
+ title: "How MYOB Improved Data Reliability for dbt and Snowflake.",
+ link: "/adoption-stories/#myob",
+ alt: "MYOB",
+ },
+ {
+ imgSrc: "/img/solutions/miro.png",
+ title: "How DataHub Helped Miro improve Data Product Reliability.",
+ link: "/adoption-stories/#miro",
+ alt: "Miro",
+ }
+ ]
+ };
+
+export default caseStudyContent; // imported as caseStudyContent by ../index.js
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/_content/observeHeroContent.js b/docs-website/src/pages/solutions/observability/_content/observeHeroContent.js
new file mode 100644
index 0000000000000..895b2ca13de8b
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeHeroContent.js
@@ -0,0 +1,8 @@
+const heroContent = { // Static copy for the observability page's hero section.
+ topQuote: "OBSERVABILITY in DATAHUB CLOUD", // short line of text; NOTE(review): presumably shown above the title — confirm in the Hero component
+ title: "Trust your most important data.",
+ description: "Detect, resolve, and prevent data quality issues before they impact your business. Seamlessly integrate Data Quality, Data Discovery and Data Governance in one place.",
+ imgSrc: "/img/solutions/hero-observe.png", // site-relative path to the hero image
+ };
+
+ export default heroContent; // imported as heroContent by ../index.js
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/_content/observeQuickstartContent.js b/docs-website/src/pages/solutions/observability/_content/observeQuickstartContent.js
new file mode 100644
index 0000000000000..75c4a78a7390e
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeQuickstartContent.js
@@ -0,0 +1,16 @@
+const quickstartData = [ // Three pain-point prompts for the observability page, each a question (title) paired with an icon path (image).
+ {
+ title: "Constantly getting messages about broken data?",
+ image: "/img/solutions/icon-cloud.png",
+ },
+ {
+ title: "Tired of spending days cleaning up bad data instead of making forward progress?",
+ image: "/img/solutions/icon-calendar.png",
+ },
+ {
+ title: "Having trouble managing different tools for data quality, discovery, and compliance?",
+ image: "/img/solutions/icon-wrench.png",
+ }
+];
+
+export default quickstartData; // imported as quickstartData by ../index.js
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/_content/observeResourceContent.js b/docs-website/src/pages/solutions/observability/_content/observeResourceContent.js
new file mode 100644
index 0000000000000..512a1a80ff4b6
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeResourceContent.js
@@ -0,0 +1,61 @@
+const resourceData = [ // Resource cards for the observability page; ../index.js iterates this list with resourceData.map(...).
+ { // each entry: title, tag label, backgroundImage URL, external link, and readTime (a numeric string)
+ title: "Introducing Acryl Observe",
+ tag: "Blog",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/710b727fd06db2bbc892e4e9a3f403513fdd75fa-2000x1413.png?w=3840&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/data-quality-should-be-part-of-the-data-catalog-introducing-acryl-observe",
+ readTime: "4"
+ },
+ {
+ title: "When Data Quality Fires Break Out",
+ tag: "Blog",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/039b82490a6322fde04326d892494923ea45a957-599x624.png?w=1200&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/always-be-the-first-to-know-with-acryl-observe",
+ readTime: "5"
+
+ },
+ {
+ title: "Five Signs You Need a Unified Data Observability Solution",
+ tag: "Checklist",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/3dab61d2fe39b89d2c32f8399e75a699de3da76c-1920x936.webp?w=3840&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/five-signs-you-need-a-data-observability-solution",
+ readTime: "4",
+ },
+ {
+ title: "Preventing Data Delays with Acryl Observe",
+ tag: "Blog",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/375b6e03a6a113e43eaa13ffda8072c07ca69c87-611x693.png?w=1920&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/preventing-data-freshness-problems-with-acryl-observe",
+ readTime: "6",
+ },
+ {
+ title: "Detecting Unexpected Table Volume Changes with Acryl Observe",
+ tag: "Blog",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/e4f8d8ab0736631f968b1fdd8411789d2fb2af7f-2000x1730.png?w=3840&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/product-update-detecting-unexpected-table-volume-changes-with-acryl-observe",
+ readTime: "4",
+ },
+ {
+ title: "Detecting Unexpected Column Changes with Acryl Observe",
+ tag: "Blog",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/6620e79f4221c7d750d4cfba4371dd52fcd13953-2000x1933.png?w=3840&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/detecting-deep-data-quality-issues-with-column-level-assertions",
+ readTime: "5",
+ },
+ {
+ title: "Implementing Data Contracts with DataHub",
+ tag: "Blog",
+ backgroundImage:
+ "https://cdn.sanity.io/images/cqo9wkgf/production/af26da1550032891f46053fdf673a228ead0307e-2000x1121.png?w=3840&q=75&fit=clip&auto=format",
+ link: "https://www.acryldata.io/blog/data-contracts-in-datahub-combining-verifiability-with-holistic-data-management",
+ readTime: "4",
+ },
+];
+
+export default resourceData;
diff --git a/docs-website/src/pages/solutions/observability/_content/observeTestimonialsContent.js b/docs-website/src/pages/solutions/observability/_content/observeTestimonialsContent.js
new file mode 100644
index 0000000000000..cb87131502275
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeTestimonialsContent.js
@@ -0,0 +1,10 @@
+const testimonialsData = { // Static copy for the observability page's testimonials section.
+ title: "Enter end-to-end Data Observability.",
+ feature1: "Data Discovery", // label paired with feature1Link
+ feature2: "Governance", // label paired with feature2Link
+ feature1Link: "/solutions/discovery", // site-relative link to the discovery solutions page
+ feature2Link: "/solutions/governance", // site-relative link to the governance solutions page
+ imgSrc: "/img/solutions/observe-icons-group.png",
+};
+
+export default testimonialsData; // imported as testimonialsData by ../index.js
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/_content/observeTilesContent.js b/docs-website/src/pages/solutions/observability/_content/observeTilesContent.js
new file mode 100644
index 0000000000000..e837c7122c905
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeTilesContent.js
@@ -0,0 +1,77 @@
+const tilesContent = [
+ {
+ title: "Detect Data Quality Issues.
Effortlessly.",
+ theme: "dark",
+ tileItems: [
+ {
+ title: "Set it, then forget it.",
+ subtitle: "Create data quality checks effortlessly using the UI or API. Detect freshness, volume, and column value anomalies on tables as soon as they occur. ",
+ imgSrc: "/img/solutions/observe-tile-1.png",
+ },
+ {
+ title: "Cover your blind spots with AI-assisted anomaly detection.",
+ subtitle: "Turn on AI-powered data quality monitors in one click. Catch critical anomalies that would otherwise go undetected.",
+ imgSrc: "/img/solutions/observe-tile-2.png",
+ },
+ {
+ title: "Democratize data quality.",
+ subtitle: "Empower both non-technical and technical teams to create and monitor data quality checks—no code required.",
+ imgSrc: "/img/solutions/observe-tile-3.png",
+ },
+ {
+ title: "Built for your stack.",
+ subtitle: "Instantly start monitoring data on Snowflake, BigQuery, Redshift, Databricks, & more with out-of-the-box integrations.",
+ imgSrc: "/img/solutions/observe-tile-4.png",
+ }
+ ]
+ },
+ {
+ title: "Fix Data Fires.
Fast.",
+ theme: "light",
+ tileItems: [
+ {
+ title: "Get notified where you work.",
+ subtitle: "Find out when things go wrong, with alerts that reach your team where they work—whether it’s Slack, email, or anywhere else.",
+ imgSrc: "/img/solutions/observe-tile-5.png",
+ },
+ {
+ title: "Resolve incidents fast.",
+ subtitle: "Diagnose, debug and fix data issues with rich technical context including data statistics, downstream lineage, and detailed health history.",
+ imgSrc: "/img/solutions/observe-tile-6.png",
+ },
+ {
+ title: "Streamline data incident management.",
+ subtitle: "Centralize your process for tracking progress, sharing context, and keeping everyone who’s impacted in the loop.",
+ imgSrc: "/img/solutions/observe-tile-7.png",
+ }
+ ]
+ },
+ {
+ title: "Visualize the health of all your data.",
+ theme: "dark",
+ tileItems: [
+ {
+ title: "At-a-glance summary of your data quality.",
+ subtitle: "Understand key stats like percent of healthy tables, and table coverage. Easily drill down to see these metrics by domain, owners and more.",
+ imgSrc: "/img/solutions/observe-tile-8.png",
+ },
+ {
+ title: "Slice and dice data quality, your way.",
+ subtitle: "Get detailed breakdowns of assertion failures over the last N days by domains, tags, owners and more.",
+ imgSrc: "/img/solutions/observe-tile-9.png",
+ },
+ {
+ title: "Identify check failures over time.",
+ subtitle: "Easily spot spikes and recurring data quality failures with a customizable timeline view.",
+ imgSrc: "/img/solutions/observe-tile-10.png",
+ },
+ {
+ title: "Triage incidents and track progress.",
+ subtitle: "Understand the current state and historical patterns of data incidents, at a single glance. Take action fast.",
+ imgSrc: "/img/solutions/observe-tile-11.png",
+ }
+ ]
+ }
+ ];
+
+export default tilesContent;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/_content/observeTrialsContent.js b/docs-website/src/pages/solutions/observability/_content/observeTrialsContent.js
new file mode 100644
index 0000000000000..14eacd50b0faf
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeTrialsContent.js
@@ -0,0 +1,24 @@
+const trialsContent = { // Static copy for the observability page's trials section: one heading plus four cards.
+ title: "Start building trust with your stakeholders, today.", // section heading
+ trialsCardItems: [ // one entry per card: title text and an SVG icon path (imgSrc)
+ {
+ title: "Protect your mission-critical tables, reports, services, and more.",
+ imgSrc: "/img/solutions/trial-icon-lock.svg",
+
+ },
+ {
+ title: "Know first, not last. Get notified where you work when things go wrong.",
+ imgSrc: "/img/solutions/trial-icon-alert.svg",
+ },
+ {
+ title: "Let AI detect the blindspots in your data quality checks.",
+ imgSrc: "/img/solutions/trial-icon-star.svg",
+ },
+ {
+ title: "Share documentation, compliance and health for any data asset with one link.",
+ imgSrc: "/img/solutions/trial-icon-link.svg",
+ }
+ ]
+ };
+
+ export default trialsContent; // imported as trialsContent by ../index.js
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/_content/observeUnifiedTabsContent.js b/docs-website/src/pages/solutions/observability/_content/observeUnifiedTabsContent.js
new file mode 100644
index 0000000000000..f02e60f06d1d6
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/_content/observeUnifiedTabsContent.js
@@ -0,0 +1,22 @@
+const unifiedTabsData = [ // Three tabs (Detect, Resolve, Unify) for the observability page, each with a headline, body text and image path.
+ {
+ tabName: 'Detect', // tab label
+ title: 'Be the first to know when something goes wrong.',
+ description: 'Identify data quality issues and outages before they disrupt operations, damage trust and impact end users. Notify the right people where they work when things go wrong.',
+ image: '/img/solutions/unified-tab-detect.png',
+ },
+ {
+ tabName: 'Resolve',
+ title: 'Spend minutes, not days, resolving issues.',
+ description: 'Accelerate time-to-resolution with detailed lineage, documentation, and ownership information. Track incident status in one place and keep all stakeholders in the loop.',
+ image: '/img/solutions/unified-tab-resolve.png',
+ },
+ {
+ tabName: 'Unify',
+ title: 'One platform to rule them all.',
+ description: 'Empower your teams to discover healthy data on their own. Combine quality with lineage, documentation and ownership information to triage and resolve data quality issues quickly. Make data your competitive edge by unifying Data Quality, Discovery and Governance under one roof.',
+ image: '/img/solutions/unified-tab-unify.png',
+ },
+];
+
+export default unifiedTabsData; // imported as unifiedTabsData by ../index.js
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/index.js b/docs-website/src/pages/solutions/observability/index.js
new file mode 100644
index 0000000000000..359bea914e5a1
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/index.js
@@ -0,0 +1,121 @@
+import React, { useState } from "react";
+import Layout from "@theme/Layout";
+import useDocusaurusContext from "@docusaurus/useDocusaurusContext";
+import useBaseUrl from "@docusaurus/useBaseUrl";
+import Hero from "../_components/Hero";
+import Tiles from "../_components/Tiles";
+import Testimonials from "../_components/Testimonials";
+import CaseStudy from "../_components/CaseStudy";
+import QuickstartContent from "../_components/QuickstartContent";
+import styles from "./styles.module.scss";
+import CloseButton from "@ant-design/icons/CloseCircleFilled";
+import clsx from "clsx";
+import quickstartData from "./_content/observeQuickstartContent";
+import heroContent from "./_content/observeHeroContent";
+import caseStudyContent from "./_content/observeCaseStudyContent";
+import IntegrationsStatic from "../_components/IntegrationsStatic";
+import tilesContent from "./_content/observeTilesContent";
+import testimonialsData from "./_content/observeTestimonialsContent";
+import resourceData from "./_content/observeResourceContent";
+import UnifiedTabs from "../_components/UnifiedTabs";
+import unifiedTabsData from "./_content/observeUnifiedTabsContent";
+import trialsContent from "./_content/observeTrialsContent";
+import Trials from "../_components/Trials";
+import SlidingTabs from "../_components/SlidingTabs";
+
+function Home() { // Observability solutions page; default export of this module.
+ const context = useDocusaurusContext();
+ const { siteConfig = {} } = context; // the {} default only covers a missing siteConfig; customFields is still dereferenced unguarded below
+
+ if (siteConfig.customFields.isSaas) { // when the isSaas custom field is set, send the browser to /docs instead of showing this page
+ window.location.replace("/docs"); // NOTE(review): executes during render; `window` does not exist during Docusaurus server-side rendering — confirm isSaas builds never prerender this page, or move this into useEffect
+ }
+
+ const [isTourModalVisible, setIsTourModalVisible] = useState(false); // tour modal starts hidden
+ const onOpenTourModal = () => { // shows the tour modal
+ setIsTourModalVisible(true);
+ };
+ const onCloseTourModal = () => { // hides the tour modal
+ setIsTourModalVisible(false);
+ };
+ return !siteConfig.customFields.isSaas ? ( // page content is rendered only when isSaas is falsy
+
+ {isTourModalVisible ? (
+
+ ) : null}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Depop
+
Olivier Tatard Engineering Manager
+
+
+
+ "We chose Acryl because we see the value of having both a data catalog and observability capabilities in one tool. Having data owners, maintainers, and consumers in one place streamlines incident management and allows for faster time to resolution."
+
+
+
+
+
+ {tilesContent.map((content, index) => (
+
+ ))}
+
+
+
+
+
+
+
+ Resources
+
+
+
+ {resourceData.map((resource) => (
+
+ ))}
+
+
+
+
+
+ ) : null; // isSaas: render nothing
+}
+
+export default Home;
\ No newline at end of file
diff --git a/docs-website/src/pages/solutions/observability/styles.module.scss b/docs-website/src/pages/solutions/observability/styles.module.scss
new file mode 100644
index 0000000000000..2125e2237c9dd
--- /dev/null
+++ b/docs-website/src/pages/solutions/observability/styles.module.scss
@@ -0,0 +1,342 @@
+.container { // Page wrapper: horizontally centered flex column.
+ display: flex;
+ flex-direction: column;
+ width: 80vw; // 80% of the viewport width, bounded by the min/max below
+ min-width: 900px;
+ max-width: 1200px;
+ margin: 0 auto; // centers the column horizontally
+}
+
+.testimonials { // Grey testimonial band: a card holding logo, company/author text and the quote.
+ background: #f1f1f1;
+
+ :global { // CSS-modules :global scope — the testimonials__* selectors below are written as plain, unscoped class names
+ .testimonials__content {
+ width: 80vw;
+ max-width: 1200px;
+ margin: 0 auto;
+ padding: 40px 0;
+
+ .testimonials__card { // row layout by default; switched to a column in the media query below
+ margin: 2rem 4rem;
+ display: flex;
+
+ .testimonials__meta {
+ display: flex;
+ }
+
+ .testimonials__logo {
+ max-width: 120px;
+ margin: auto 1rem;
+ }
+ .testimonials__company {
+ width: 300px;
+ margin: auto 2rem;
+
+ .testimonials__company_title {
+ color: var(--primitives-text-text-heading, #373A47); // custom property with #373A47 as the fallback
+ font-family: Manrope;
+ font-size: 36px;
+ font-style: normal;
+ font-weight: 700;
+ line-height: normal;
+ letter-spacing: -1.92px;
+ margin-bottom: 8px;
+
+ }
+
+ .testimonials__author_title {
+ color: var(--primitives-text-text-heading, #373A47);
+ font-family: Manrope;
+ font-size: 20px;
+ font-style: normal;
+ font-weight: 300;
+ line-height: normal;
+ letter-spacing: -1.28px;
+ }
+ }
+
+ .testimonials__text {
+ width: 100%;
+ color: var(--primitives-text-text-heading, #373A47);
+ font-family: Manrope;
+ font-size: 20px;
+ font-style: normal;
+ font-weight: 400;
+ line-height: 140%;
+ margin: auto 2rem;
+ }
+ }
+ }
+
+ @media only screen and (max-width: 800px) { // narrow screens: full-width content, stacked card, smaller logo and title
+ .testimonials__content {
+ width: 100vw;
+ padding: 20px 0;
+
+ .testimonials__card {
+ flex-direction: column;
+ margin: 2rem 3rem;
+
+ .testimonials__meta {
+ display: flex;
+ }
+
+ .testimonials__logo {
+ max-width: 40px;
+ margin: auto 1rem;
+ }
+
+ .testimonials__company {
+ display: flex;
+ margin: auto 1rem;
+ flex-direction: column;
+
+ .testimonials__company_title {
+ font-size: 1.5rem;
+ margin-bottom: 0;
+ }
+ .testimonials__author_title {
+ padding-left: 0; // NOTE(review): no padding-left is set on this selector elsewhere in this file — confirm this reset is needed
+ }
+ }
+
+ .testimonials__text {
+ width: 100%;
+ max-width: 100%;
+ margin: 2rem auto;
+ }
+ }
+ }
+
+ }
+ }
+}
+
+
+// Responsiveness: at viewport widths up to 800px the container widens to 90vw and drops its 900px minimum.
+@media (max-width: 800px) {
+
+ .container {
+ flex-direction: column; // same value as the base .container rule
+ width: 90vw;
+ min-width: 0; // removes the 900px floor so the page fits narrow screens
+ margin: 4rem auto; // adds vertical spacing while staying centered
+ }
+}
+
+
+.resource_container { // Light grey flex-column wrapper that sets the Manrope font family.
+ display: flex;
+ flex-direction: column;
+ background: #F4F4F5;
+ font-family: "Manrope";
+}
+.resource { // Full-viewport-width section: a large heading above a horizontally scrolling row of fixed-size cards.
+ display: flex;
+ flex-direction: column;
+ width: 100vw;
+ margin: 5rem 0;
+
+ .resource_heading {
+ line-height: 4rem; // NOTE(review): overridden by `line-height: normal` later in this same rule
+ font-family: Manrope;
+ font-family: Manrope; // NOTE(review): duplicate of the previous declaration
+ font-size: 3.25rem;
+ font-style: normal;
+ font-weight: 500;
+ padding-left: 5rem; // matches the 5rem left padding of .card_row
+ line-height: normal;
+ mix-blend-mode: luminosity;
+ width: 100%;
+ margin-bottom: 1rem;
+ }
+
+ .card_row::-webkit-scrollbar { // hides the scrollbar in WebKit-based browsers
+ display: none;
+ }
+ .card_row { // horizontally scrollable strip with its scrollbar hidden
+ margin-top: 8px;
+ overflow-x: scroll;
+ padding-right: 5rem;
+ padding-left: 5rem;
+ scrollbar-width: none; // standard-property counterpart of the ::-webkit-scrollbar rule above
+
+ .card_row_wrapper { // lays the cards out in a single row
+ display: flex;
+ flex-direction: row;
+ align-items: center;
+ flex: 1;
+ }
+ .cardLink {
+ color: #000;
+ cursor: pointer;
+ display: block;
+ height: 100%;
+
+ &:hover {
+ text-decoration: none;
+ }
+ }
+
+ .card { // fixed 300x400 card; position: relative anchors the absolutely positioned children below
+ display: flex;
+ flex-direction: column;
+ justify-content: space-between;
+ flex-direction: column; // NOTE(review): duplicate of the declaration two lines above
+ position: relative;
+ width: 300px;
+ height: 400px;
+ flex-shrink: 0; // keeps the card at its fixed width inside the flex row
+ border-radius: 12px;
+ background: white;
+ margin-right: 20px;
+ transition: box-shadow 0.2s ease-in-out;
+ box-shadow: 0px 2px 16px 0px rgba(55, 61, 68, 0.08);
+ overflow-y: visible;
+
+
+ .read_time { // absolutely positioned at the card's top-right corner
+ position: absolute;
+ top: 16px;
+ right: 16px;
+ color: white;
+ font-size: 1rem;
+ z-index: 10;
+ text-shadow: 0px 1px 4px black;
+ }
+
+ .card_image { // 200px image area; only the top corners are rounded
+ position: relative;
+ height: 200px;
+ background-size: cover;
+ background-position: center;
+ border-radius: 12px 12px 0 0;
+ display: flex;
+ justify-content: center;
+ align-items: center;
+
+ img {
+ max-width: 100%;
+ max-height: 100%;
+ object-fit: cover;
+ }
+ }
+
+ .card_content {
+ padding: 16px;
+
+ .card_heading {
+ font-size: 1.4rem;
+ line-height: 1.75rem;
+ font-weight: 600;
+ margin-bottom: 8px;
+ }
+
+ .read_more { // absolutely positioned near the bottom-left; NOTE(review): .card_content is not positioned, so this presumably anchors to .card — confirm
+ color: #1890ff;
+ font-weight: bold;
+ text-decoration: none;
+ font-size: 1rem;
+ position: absolute;
+ bottom: 8px;
+ left: 8px;
+ width: 100%;
+ transition: all .2s;
+
+ &:hover {
+ text-decoration: none;
+ opacity: .9;
+ }
+ }
+ }
+
+ .card_tag { // pill-shaped label absolutely positioned at the card's top-left corner
+ position: absolute;
+ top: 16px;
+ left: 16px;
+ background-color: #F9F9F9;
+ color: black;
+ padding: 4px 24px;
+ border-radius: 20px;
+ font-size: 1.2rem;
+ font-weight: bold;
+ z-index: 10;
+ }
+ }
+
+ }
+
+ .bottom_line { // centered link-style row below the cards
+ cursor: pointer;
+ text-decoration: none;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ color: #12b0fb;
+ font-size: 1.1rem;
+ font-weight: 500;
+ margin-top: 50px;
+ margin-bottom: 10px;
+
+ span {
+ line-height: 10px;
+ font-size: 1.5rem;
+ margin-left: 10px;
+ }
+ }
+}
+
+@media (max-width: 800px) { // narrow-screen overrides for .resource, plus rules for .case_study
+ .resource {
+ .resource_heading {
+ padding-left: 2rem;
+ font-size: 2rem;
+ }
+
+ .card_row {
+ padding-left: 0; // the left inset moves onto .card_row_wrapper below
+
+ .card_row_wrapper {
+ padding: 0 2rem;
+ }
+ }
+ }
+ .case_study { // NOTE(review): this stylesheet defines .case_study only inside this media query — confirm it is still used
+ .case_study_heading {
+ text-align: center;
+ font-family: "Manrope";
+
+ div {
+ width: 80%;
+ margin: auto;
+ font-size: 2rem;
+ line-height: normal;
+ font-weight: 400;
+ }
+ p {
+ width: 80%;
+ margin: auto;
+ font-size: 1.1rem;
+ line-height: 1.5rem;
+ margin-top: 12px;
+ }
+ }
+
+ .bottom_line {
+ cursor: pointer;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ color: #12b0fb;
+ font-size: 1.1rem;
+ font-weight: 500;
+ margin-top: 50px;
+ margin-bottom: 10px;
+
+ span {
+ font-size: 1.5rem;
+ }
+ }
+ }
+}
diff --git a/docs-website/static/img/solutions/case-study-card-bg.png b/docs-website/static/img/solutions/case-study-card-bg.png
new file mode 100644
index 0000000000000..7377c7282842e
Binary files /dev/null and b/docs-website/static/img/solutions/case-study-card-bg.png differ
diff --git a/docs-website/static/img/solutions/checkout.jpg b/docs-website/static/img/solutions/checkout.jpg
new file mode 100644
index 0000000000000..4a766475cc599
Binary files /dev/null and b/docs-website/static/img/solutions/checkout.jpg differ
diff --git a/docs-website/static/img/solutions/communities.png b/docs-website/static/img/solutions/communities.png
new file mode 100644
index 0000000000000..f15a7c1636191
Binary files /dev/null and b/docs-website/static/img/solutions/communities.png differ
diff --git a/docs-website/static/img/solutions/discovery-icons-group.png b/docs-website/static/img/solutions/discovery-icons-group.png
new file mode 100644
index 0000000000000..a86094a76409c
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-icons-group.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-1.png b/docs-website/static/img/solutions/discovery-tile-1.png
new file mode 100644
index 0000000000000..185cd8dd48353
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-1.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-2.png b/docs-website/static/img/solutions/discovery-tile-2.png
new file mode 100644
index 0000000000000..185838f3086ff
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-2.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-3.png b/docs-website/static/img/solutions/discovery-tile-3.png
new file mode 100644
index 0000000000000..63c44271a2eaf
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-3.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-4.png b/docs-website/static/img/solutions/discovery-tile-4.png
new file mode 100644
index 0000000000000..f9c9af7096492
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-4.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-5.png b/docs-website/static/img/solutions/discovery-tile-5.png
new file mode 100644
index 0000000000000..04a78d27ae898
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-5.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-6.png b/docs-website/static/img/solutions/discovery-tile-6.png
new file mode 100644
index 0000000000000..2a347ebc0de53
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-6.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-7.png b/docs-website/static/img/solutions/discovery-tile-7.png
new file mode 100644
index 0000000000000..a1af7d8f4dc38
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-7.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-8.png b/docs-website/static/img/solutions/discovery-tile-8.png
new file mode 100644
index 0000000000000..c10fa4e690e47
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-8.png differ
diff --git a/docs-website/static/img/solutions/discovery-tile-9.png b/docs-website/static/img/solutions/discovery-tile-9.png
new file mode 100644
index 0000000000000..a3e93d6fc7025
Binary files /dev/null and b/docs-website/static/img/solutions/discovery-tile-9.png differ
diff --git a/docs-website/static/img/solutions/governance-icons-group.png b/docs-website/static/img/solutions/governance-icons-group.png
new file mode 100644
index 0000000000000..464b6bed20563
Binary files /dev/null and b/docs-website/static/img/solutions/governance-icons-group.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-1.png b/docs-website/static/img/solutions/governance-tile-1.png
new file mode 100644
index 0000000000000..28ba604ff6b20
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-1.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-2.png b/docs-website/static/img/solutions/governance-tile-2.png
new file mode 100644
index 0000000000000..ae28d833808a2
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-2.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-3.png b/docs-website/static/img/solutions/governance-tile-3.png
new file mode 100644
index 0000000000000..534c901e88821
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-3.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-4.png b/docs-website/static/img/solutions/governance-tile-4.png
new file mode 100644
index 0000000000000..5527df93309db
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-4.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-5.png b/docs-website/static/img/solutions/governance-tile-5.png
new file mode 100644
index 0000000000000..73cbbcaf72c6a
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-5.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-6.png b/docs-website/static/img/solutions/governance-tile-6.png
new file mode 100644
index 0000000000000..5eeabc84f72f4
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-6.png differ
diff --git a/docs-website/static/img/solutions/governance-tile-7.png b/docs-website/static/img/solutions/governance-tile-7.png
new file mode 100644
index 0000000000000..0914077227800
Binary files /dev/null and b/docs-website/static/img/solutions/governance-tile-7.png differ
diff --git a/docs-website/static/img/solutions/hero-background.png b/docs-website/static/img/solutions/hero-background.png
new file mode 100644
index 0000000000000..3daeaca68dc56
Binary files /dev/null and b/docs-website/static/img/solutions/hero-background.png differ
diff --git a/docs-website/static/img/solutions/hero-discovery.png b/docs-website/static/img/solutions/hero-discovery.png
new file mode 100644
index 0000000000000..28f962d4b84a0
Binary files /dev/null and b/docs-website/static/img/solutions/hero-discovery.png differ
diff --git a/docs-website/static/img/solutions/hero-governance.png b/docs-website/static/img/solutions/hero-governance.png
new file mode 100644
index 0000000000000..e94c1d1303e9f
Binary files /dev/null and b/docs-website/static/img/solutions/hero-governance.png differ
diff --git a/docs-website/static/img/solutions/hero-observe.png b/docs-website/static/img/solutions/hero-observe.png
new file mode 100644
index 0000000000000..91ed48d5f45ef
Binary files /dev/null and b/docs-website/static/img/solutions/hero-observe.png differ
diff --git a/docs-website/static/img/solutions/icon-calendar.png b/docs-website/static/img/solutions/icon-calendar.png
new file mode 100644
index 0000000000000..56a2fda29f25f
Binary files /dev/null and b/docs-website/static/img/solutions/icon-calendar.png differ
diff --git a/docs-website/static/img/solutions/icon-cloud.png b/docs-website/static/img/solutions/icon-cloud.png
new file mode 100644
index 0000000000000..fccb4543fac28
Binary files /dev/null and b/docs-website/static/img/solutions/icon-cloud.png differ
diff --git a/docs-website/static/img/solutions/icon-metric.png b/docs-website/static/img/solutions/icon-metric.png
new file mode 100644
index 0000000000000..ad3d725bef6e9
Binary files /dev/null and b/docs-website/static/img/solutions/icon-metric.png differ
diff --git a/docs-website/static/img/solutions/icon-migration.png b/docs-website/static/img/solutions/icon-migration.png
new file mode 100644
index 0000000000000..c4fadac2150ba
Binary files /dev/null and b/docs-website/static/img/solutions/icon-migration.png differ
diff --git a/docs-website/static/img/solutions/icon-revenue.png b/docs-website/static/img/solutions/icon-revenue.png
new file mode 100644
index 0000000000000..6af1f18e0aeaa
Binary files /dev/null and b/docs-website/static/img/solutions/icon-revenue.png differ
diff --git a/docs-website/static/img/solutions/icon-wrench.png b/docs-website/static/img/solutions/icon-wrench.png
new file mode 100644
index 0000000000000..cb050b3f7239e
Binary files /dev/null and b/docs-website/static/img/solutions/icon-wrench.png differ
diff --git a/docs-website/static/img/solutions/integrations-observe/logo-integration-1.png b/docs-website/static/img/solutions/integrations-observe/logo-integration-1.png
new file mode 100644
index 0000000000000..67a441252fa3f
Binary files /dev/null and b/docs-website/static/img/solutions/integrations-observe/logo-integration-1.png differ
diff --git a/docs-website/static/img/solutions/integrations-observe/logo-integration-2.png b/docs-website/static/img/solutions/integrations-observe/logo-integration-2.png
new file mode 100644
index 0000000000000..091619f03300f
Binary files /dev/null and b/docs-website/static/img/solutions/integrations-observe/logo-integration-2.png differ
diff --git a/docs-website/static/img/solutions/integrations-observe/logo-integration-3.png b/docs-website/static/img/solutions/integrations-observe/logo-integration-3.png
new file mode 100644
index 0000000000000..738e251c327b8
Binary files /dev/null and b/docs-website/static/img/solutions/integrations-observe/logo-integration-3.png differ
diff --git a/docs-website/static/img/solutions/integrations-observe/logo-integration-4.png b/docs-website/static/img/solutions/integrations-observe/logo-integration-4.png
new file mode 100644
index 0000000000000..92d89a262adde
Binary files /dev/null and b/docs-website/static/img/solutions/integrations-observe/logo-integration-4.png differ
diff --git a/docs-website/static/img/solutions/integrations-observe/logo-integration-5.png b/docs-website/static/img/solutions/integrations-observe/logo-integration-5.png
new file mode 100644
index 0000000000000..acc17cb75d585
Binary files /dev/null and b/docs-website/static/img/solutions/integrations-observe/logo-integration-5.png differ
diff --git a/docs-website/static/img/solutions/integrations-observe/logo-integration-6.png b/docs-website/static/img/solutions/integrations-observe/logo-integration-6.png
new file mode 100644
index 0000000000000..d9bb08766f527
Binary files /dev/null and b/docs-website/static/img/solutions/integrations-observe/logo-integration-6.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-1.png b/docs-website/static/img/solutions/integrations/logo-integration-1.png
new file mode 100644
index 0000000000000..091619f03300f
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-1.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-10.png b/docs-website/static/img/solutions/integrations/logo-integration-10.png
new file mode 100644
index 0000000000000..acc17cb75d585
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-10.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-11.png b/docs-website/static/img/solutions/integrations/logo-integration-11.png
new file mode 100644
index 0000000000000..d9bb08766f527
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-11.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-2.png b/docs-website/static/img/solutions/integrations/logo-integration-2.png
new file mode 100644
index 0000000000000..738e251c327b8
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-2.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-3.png b/docs-website/static/img/solutions/integrations/logo-integration-3.png
new file mode 100644
index 0000000000000..92d89a262adde
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-3.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-4.png b/docs-website/static/img/solutions/integrations/logo-integration-4.png
new file mode 100644
index 0000000000000..67a441252fa3f
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-4.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-5.png b/docs-website/static/img/solutions/integrations/logo-integration-5.png
new file mode 100644
index 0000000000000..acc17cb75d585
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-5.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-6.png b/docs-website/static/img/solutions/integrations/logo-integration-6.png
new file mode 100644
index 0000000000000..d9bb08766f527
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-6.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-7.png b/docs-website/static/img/solutions/integrations/logo-integration-7.png
new file mode 100644
index 0000000000000..2672e9c1c2a8e
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-7.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-8.png b/docs-website/static/img/solutions/integrations/logo-integration-8.png
new file mode 100644
index 0000000000000..4b93e23846004
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-8.png differ
diff --git a/docs-website/static/img/solutions/integrations/logo-integration-9.png b/docs-website/static/img/solutions/integrations/logo-integration-9.png
new file mode 100644
index 0000000000000..eee40d590a148
Binary files /dev/null and b/docs-website/static/img/solutions/integrations/logo-integration-9.png differ
diff --git a/docs-website/static/img/solutions/lock.png b/docs-website/static/img/solutions/lock.png
new file mode 100644
index 0000000000000..ce7cda258c4ce
Binary files /dev/null and b/docs-website/static/img/solutions/lock.png differ
diff --git a/docs-website/static/img/solutions/logo-depop.png b/docs-website/static/img/solutions/logo-depop.png
new file mode 100644
index 0000000000000..337d0087be6fa
Binary files /dev/null and b/docs-website/static/img/solutions/logo-depop.png differ
diff --git a/docs-website/static/img/solutions/logo-dpg-media.png b/docs-website/static/img/solutions/logo-dpg-media.png
new file mode 100644
index 0000000000000..0252a018d65b7
Binary files /dev/null and b/docs-website/static/img/solutions/logo-dpg-media.png differ
diff --git a/docs-website/static/img/solutions/logo-myob.png b/docs-website/static/img/solutions/logo-myob.png
new file mode 100644
index 0000000000000..a166565a7ed85
Binary files /dev/null and b/docs-website/static/img/solutions/logo-myob.png differ
diff --git a/docs-website/static/img/solutions/logo-notion.png b/docs-website/static/img/solutions/logo-notion.png
new file mode 100644
index 0000000000000..ea0601609972d
Binary files /dev/null and b/docs-website/static/img/solutions/logo-notion.png differ
diff --git a/docs-website/static/img/solutions/miro.png b/docs-website/static/img/solutions/miro.png
new file mode 100644
index 0000000000000..3009fcbe03a29
Binary files /dev/null and b/docs-website/static/img/solutions/miro.png differ
diff --git a/docs-website/static/img/solutions/observe-icons-group.png b/docs-website/static/img/solutions/observe-icons-group.png
new file mode 100644
index 0000000000000..704b4a009f899
Binary files /dev/null and b/docs-website/static/img/solutions/observe-icons-group.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-1.png b/docs-website/static/img/solutions/observe-tile-1.png
new file mode 100644
index 0000000000000..61dce01ff4a41
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-1.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-10.png b/docs-website/static/img/solutions/observe-tile-10.png
new file mode 100644
index 0000000000000..92bcb393c3f1d
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-10.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-11.png b/docs-website/static/img/solutions/observe-tile-11.png
new file mode 100644
index 0000000000000..a4c3d126ef4bd
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-11.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-2.png b/docs-website/static/img/solutions/observe-tile-2.png
new file mode 100644
index 0000000000000..de5786a925aa1
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-2.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-3.png b/docs-website/static/img/solutions/observe-tile-3.png
new file mode 100644
index 0000000000000..74ad1eb976950
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-3.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-4.png b/docs-website/static/img/solutions/observe-tile-4.png
new file mode 100644
index 0000000000000..0aca86f15a791
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-4.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-5.png b/docs-website/static/img/solutions/observe-tile-5.png
new file mode 100644
index 0000000000000..2f4839cc65f7f
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-5.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-6.png b/docs-website/static/img/solutions/observe-tile-6.png
new file mode 100644
index 0000000000000..ba97412c0576c
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-6.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-7.png b/docs-website/static/img/solutions/observe-tile-7.png
new file mode 100644
index 0000000000000..2b882d54857ba
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-7.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-8.png b/docs-website/static/img/solutions/observe-tile-8.png
new file mode 100644
index 0000000000000..51ace6a819457
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-8.png differ
diff --git a/docs-website/static/img/solutions/observe-tile-9.png b/docs-website/static/img/solutions/observe-tile-9.png
new file mode 100644
index 0000000000000..7c40b3a665522
Binary files /dev/null and b/docs-website/static/img/solutions/observe-tile-9.png differ
diff --git a/docs-website/static/img/solutions/optum.jpg b/docs-website/static/img/solutions/optum.jpg
new file mode 100644
index 0000000000000..8be6a260b0129
Binary files /dev/null and b/docs-website/static/img/solutions/optum.jpg differ
diff --git a/docs-website/static/img/solutions/persona_compliance_officers.png b/docs-website/static/img/solutions/persona_compliance_officers.png
new file mode 100644
index 0000000000000..07780a7c6c41b
Binary files /dev/null and b/docs-website/static/img/solutions/persona_compliance_officers.png differ
diff --git a/docs-website/static/img/solutions/persona_developers.png b/docs-website/static/img/solutions/persona_developers.png
new file mode 100644
index 0000000000000..8d71b4697f3d6
Binary files /dev/null and b/docs-website/static/img/solutions/persona_developers.png differ
diff --git a/docs-website/static/img/solutions/persona_owners_and_smes.png b/docs-website/static/img/solutions/persona_owners_and_smes.png
new file mode 100644
index 0000000000000..101f019c11485
Binary files /dev/null and b/docs-website/static/img/solutions/persona_owners_and_smes.png differ
diff --git a/docs-website/static/img/solutions/personas-mobile.png b/docs-website/static/img/solutions/personas-mobile.png
new file mode 100644
index 0000000000000..ca64e27a4c730
Binary files /dev/null and b/docs-website/static/img/solutions/personas-mobile.png differ
diff --git a/docs-website/static/img/solutions/rocket-launch.png b/docs-website/static/img/solutions/rocket-launch.png
new file mode 100644
index 0000000000000..1da699504cab8
Binary files /dev/null and b/docs-website/static/img/solutions/rocket-launch.png differ
diff --git a/docs-website/static/img/solutions/sliding-tab-bg.png b/docs-website/static/img/solutions/sliding-tab-bg.png
new file mode 100644
index 0000000000000..ac02e994dd109
Binary files /dev/null and b/docs-website/static/img/solutions/sliding-tab-bg.png differ
diff --git a/docs-website/static/img/solutions/trial-icon-alert.svg b/docs-website/static/img/solutions/trial-icon-alert.svg
new file mode 100644
index 0000000000000..407a4322ad1a0
--- /dev/null
+++ b/docs-website/static/img/solutions/trial-icon-alert.svg
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/docs-website/static/img/solutions/trial-icon-language.png b/docs-website/static/img/solutions/trial-icon-language.png
new file mode 100644
index 0000000000000..7c9ee14e1e582
Binary files /dev/null and b/docs-website/static/img/solutions/trial-icon-language.png differ
diff --git a/docs-website/static/img/solutions/trial-icon-lightening.png b/docs-website/static/img/solutions/trial-icon-lightening.png
new file mode 100644
index 0000000000000..8ea71c819f8a1
Binary files /dev/null and b/docs-website/static/img/solutions/trial-icon-lightening.png differ
diff --git a/docs-website/static/img/solutions/trial-icon-link.svg b/docs-website/static/img/solutions/trial-icon-link.svg
new file mode 100644
index 0000000000000..b6fa92c9f2534
--- /dev/null
+++ b/docs-website/static/img/solutions/trial-icon-link.svg
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/docs-website/static/img/solutions/trial-icon-lock.svg b/docs-website/static/img/solutions/trial-icon-lock.svg
new file mode 100644
index 0000000000000..cfbbe5b8668e0
--- /dev/null
+++ b/docs-website/static/img/solutions/trial-icon-lock.svg
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/docs-website/static/img/solutions/trial-icon-owner.png b/docs-website/static/img/solutions/trial-icon-owner.png
new file mode 100644
index 0000000000000..b5382acfe5aba
Binary files /dev/null and b/docs-website/static/img/solutions/trial-icon-owner.png differ
diff --git a/docs-website/static/img/solutions/trial-icon-standard.png b/docs-website/static/img/solutions/trial-icon-standard.png
new file mode 100644
index 0000000000000..e1452a4b33d0b
Binary files /dev/null and b/docs-website/static/img/solutions/trial-icon-standard.png differ
diff --git a/docs-website/static/img/solutions/trial-icon-star.svg b/docs-website/static/img/solutions/trial-icon-star.svg
new file mode 100644
index 0000000000000..527a7fb92e668
--- /dev/null
+++ b/docs-website/static/img/solutions/trial-icon-star.svg
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/docs-website/static/img/solutions/unified-tab-detect.png b/docs-website/static/img/solutions/unified-tab-detect.png
new file mode 100644
index 0000000000000..ae178862aa48b
Binary files /dev/null and b/docs-website/static/img/solutions/unified-tab-detect.png differ
diff --git a/docs-website/static/img/solutions/unified-tab-resolve.png b/docs-website/static/img/solutions/unified-tab-resolve.png
new file mode 100644
index 0000000000000..6f116192fd792
Binary files /dev/null and b/docs-website/static/img/solutions/unified-tab-resolve.png differ
diff --git a/docs-website/static/img/solutions/unified-tab-unify.png b/docs-website/static/img/solutions/unified-tab-unify.png
new file mode 100644
index 0000000000000..0f01b88b0cfde
Binary files /dev/null and b/docs-website/static/img/solutions/unified-tab-unify.png differ
diff --git a/docs-website/static/img/solutions/water-lock.png b/docs-website/static/img/solutions/water-lock.png
new file mode 100644
index 0000000000000..a7fac5bdcb719
Binary files /dev/null and b/docs-website/static/img/solutions/water-lock.png differ
diff --git a/docs-website/static/img/solutions/wolt.png b/docs-website/static/img/solutions/wolt.png
new file mode 100644
index 0000000000000..2633136f36ed7
Binary files /dev/null and b/docs-website/static/img/solutions/wolt.png differ
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index db97f7aa81d7b..989ebc6be4e73 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -77,6 +77,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- #11313 - `datahub get` will no longer return a key aspect for entities that don't exist.
- #11369 - The default datahub-rest sink mode has been changed to `ASYNC_BATCH`. This requires a server with version 0.14.0+.
- #11214 Container properties aspect will produce an additional field that will require a corresponding upgrade of server. Otherwise server can reject the aspects.
+- #10190 - `extractor_config.set_system_metadata` of `datahub` source has been moved to be a top level config in the recipe under `flags.set_system_metadata`
### Potential Downtime
diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
index f2745d5e77f49..1561a36d04c0c 100644
--- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
+++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
@@ -10,16 +10,17 @@
## Concept mapping
-| PowerBI | Datahub |
-|-----------------------|---------------------|
-| `Dashboard` | `Dashboard` |
-| `Dataset's Table` | `Dataset` |
-| `Tile` | `Chart` |
-| `Report.webUrl` | `Chart.externalUrl` |
-| `Workspace` | `Container` |
-| `Report` | `Dashboard` |
-| `PaginatedReport` | `Dashboard` |
-| `Page` | `Chart` |
+| PowerBI | Datahub |
+|-------------------|---------------------|
+| `Dashboard` | `Dashboard` |
+| `Dataset's Table` | `Dataset` |
+| `Tile` | `Chart` |
+| `Report.webUrl` | `Chart.externalUrl` |
+| `Workspace` | `Container` |
+| `Report` | `Dashboard` |
+| `PaginatedReport` | `Dashboard` |
+| `Page` | `Chart` |
+| `App` | `Dashboard` |
- If `Tile` is created from report then `Chart.externalUrl` is set to Report.webUrl.
- The `Page` is unavailable for PowerBI PaginatedReport.
@@ -102,7 +103,7 @@ combine_result
`Pattern-2` is *not* supported for upstream table lineage extraction as it uses nested item-selector i.e. {Source{[Schema="public",Item="book"]}[Data], Source{[Schema="public",Item="issue_history"]}[Data]} as argument to M-QUery table function i.e. Table.Combine
-`Pattern-1` is supported as it first assign the table from schema to variable and then variable is used in M-Query Table function i.e. Table.Combine
+`Pattern-1` is supported as it first assigns the table from schema to variable and then variable is used in M-Query Table function i.e. Table.Combine
## Extract endorsements to tags
@@ -112,18 +113,20 @@ Please note that the default implementation overwrites tags for the ingested ent
## Profiling
-The profiling implementation is done through querying [DAX query endpoint](https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries). Therefore the principal needs to have permission to query the datasets to be profiled. Usually this means that the service principal should have `Contributor` role for the workspace to be ingested. Profiling is done with column based queries to be able to handle wide datasets without timeouts.
+The profiling implementation is done through querying [DAX query endpoint](https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries). Therefore, the principal needs to have permission to query the datasets to be profiled. Usually this means that the service principal should have `Contributor` role for the workspace to be ingested. Profiling is done with column-based queries to be able to handle wide datasets without timeouts.
-Take into account that the profiling implementation exeutes fairly big amount of DAX queries and for big datasets this is substantial load to the PowerBI system.
+Take into account that the profiling implementation executes a fairly big number of DAX queries, and for big datasets this is a significant load to the PowerBI system.
-The `profiling_pattern` setting may be used to limit profiling actions to only a certain set of resources in PowerBI. Both allow and deny rules are matched against following pattern for every table in a PowerBI Dataset: `workspace_name.dataset_name.table_name`. User may limit profiling with these settings at table level, dataset level or workspace level.
+The `profiling_pattern` setting may be used to limit profiling actions to only a certain set of resources in PowerBI. Both allow and deny rules are matched against the following pattern for every table in a PowerBI Dataset: `workspace_name.dataset_name.table_name`. Users may limit profiling with these settings at table level, dataset level or workspace level.
## Admin Ingestion vs. Basic Ingestion
PowerBI provides two sets of API i.e. [Basic API and Admin API](https://learn.microsoft.com/en-us/rest/api/power-bi/).
-The Basic API returns metadata of PowerBI resources where service principal has granted access explicitly on resources whereas Admin API returns metadata of all PowerBI resources irrespective of whether service principal has granted or doesn't granted access explicitly on resources.
+The Basic API returns metadata of PowerBI resources where the service principal has been granted access explicitly on resources,
+whereas the Admin API returns metadata of all PowerBI resources irrespective of whether the service principal has been
+granted access explicitly on those resources.
-The Admin Ingestion (explain below) is the recommended way to execute PowerBI ingestion as this ingestion can extract most of the metadata.
+The Admin Ingestion (explained below) is the recommended way to execute PowerBI ingestion as this ingestion can extract most of the metadata.
### Admin Ingestion: Service Principal As Admin in Tenant Setting and Added as Member In Workspace
@@ -142,8 +145,9 @@ PowerBI Source would be able to ingest below listed metadata of that particular
- Endorsement as tag
- Dashboards
- Reports
- - Dashboard's Tiles
- - Report's Pages
+ - Dashboard Tiles
+ - Report Pages
+ - App
If you don't want to add a service principal as a member in your workspace, then you can enable the `admin_apis_only: true` in recipe to use PowerBI Admin API only.
@@ -154,7 +158,7 @@ Caveats of setting `admin_apis_only` to `true`:
### Basic Ingestion: Service Principal As Member In Workspace
-If you have added service principal as `member` in workspace then PowerBI Source would be able ingest below metadata of that particular workspace
+If you have added service principal as `member` in workspace then PowerBI Source would be able to ingest below metadata of that particular workspace
- Dashboards
- Reports
diff --git a/metadata-ingestion/examples/recipes/bigquery_to_datahub.dhub.yaml b/metadata-ingestion/examples/recipes/bigquery_to_datahub.dhub.yaml
index 86f4898d9d502..4210d0599a215 100644
--- a/metadata-ingestion/examples/recipes/bigquery_to_datahub.dhub.yaml
+++ b/metadata-ingestion/examples/recipes/bigquery_to_datahub.dhub.yaml
@@ -41,6 +41,23 @@ source:
# deny:
# - "*.*.*"
#storage_project_id: project-id-1234567
+ ## Lineage with GCS Source
+ # include_column_lineage_with_gcs: true/false
+ # gcs_lineage_config:
+ # path_specs:
+ # - include: "gs://my-bucket/foo/tests/bar.avro"
+ # - include: "gs://my-bucket/foo/tests/*.*"
+ # - include: "gs://my-bucket/foo/tests/{table}/*.avro"
+ # - include: "gs://my-bucket/foo/tests/{table}/*/*.avro"
+ # - include: "gs://my-bucket/foo/tests/{table}/*.*"
+ # - include: "gs://my-bucket/{dept}/tests/{table}/*.avro"
+ # - include: "gs://my-bucket/{dept}/tests/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.avro"
+ # - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.avro"
+ # - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
+ # - include: "gs://my-bucket/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
+ # - include: "gs://my-bucket/*/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
+ # strip_urls: false
+
## see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 12614f9ff36b2..0da0329dc8c8a 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -42,7 +42,6 @@
"python-dateutil>=2.8.0",
"tabulate",
"progressbar2",
- "termcolor>=1.0.0",
"psutil>=5.8.0",
"Deprecated",
"humanfriendly",
@@ -142,7 +141,7 @@
# https://github.com/great-expectations/great_expectations/pull/5382/files
# datahub does not depend on traitlets directly but great expectations does.
# https://github.com/ipython/traitlets/issues/741
- "traitlets<5.2.2",
+ "traitlets!=5.2.2",
"greenlet",
*cachetools_lib,
}
@@ -546,7 +545,6 @@
"types-pyOpenSSL",
"types-click-spinner>=0.1.13.1",
"types-ujson>=5.2.0",
- "types-termcolor>=1.0.0",
"types-Deprecated",
"types-protobuf>=4.21.0.1",
"sqlalchemy2-stubs",
diff --git a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py
index afac38e29722e..8ec4d3ad24937 100644
--- a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py
+++ b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py
@@ -363,7 +363,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
with get_default_graph() as graph:
_abort_if_non_existent_urn(graph, urn, "remove owners")
for mcp in dataproduct_patcher.build():
- print(json.dumps(mcp.to_obj()))
+ click.echo(json.dumps(mcp.to_obj()))
graph.emit(mcp)
diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py
index 08672528abb5d..37089e6f051f0 100644
--- a/metadata-ingestion/src/datahub/cli/timeline_cli.py
+++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py
@@ -6,7 +6,6 @@
import click
from requests import Response
-from termcolor import colored
from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
@@ -44,14 +43,14 @@ def pretty_id(id: Optional[str]) -> str:
assert schema_field_key is not None
field_path = schema_field_key.fieldPath
- return f"{colored('field','cyan')}:{colored(pretty_field_path(field_path),'white')}"
+ return f"{click.style('field', fg='cyan')}:{click.style(pretty_field_path(field_path), fg='white')}"
if id.startswith("[version=2.0]"):
- return f"{colored('field','cyan')}:{colored(pretty_field_path(id),'white')}"
+ return f"{click.style('field', fg='cyan')}:{click.style(pretty_field_path(id), fg='white')}"
if id.startswith("urn:li:dataset"):
dataset_key = dataset_urn_to_key(id)
if dataset_key:
- return f"{colored('dataset','cyan')}:{colored(dataset_key.platform[len('urn:li:dataPlatform:'):],'white')}:{colored(dataset_key.name,'white')}"
+ return f"{click.style('dataset', fg='cyan')}:{click.style(dataset_key.platform[len('urn:li:dataPlatform:'):], fg='white')}:{click.style(dataset_key.name, fg='white')}"
# failed to prettify, return original
return id
@@ -196,8 +195,8 @@ def timeline(
else "red"
)
- print(
- f"{colored(change_instant,'cyan')} - {colored(change_txn['semVer'],change_color)}"
+ click.echo(
+ f"{click.style(change_instant, fg='cyan')} - {click.style(change_txn['semVer'], fg=change_color)}"
)
if change_txn["changeEvents"] is not None:
for change_event in change_txn["changeEvents"]:
@@ -216,8 +215,8 @@ def timeline(
or change_event.get("entityUrn")
or ""
)
- print(
- f"\t{colored(change_event.get('changeType') or change_event.get('operation'),event_change_color)} {change_event.get('category')} {target_string} {element_string}: {change_event['description']}"
+ click.echo(
+ f"\t{click.style(change_event.get('changeType') or change_event.get('operation'), fg=event_change_color)} {change_event.get('category')} {target_string} {element_string}: {change_event['description']}"
)
else:
click.echo(
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
index a16a3df57d1bc..81fc7e5717686 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py
@@ -647,11 +647,11 @@ def _get_text_color(self, running: bool, failures: bool, warnings: bool) -> str:
return "cyan"
else:
if failures:
- return "bright_red"
+ return "red"
elif warnings:
- return "bright_yellow"
+ return "yellow"
else:
- return "bright_green"
+ return "green"
def has_failures(self) -> bool:
return bool(
@@ -674,7 +674,7 @@ def pretty_print_summary(
else:
click.echo()
click.secho("Cli report:", bold=True)
- click.secho(self.cli_report.as_string())
+ click.echo(self.cli_report.as_string())
click.secho(f"Source ({self.source_type}) report:", bold=True)
click.echo(self.source.get_report().as_string())
click.secho(f"Sink ({self.sink_type}) report:", bold=True)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
index a1bbb9dd6b0b9..76c2fbf48ccab 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
@@ -309,6 +309,16 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
self.bq_schema_extractor.table_refs,
)
+ # Lineage BQ to GCS
+ if (
+ self.config.include_table_lineage
+ and self.bq_schema_extractor.external_tables
+ ):
+ for dataset_urn, table in self.bq_schema_extractor.external_tables.items():
+ yield from self.lineage_extractor.gen_lineage_workunits_for_external_table(
+ dataset_urn, table.ddl, graph=self.ctx.graph
+ )
+
def get_report(self) -> BigQueryV2Report:
return self.report
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
index 331b583423093..ad293c702a520 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
@@ -21,6 +21,7 @@
from datahub.ingestion.glossary.classification_mixin import (
ClassificationSourceConfigMixin,
)
+from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterConfig
from datahub.ingestion.source.state.stateful_ingestion_base import (
StatefulLineageConfigMixin,
@@ -206,6 +207,39 @@ def get_sql_alchemy_url(self) -> str:
return "bigquery://"
+class GcsLineageProviderConfig(ConfigModel):
+ """
+ Any source that produces gcs lineage from/to Datasets should inherit this class.
+ """
+
+ path_specs: List[PathSpec] = Field(
+ default=[],
+ description="List of PathSpec. See below the details about PathSpec",
+ )
+
+ strip_urls: bool = Field(
+ default=True,
+ description="Strip filename from gcs url. It only applies if path_specs are not specified.",
+ )
+
+ ignore_non_path_spec_path: bool = Field(
+ default=False,
+ description="Ignore paths that do not match any entry in path_specs. It only applies if path_specs are specified.",
+ )
+
+
+class GcsDatasetLineageProviderConfigBase(ConfigModel):
+ """
+ Any source that produces gcs lineage from/to Datasets should inherit this class.
+ This is needed to group all lineage related configs under `gcs_lineage_config` config property.
+ """
+
+ gcs_lineage_config: GcsLineageProviderConfig = Field(
+ default=GcsLineageProviderConfig(),
+ description="Common config for gcs lineage generation",
+ )
+
+
class BigQueryFilterConfig(SQLFilterConfig):
project_ids: List[str] = Field(
default_factory=list,
@@ -328,6 +362,7 @@ class BigQueryIdentifierConfig(
class BigQueryV2Config(
+ GcsDatasetLineageProviderConfigBase,
BigQueryConnectionConfig,
BigQueryBaseConfig,
BigQueryFilterConfig,
@@ -473,6 +508,11 @@ def have_table_data_read_permission(self) -> bool:
description="Option to enable/disable lineage generation. Is enabled by default.",
)
+ include_column_lineage_with_gcs: bool = Field(
+ default=True,
+ description="When enabled, column-level lineage will be extracted from the gcs.",
+ )
+
max_query_duration: timedelta = Field(
default=timedelta(minutes=15),
description="Correction to pad start_time and end_time with. For handling the case where the read happens within our time range but the query completion event is delayed and happens after the configured end time.",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py
index 88c6eb1885f3b..7e8b2931282ff 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py
@@ -157,6 +157,8 @@ class BigQueryV2Report(
num_filtered_query_events: int = 0
num_usage_query_hash_collisions: int = 0
num_operational_stats_workunits_emitted: int = 0
+ num_lineage_dropped_gcs_path: int = 0
+
snapshots_scanned: int = 0
# view lineage
@@ -185,6 +187,7 @@ class BigQueryV2Report(
usage_start_time: Optional[datetime] = None
usage_end_time: Optional[datetime] = None
stateful_usage_ingestion_enabled: bool = False
+ num_skipped_external_table_lineage: int = 0
queries_extractor: Optional[BigQueryQueriesExtractorReport] = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
index 6361d5f266cb7..4f18c22c108a6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
@@ -121,6 +121,7 @@ class BigqueryTable(BaseTable):
columns_ignore_from_profiling: List[str] = field(default_factory=list)
external: bool = False
constraints: List[BigqueryTableConstraint] = field(default_factory=list)
+ table_type: Optional[str] = None
@dataclass
@@ -377,6 +378,7 @@ def _make_bigquery_table(
return BigqueryTable(
name=table.table_name,
created=table.created,
+ table_type=table.table_type,
last_altered=(
datetime.fromtimestamp(
table.get("last_altered") / 1000, tz=timezone.utc
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
index dc53e2f74959e..907e5c12e99a1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py
@@ -204,6 +204,11 @@ def __init__(
self.view_definitions: FileBackedDict[str] = FileBackedDict()
# Maps snapshot ref -> Snapshot
self.snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot] = FileBackedDict()
+ # Maps dataset urn -> external BigQuery table (used for BigQuery to GCS lineage)
+ self.external_tables: Dict[str, BigqueryTable] = defaultdict()
+ self.bq_external_table_pattern = (
+ r".*create\s+external\s+table\s+`?(?:project_id\.)?.*`?"
+ )
bq_project = (
self.config.project_on_behalf
@@ -957,6 +962,15 @@ def gen_dataset_workunits(
project_id, dataset_name, table.name
)
+ # Added for bigquery to gcs lineage extraction
+ if (
+ isinstance(table, BigqueryTable)
+ and table.table_type == "EXTERNAL"
+ and table.ddl is not None
+ and re.search(self.bq_external_table_pattern, table.ddl, re.IGNORECASE)
+ ):
+ self.external_tables[dataset_urn] = table
+
status = Status(removed=False)
yield MetadataChangeProposalWrapper(
entityUrn=dataset_urn, aspect=status
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
index c9d0738bea7dc..b542992a7924a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
@@ -1,6 +1,8 @@
import collections
import itertools
+import json
import logging
+import re
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import (
@@ -15,17 +17,20 @@
Tuple,
Union,
)
+from urllib.parse import urlparse
import humanfriendly
import sqlglot
from google.cloud.datacatalog import lineage_v1
from google.cloud.logging_v2.client import Client as GCPLoggingClient
+from datahub.api.entities.dataset.dataset import Dataset
from datahub.configuration.pattern_utils import is_schema_allowed
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.source_helpers import auto_workunit
from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.graph.client import DataHubGraph
from datahub.ingestion.source.bigquery_v2.bigquery_audit import (
AuditLogEntry,
BigQueryAuditMetadata,
@@ -51,16 +56,19 @@
BQ_FILTER_RULE_TEMPLATE_V2_LINEAGE,
bigquery_audit_metadata_query_template_lineage,
)
+from datahub.ingestion.source.gcs import gcs_utils
from datahub.ingestion.source.state.redundant_run_skip_handler import (
RedundantLineageRunSkipHandler,
)
from datahub.ingestion.source_report.ingestion_stage import LINEAGE_EXTRACTION
+from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata
from datahub.metadata.schema_classes import (
AuditStampClass,
DatasetLineageTypeClass,
FineGrainedLineageClass,
FineGrainedLineageDownstreamTypeClass,
FineGrainedLineageUpstreamTypeClass,
+ SchemaMetadataClass,
UpstreamClass,
UpstreamLineageClass,
)
@@ -247,6 +255,7 @@ def __init__(
format_queries=True,
)
self.report.sql_aggregator = self.aggregator.report
+ self.gcs_uris_regex = re.compile(r"uris=\[([^\]]+)\]")
def get_time_window(self) -> Tuple[datetime, datetime]:
if self.redundant_run_skip_handler:
@@ -918,3 +927,190 @@ def test_capability(self, project_id: str) -> None:
def report_status(self, step: str, status: bool) -> None:
if self.redundant_run_skip_handler:
self.redundant_run_skip_handler.report_current_run_status(step, status)
+
+ def gen_lineage_workunits_for_external_table(
+ self,
+ dataset_urn: str,
+ ddl: Optional[str],
+ graph: Optional[DataHubGraph] = None,
+ ) -> Iterable[MetadataWorkUnit]:
+
+ if not ddl:
+ return
+
+ # Expect URIs in `uris=[""]` format
+ uris_match = self.gcs_uris_regex.search(ddl)
+ if not uris_match:
+ self.report.num_skipped_external_table_lineage += 1
+ logger.warning(f"Unable to parse GCS URI from the provided DDL {ddl}.")
+ return
+
+ uris_str = uris_match.group(1)
+ try:
+ source_uris = json.loads(f"[{uris_str}]")
+ except json.JSONDecodeError as e:
+ self.report.num_skipped_external_table_lineage += 1
+ logger.warning(
+ f"Json load failed on loading source uri with error: {e}. The field value was: {uris_str}"
+ )
+ return
+
+ lineage_info = self.get_lineage_for_external_table(
+ dataset_urn=dataset_urn,
+ source_uris=source_uris,
+ graph=graph,
+ )
+
+ if lineage_info:
+ yield MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn, aspect=lineage_info
+ ).as_workunit()
+
+ def get_lineage_for_external_table(
+ self,
+ dataset_urn: str,
+ source_uris: List[str],
+ graph: Optional[DataHubGraph] = None,
+ ) -> Optional[UpstreamLineageClass]:
+
+ upstreams_list: List[UpstreamClass] = []
+ fine_grained_lineages: List[FineGrainedLineageClass] = []
+ gcs_urns: Set[str] = set()
+
+ for source_uri in source_uris:
+ # Skip any source URI that does not have the gs:// prefix.
+ # Right now only GCS lineage is supported.
+ if not gcs_utils.is_gcs_uri(source_uri):
+ continue
+ gcs_path = self._get_gcs_path(source_uri)
+
+ if gcs_path is None:
+ continue
+
+ path = gcs_utils.strip_gcs_prefix(gcs_path)
+ urn = mce_builder.make_dataset_urn_with_platform_instance(
+ platform="gcs",
+ name=path,
+ env=self.config.env,
+ platform_instance=(
+ self.config.platform_instance
+ if self.config.platform_instance is not None
+ else None
+ ),
+ )
+ gcs_urns.add(urn)
+
+ upstreams_list.extend(
+ [
+ UpstreamClass(
+ dataset=source_dataset_urn,
+ type=DatasetLineageTypeClass.COPY,
+ )
+ for source_dataset_urn in gcs_urns
+ ]
+ )
+
+ if not upstreams_list:
+ return None
+
+ if self.config.include_column_lineage_with_gcs:
+ assert graph
+ schema_metadata: Optional[SchemaMetadataClass] = graph.get_schema_metadata(
+ dataset_urn
+ )
+ for gcs_dataset_urn in gcs_urns:
+ schema_metadata_for_gcs: Optional[
+ SchemaMetadataClass
+ ] = graph.get_schema_metadata(gcs_dataset_urn)
+ if schema_metadata and schema_metadata_for_gcs:
+ fine_grained_lineage = self.get_fine_grained_lineages_with_gcs(
+ dataset_urn,
+ gcs_dataset_urn,
+ schema_metadata,
+ schema_metadata_for_gcs,
+ )
+ if not fine_grained_lineage:
+ logger.warning(
+ f"Failed to retrieve fine-grained lineage for dataset {dataset_urn} and GCS {gcs_dataset_urn}. "
+ f"Check schema metadata: {schema_metadata} and GCS metadata: {schema_metadata_for_gcs}."
+ )
+ continue
+
+ fine_grained_lineages.extend(fine_grained_lineage)
+
+ upstream_lineage = UpstreamLineageClass(
+ upstreams=upstreams_list, fineGrainedLineages=fine_grained_lineages or None
+ )
+ return upstream_lineage
+
+ def _get_gcs_path(self, path: str) -> Optional[str]:
+ if self.config.gcs_lineage_config:
+ for path_spec in self.config.gcs_lineage_config.path_specs:
+ if not path_spec.allowed(path):
+ logger.debug(
+ f"Skipping gcs path {path} as it does not match any path spec."
+ )
+ self.report.num_lineage_dropped_gcs_path += 1
+ continue
+
+ _, table_path = path_spec.extract_table_name_and_path(path)
+ return table_path
+
+ if (
+ self.config.gcs_lineage_config.ignore_non_path_spec_path
+ and len(self.config.gcs_lineage_config.path_specs) > 0
+ ):
+ self.report.num_lineage_dropped_gcs_path += 1
+ logger.debug(
+ f"Skipping gcs path {path} as it does not match any path spec."
+ )
+ return None
+
+ if self.config.gcs_lineage_config.strip_urls:
+ if "/" in urlparse(path).path:
+ return str(path.rsplit("/", 1)[0])
+
+ return path
+
+ def get_fine_grained_lineages_with_gcs(
+ self,
+ dataset_urn: str,
+ gcs_dataset_urn: str,
+ schema_metadata: SchemaMetadata,
+ schema_metadata_for_gcs: SchemaMetadata,
+ ) -> Optional[List[FineGrainedLineageClass]]:
+ def simplify_field_path(field_path):
+ return Dataset._simplify_field_path(field_path)
+
+ if schema_metadata and schema_metadata_for_gcs:
+ fine_grained_lineages: List[FineGrainedLineageClass] = []
+ for field in schema_metadata.fields:
+ field_path_v1 = simplify_field_path(field.fieldPath)
+ matching_gcs_field = next(
+ (
+ f
+ for f in schema_metadata_for_gcs.fields
+ if simplify_field_path(f.fieldPath) == field_path_v1
+ ),
+ None,
+ )
+ if matching_gcs_field:
+ fine_grained_lineages.append(
+ FineGrainedLineageClass(
+ downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
+ downstreams=[
+ mce_builder.make_schema_field_urn(
+ dataset_urn, field_path_v1
+ )
+ ],
+ upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
+ upstreams=[
+ mce_builder.make_schema_field_urn(
+ gcs_dataset_urn,
+ simplify_field_path(matching_gcs_field.fieldPath),
+ )
+ ],
+ )
+ )
+ return fine_grained_lineages
+ return None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
index b6aa8c1f5f1f1..7271bf6102639 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py
@@ -70,6 +70,7 @@ class BIAssetSubTypes(StrEnum):
# PowerBI
POWERBI_TILE = "PowerBI Tile"
POWERBI_PAGE = "PowerBI Page"
+ POWERBI_APP = "App"
# Mode
MODE_REPORT = "Report"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
index 1e15f6b395ca5..e40e284d6e0a4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
@@ -144,8 +144,8 @@ class FivetranSourceReport(StaleEntityRemovalSourceReport):
def report_connectors_scanned(self, count: int = 1) -> None:
self.connectors_scanned += count
- def report_connectors_dropped(self, model: str) -> None:
- self.filtered_connectors.append(model)
+ def report_connectors_dropped(self, connector: str) -> None:
+ self.filtered_connectors.append(connector)
class PlatformDetail(ConfigModel):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
index 907bfa3a167aa..21c967e162891 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
@@ -76,7 +76,7 @@ def __init__(self, config: FivetranSourceConfig, ctx: PipelineContext):
self.audit_log = FivetranLogAPI(self.config.fivetran_log_config)
- def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None:
+ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> Dict[str, str]:
input_dataset_urn_list: List[DatasetUrn] = []
output_dataset_urn_list: List[DatasetUrn] = []
fine_grained_lineage: List[FineGrainedLineage] = []
@@ -93,8 +93,11 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None:
connector.connector_type
]
else:
- logger.info(
- f"Fivetran connector source type: {connector.connector_type} is not supported to mapped with Datahub dataset entity."
+ self.report.info(
+ title="Guessing source platform for lineage",
+ message="We encountered a connector type that we don't fully support yet. "
+ "We will attempt to guess the platform based on the connector type.",
+ context=f"{connector.connector_name} (connector_id: {connector.connector_id}, connector_type: {connector.connector_type})",
)
source_details.platform = connector.connector_type
@@ -170,7 +173,19 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None:
datajob.inlets.extend(input_dataset_urn_list)
datajob.outlets.extend(output_dataset_urn_list)
datajob.fine_grained_lineages.extend(fine_grained_lineage)
- return None
+
+ return dict(
+ **{
+ f"source.{k}": str(v)
+ for k, v in source_details.dict().items()
+ if v is not None
+ },
+ **{
+ f"destination.{k}": str(v)
+ for k, v in destination_details.dict().items()
+ if v is not None
+ },
+ )
def _generate_dataflow_from_connector(self, connector: Connector) -> DataFlow:
return DataFlow(
@@ -196,23 +211,23 @@ def _generate_datajob_from_connector(self, connector: Connector) -> DataJob:
owners={owner_email} if owner_email else set(),
)
- job_property_bag: Dict[str, str] = {}
- allowed_connection_keys = [
- Constant.PAUSED,
- Constant.SYNC_FREQUENCY,
- Constant.DESTINATION_ID,
- ]
- for key in allowed_connection_keys:
- if hasattr(connector, key) and getattr(connector, key) is not None:
- job_property_bag[key] = repr(getattr(connector, key))
- datajob.properties = job_property_bag
-
# Map connector source and destination table with dataset entity
# Also extend the fine grained lineage of column if include_column_lineage is True
- self._extend_lineage(connector=connector, datajob=datajob)
-
+ lineage_properties = self._extend_lineage(connector=connector, datajob=datajob)
# TODO: Add fine grained lineages of dataset after FineGrainedLineageDownstreamType.DATASET enabled
+ connector_properties: Dict[str, str] = {
+ "connector_id": connector.connector_id,
+ "connector_type": connector.connector_type,
+ "paused": str(connector.paused),
+ "sync_frequency": str(connector.sync_frequency),
+ "destination_id": connector.destination_id,
+ }
+ datajob.properties = {
+ **connector_properties,
+ **lineage_properties,
+ }
+
return datajob
def _generate_dpi_from_job(self, job: Job, datajob: DataJob) -> DataProcessInstance:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
index 79f9d513bfb7c..529002270cdd9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
@@ -259,20 +259,23 @@ def get_allowed_connectors_list(
logger.info("Fetching connector list")
connector_list = self._query(self.fivetran_log_query.get_connectors_query())
for connector in connector_list:
+ connector_id = connector[Constant.CONNECTOR_ID]
connector_name = connector[Constant.CONNECTOR_NAME]
if not connector_patterns.allowed(connector_name):
- report.report_connectors_dropped(connector_name)
+ report.report_connectors_dropped(
+ f"{connector_name} (connector_id: {connector_id}, dropped due to filter pattern)"
+ )
continue
if not destination_patterns.allowed(
destination_id := connector[Constant.DESTINATION_ID]
):
report.report_connectors_dropped(
- f"{connector_name} (destination_id: {destination_id})"
+ f"{connector_name} (connector_id: {connector_id}, destination_id: {destination_id})"
)
continue
connectors.append(
Connector(
- connector_id=connector[Constant.CONNECTOR_ID],
+ connector_id=connector_id,
connector_name=connector_name,
connector_type=connector[Constant.CONNECTOR_TYPE_ID],
paused=connector[Constant.PAUSED],
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
index 39c4d7712b4fc..65378928b244d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
@@ -1,8 +1,8 @@
from typing import List
# Safeguards to prevent fetching massive amounts of data.
-MAX_TABLE_LINEAGE_PER_CONNECTOR = 50
-MAX_COLUMN_LINEAGE_PER_CONNECTOR = 500
+MAX_TABLE_LINEAGE_PER_CONNECTOR = 120
+MAX_COLUMN_LINEAGE_PER_CONNECTOR = 1000
MAX_JOBS_PER_CONNECTOR = 500
@@ -33,6 +33,7 @@ def get_connectors_query(self) -> str:
FROM {self.db_clause}connector
WHERE
_fivetran_deleted = FALSE
+QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY _fivetran_synced DESC) = 1
"""
def get_users_query(self) -> str:
@@ -86,21 +87,29 @@ def get_table_lineage_query(self, connector_ids: List[str]) -> str:
return f"""\
SELECT
- stm.connector_id as connector_id,
- stm.id as source_table_id,
- stm.name as source_table_name,
- ssm.name as source_schema_name,
- dtm.id as destination_table_id,
- dtm.name as destination_table_name,
- dsm.name as destination_schema_name
-FROM {self.db_clause}table_lineage as tl
-JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id
-JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
-JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
-JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
-WHERE stm.connector_id IN ({formatted_connector_ids})
-QUALIFY ROW_NUMBER() OVER (PARTITION BY stm.connector_id ORDER BY tl.created_at DESC) <= {MAX_TABLE_LINEAGE_PER_CONNECTOR}
-ORDER BY stm.connector_id, tl.created_at DESC
+ *
+FROM (
+ SELECT
+ stm.connector_id as connector_id,
+ stm.id as source_table_id,
+ stm.name as source_table_name,
+ ssm.name as source_schema_name,
+ dtm.id as destination_table_id,
+ dtm.name as destination_table_name,
+ dsm.name as destination_schema_name,
+ tl.created_at as created_at,
+ ROW_NUMBER() OVER (PARTITION BY stm.connector_id, stm.id, dtm.id ORDER BY tl.created_at DESC) as table_combo_rn
+ FROM {self.db_clause}table_lineage as tl
+ JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id
+ JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id
+ JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id
+ JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id
+ WHERE stm.connector_id IN ({formatted_connector_ids})
+)
+-- Ensure that we only get back one entry per source and destination pair.
+WHERE table_combo_rn = 1
+QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY created_at DESC) <= {MAX_TABLE_LINEAGE_PER_CONNECTOR}
+ORDER BY connector_id, created_at DESC
"""
def get_column_lineage_query(self, connector_ids: List[str]) -> str:
@@ -109,19 +118,31 @@ def get_column_lineage_query(self, connector_ids: List[str]) -> str:
return f"""\
SELECT
- scm.table_id as source_table_id,
- dcm.table_id as destination_table_id,
- scm.name as source_column_name,
- dcm.name as destination_column_name
-FROM {self.db_clause}column_lineage as cl
-JOIN {self.db_clause}source_column_metadata as scm
- ON cl.source_column_id = scm.id
-JOIN {self.db_clause}destination_column_metadata as dcm
- ON cl.destination_column_id = dcm.id
--- Only joining source_table_metadata to get the connector_id.
-JOIN {self.db_clause}source_table_metadata as stm
- ON scm.table_id = stm.id
-WHERE stm.connector_id IN ({formatted_connector_ids})
-QUALIFY ROW_NUMBER() OVER (PARTITION BY stm.connector_id ORDER BY cl.created_at DESC) <= {MAX_COLUMN_LINEAGE_PER_CONNECTOR}
-ORDER BY stm.connector_id, cl.created_at DESC
+ source_table_id,
+ destination_table_id,
+ source_column_name,
+ destination_column_name
+FROM (
+ SELECT
+ stm.connector_id as connector_id,
+ scm.table_id as source_table_id,
+ dcm.table_id as destination_table_id,
+ scm.name as source_column_name,
+ dcm.name as destination_column_name,
+ cl.created_at as created_at,
+ ROW_NUMBER() OVER (PARTITION BY stm.connector_id, cl.source_column_id, cl.destination_column_id ORDER BY cl.created_at DESC) as column_combo_rn
+ FROM {self.db_clause}column_lineage as cl
+ JOIN {self.db_clause}source_column_metadata as scm
+ ON cl.source_column_id = scm.id
+ JOIN {self.db_clause}destination_column_metadata as dcm
+ ON cl.destination_column_id = dcm.id
+ -- Only joining source_table_metadata to get the connector_id.
+ JOIN {self.db_clause}source_table_metadata as stm
+ ON scm.table_id = stm.id
+ WHERE stm.connector_id IN ({formatted_connector_ids})
+)
+-- Ensure that we only get back one entry per (connector, source column, destination column) pair.
+WHERE column_combo_rn = 1
+QUALIFY ROW_NUMBER() OVER (PARTITION BY connector_id ORDER BY created_at DESC) <= {MAX_COLUMN_LINEAGE_PER_CONNECTOR}
+ORDER BY connector_id, created_at DESC
"""
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
index 0716a658b61c6..8a3f8ed6131a2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
@@ -473,6 +473,11 @@ class PowerBiDashboardSourceConfig(
"To maintain backward compatibility, this is set to False.",
)
+ extract_app: bool = pydantic.Field(
+ default=False,
+ description="Whether to ingest workspace app. Requires DataHub server 0.14.2+.",
+ )
+
@root_validator(skip_on_failure=True)
def validate_extract_column_level_lineage(cls, values: Dict) -> Dict:
flags = [
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index f5c0aedb329cd..72336afbaacd0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -4,6 +4,7 @@
#
#########################################################
import logging
+from datetime import datetime
from typing import Iterable, List, Optional, Tuple, Union
import datahub.emitter.mce_builder as builder
@@ -59,6 +60,7 @@
FineGrainedLineageUpstreamType,
)
from datahub.metadata.schema_classes import (
+ AuditStampClass,
BrowsePathsClass,
ChangeTypeClass,
ChartInfoClass,
@@ -70,6 +72,7 @@
DatasetLineageTypeClass,
DatasetProfileClass,
DatasetPropertiesClass,
+ EdgeClass,
GlobalTagsClass,
OtherSchemaClass,
OwnerClass,
@@ -1006,7 +1009,9 @@ def to_chart_mcps(
)
# Browse path
- browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"])
+ browse_path = BrowsePathsClass(
+ paths=[f"/{Constant.PLATFORM_NAME}/{workspace.name}"]
+ )
browse_path_mcp = self.new_mcp(
entity_urn=chart_urn,
aspect=browse_path,
@@ -1306,6 +1311,95 @@ def extract_independent_datasets(
)
)
+ def emit_app(
+ self, workspace: powerbi_data_classes.Workspace
+ ) -> Iterable[MetadataChangeProposalWrapper]:
+ if workspace.app is None:
+ return
+
+ if not self.source_config.extract_app:
+ self.reporter.info(
+ title="App Ingestion Is Disabled",
+ message="You are missing workspace app metadata. Please set flag `extract_app` to `true` in recipe to ingest workspace app.",
+ context=f"workspace-name={workspace.name}, app-name = {workspace.app.name}",
+ )
+ return
+
+ assets_within_app: List[EdgeClass] = [
+ EdgeClass(
+ destinationUrn=builder.make_dashboard_urn(
+ platform=self.source_config.platform_name,
+ platform_instance=self.source_config.platform_instance,
+ name=powerbi_data_classes.Dashboard.get_urn_part_by_id(
+ app_dashboard.original_dashboard_id
+ ),
+ )
+ )
+ for app_dashboard in workspace.app.dashboards
+ ]
+
+ assets_within_app.extend(
+ [
+ EdgeClass(
+ destinationUrn=builder.make_dashboard_urn(
+ platform=self.source_config.platform_name,
+ platform_instance=self.source_config.platform_instance,
+ name=powerbi_data_classes.Report.get_urn_part_by_id(
+ app_report.original_report_id
+ ),
+ )
+ )
+ for app_report in workspace.app.reports
+ ]
+ )
+
+ if assets_within_app:
+ logger.debug(
+ f"Emitting metadata-workunits for app {workspace.app.name}({workspace.app.id})"
+ )
+
+ app_urn: str = builder.make_dashboard_urn(
+ platform=self.source_config.platform_name,
+ platform_instance=self.source_config.platform_instance,
+ name=powerbi_data_classes.App.get_urn_part_by_id(workspace.app.id),
+ )
+
+ dashboard_info: DashboardInfoClass = DashboardInfoClass(
+ title=workspace.app.name,
+ description=workspace.app.description
+ if workspace.app.description
+ else workspace.app.name,
+ # lastModified=workspace.app.last_update,
+ lastModified=ChangeAuditStamps(
+ lastModified=AuditStampClass(
+ actor="urn:li:corpuser:unknown",
+ time=int(
+ datetime.strptime(
+ workspace.app.last_update, "%Y-%m-%dT%H:%M:%S.%fZ"
+ ).timestamp()
+ ),
+ )
+ if workspace.app.last_update
+ else None
+ ),
+ dashboards=assets_within_app,
+ )
+
+ # Browse path
+ browse_path: BrowsePathsClass = BrowsePathsClass(
+ paths=[f"/powerbi/{workspace.name}"]
+ )
+
+ yield from MetadataChangeProposalWrapper.construct_many(
+ entityUrn=app_urn,
+ aspects=(
+ dashboard_info,
+ browse_path,
+ StatusClass(removed=False),
+ SubTypesClass(typeNames=[BIAssetSubTypes.POWERBI_APP]),
+ ),
+ )
+
def get_workspace_workunit(
self, workspace: powerbi_data_classes.Workspace
) -> Iterable[MetadataWorkUnit]:
@@ -1318,6 +1412,8 @@ def get_workspace_workunit(
# Return workunit to a Datahub Ingestion framework
yield workunit
+ yield from auto_workunit(self.emit_app(workspace=workspace))
+
for dashboard in workspace.dashboards:
try:
# Fetch PowerBi users for dashboards
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py
index d54b4a42b742e..9407ef7a51b58 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py
@@ -37,6 +37,35 @@ class DatasetKey(ContainerKey):
dataset: str
+@dataclass
+class AppDashboard:
+ id: str
+ original_dashboard_id: str
+
+
+@dataclass
+class AppReport:
+ id: str
+ original_report_id: str
+
+
+@dataclass
+class App:
+ id: str
+ name: str
+ description: Optional[str]
+ last_update: Optional[str]
+ dashboards: List["AppDashboard"]
+ reports: List["AppReport"]
+
+ def get_urn_part(self):
+ return App.get_urn_part_by_id(self.id)
+
+ @staticmethod
+ def get_urn_part_by_id(id_: str) -> str:
+ return f"apps.{id_}"
+
+
@dataclass
class Workspace:
id: str
@@ -49,6 +78,7 @@ class Workspace:
dashboard_endorsements: Dict[str, List[str]]
scan_result: dict
independent_datasets: List["PowerBIDataset"]
+ app: Optional["App"]
def get_urn_part(self, workspace_id_as_urn_part: Optional[bool] = False) -> str:
# shouldn't use workspace name, as they can be the same?
@@ -235,7 +265,11 @@ class Report:
tags: List[str]
def get_urn_part(self):
- return f"reports.{self.id}"
+ return Report.get_urn_part_by_id(self.id)
+
+ @staticmethod
+ def get_urn_part_by_id(id_: str) -> str:
+ return f"reports.{id_}"
@dataclass
@@ -273,7 +307,11 @@ class Dashboard:
webUrl: Optional[str]
def get_urn_part(self):
- return f"dashboards.{self.id}"
+ return Dashboard.get_urn_part_by_id(self.id)
+
+ @staticmethod
+ def get_urn_part_by_id(id_: str) -> str:
+ return f"dashboards.{id_}"
def __members(self):
return (self.id,)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
index 8849e19ea8622..f8fff2391d10b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
@@ -13,6 +13,7 @@
from datahub.configuration.common import AllowDenyPattern, ConfigurationError
from datahub.ingestion.source.powerbi.config import Constant
from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import (
+ App,
Column,
Dashboard,
Measure,
@@ -143,6 +144,13 @@ def get_dataset_parameters(
def get_users(self, workspace_id: str, entity: str, entity_id: str) -> List[User]:
pass
+ @abstractmethod
+ def _get_app(
+ self,
+ app_id: str,
+ ) -> Optional[Dict]:
+ pass
+
def _get_authority_url(self):
return f"{DataResolverBase.AUTHORITY}{self.__tenant_id}"
@@ -411,6 +419,37 @@ def itr_pages(
page_number += 1
+ def get_app(
+ self,
+ app_id: str,
+ ) -> Optional[App]:
+
+ raw_app: Optional[Dict] = self._get_app(
+ app_id=app_id,
+ )
+
+ if raw_app is None:
+ return None
+
+ assert (
+ Constant.ID in raw_app
+ ), f"{Constant.ID} is required field not present in server response"
+
+ assert (
+ Constant.NAME in raw_app
+ ), f"{Constant.NAME} is required field not present in server response"
+
+ return App(
+ id=raw_app[Constant.ID],
+ name=raw_app[Constant.NAME],
+ description=raw_app.get(Constant.DESCRIPTION),
+ last_update=raw_app.get(Constant.LAST_UPDATE),
+ dashboards=[], # dashboards and reports of App are available in scan-result response
+ reports=[], # There is an App section in documentation https://learn.microsoft.com/en-us/rest/api/power-bi/dashboards/get-dashboards-in-group#code-try-0
+ # However the report API mentioned in that section is not returning the reports
+ # We will collect these details from the scan-result.
+ )
+
class RegularAPIResolver(DataResolverBase):
# Regular access endpoints
@@ -680,6 +719,15 @@ def profile_dataset(
table.column_count = column_count
+ def _get_app(
+ self,
+ app_id: str,
+ ) -> Optional[Dict]:
+ # [Date: 2024/10/18] As per API doc, the service principal approach is not supported for regular API
+ # https://learn.microsoft.com/en-us/rest/api/power-bi/apps/get-app
+
+ return None
+
class AdminAPIResolver(DataResolverBase):
# Admin access endpoints
@@ -993,3 +1041,22 @@ def profile_dataset(
) -> None:
logger.debug("Profile dataset is unsupported in Admin API")
return None
+
+ def _get_app(
+ self,
+ app_id: str,
+ ) -> Optional[Dict]:
+
+ app_endpoint = self.API_ENDPOINTS[Constant.GET_WORKSPACE_APP].format(
+ POWERBI_ADMIN_BASE_URL=DataResolverBase.ADMIN_BASE_URL,
+ APP_ID=app_id,
+ )
+ # Hit PowerBi
+ logger.debug(f"Request to app URL={app_endpoint}")
+
+ for page in self.itr_pages(endpoint=app_endpoint):
+ for app in page:
+ if Constant.ID in app and app_id == app[Constant.ID]:
+ return app
+
+ return None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py
index 37793bc32980b..b67f257d9eb5b 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py
@@ -13,6 +13,9 @@
from datahub.ingestion.source.powerbi.rest_api_wrapper import data_resolver
from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import (
FIELD_TYPE_MAPPING,
+ App,
+ AppDashboard,
+ AppReport,
Column,
Dashboard,
Measure,
@@ -264,6 +267,7 @@ def get_workspaces(self) -> List[Workspace]:
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
+ app=None, # It will be populated in _fill_metadata_from_scan_result method
)
for workspace in groups
]
@@ -423,6 +427,87 @@ def _get_workspace_datasets(self, workspace: Workspace) -> dict:
dataset_instance.tables.append(table)
return dataset_map
+ def get_app(
+ self,
+ app_id: str,
+ ) -> Optional[App]: # Delegates to the admin API resolver's get_app and returns its result unchanged
+ return self.__admin_api_resolver.get_app(
+ app_id=app_id,
+ )
+
+ def _populate_app_details(
+ self, workspace: Workspace, workspace_metadata: Dict
+ ) -> None:
+ # Sets workspace.app (with its reports and dashboards) from the scan-result workspace_metadata;
+ # returns without setting it when the workspace has no app or the app lookup returns None.
+ #
+ # App_id is not present at the root level of workspace_metadata; it is read from the entries of
+ # the workspace_metadata dashboards and reports lists, which hold duplicate entries for everything
+ # included in the app. A duplicate report entry carries the key `originalReportObjectId`, referencing
+ # the actual report id of the workspace. A duplicate dashboard entry has a `displayName` generated
+ # from the displayName of the original dashboard with the prefix "[App] ".
+ app_id: Optional[str] = None
+ app_reports: List[AppReport] = []
+ # Filter app reports
+ for report in workspace_metadata.get(Constant.REPORTS) or []:
+ if report.get(Constant.APP_ID):
+ app_reports.append(
+ AppReport(
+ id=report[Constant.ID],
+ original_report_id=report[Constant.ORIGINAL_REPORT_OBJECT_ID],
+ )
+ )
+ if app_id is None: # In PowerBI one workspace can have one app
+ app_id = report.get(Constant.APP_ID)
+
+ raw_app_dashboards: List[Dict] = []
+ # Filter app dashboards
+ for dashboard in workspace_metadata.get(Constant.DASHBOARDS) or []:
+ if dashboard.get(Constant.APP_ID):
+ raw_app_dashboards.append(dashboard)
+ if app_id is None: # In PowerBI, one workspace contains one app
+ app_id = dashboard[Constant.APP_ID] # read from this dashboard entry, not the reports-loop variable
+
+ # The workspace doesn't have an App. The two loops above could be avoided
+ # if app_id were available at root level in workspace_metadata
+ if app_id is None:
+ logger.debug(f"Workspace {workspace.name} does not contain an app.")
+ return
+
+ app: Optional[App] = self.get_app(app_id=app_id)
+ if app is None:
+ self.__reporter.info(
+ title="App Not Found",
+ message="The workspace includes an app, but its metadata is missing from the API response.",
+ context=f"workspace_name={workspace.name}",
+ )
+ return
+
+ # Map app-dashboard displayName -> raw entry, to find out which dashboards belong to the App
+ workspace_dashboard_map: Dict[str, Dict] = {
+ raw_dashboard[Constant.DISPLAY_NAME]: raw_dashboard
+ for raw_dashboard in raw_app_dashboards
+ }
+
+ app_dashboards: List[AppDashboard] = []
+ for dashboard in workspace_metadata.get(Constant.DASHBOARDS) or []:
+ app_dashboard_display_name = f"[App] {dashboard[Constant.DISPLAY_NAME]}" # A Dashboard is considered part of an App if the workspace_metadata contains a Dashboard with a label formatted as "[App] <displayName>".
+ if (
+ app_dashboard_display_name in workspace_dashboard_map
+ ): # This dashboard is part of the App
+ app_dashboards.append(
+ AppDashboard(
+ id=workspace_dashboard_map[app_dashboard_display_name][
+ Constant.ID
+ ],
+ original_dashboard_id=dashboard[Constant.ID],
+ )
+ )
+
+ app.reports = app_reports
+ app.dashboards = app_dashboards
+ workspace.app = app
+
def _fill_metadata_from_scan_result(
self,
workspaces: List[Workspace],
@@ -463,6 +548,7 @@ def _fill_metadata_from_scan_result(
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
+ app=None, # It is getting set from scan-result
)
cur_workspace.scan_result = workspace_metadata
cur_workspace.datasets = self._get_workspace_datasets(cur_workspace)
@@ -482,6 +568,10 @@ def _fill_metadata_from_scan_result(
"false "
)
+ self._populate_app_details(
+ workspace=cur_workspace,
+ workspace_metadata=workspace_metadata,
+ )
workspaces.append(cur_workspace)
return workspaces
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py
index bd987c2da7c76..11827bace4b5a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py
@@ -462,8 +462,6 @@ def _create_table(
datetime.fromtimestamp(obj.updated_at / 1000, tz=timezone.utc)
if obj.updated_at
else None
- if obj.updated_at
- else None
),
updated_by=obj.updated_by,
table_id=obj.table_id,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py
index a00a52ae54207..f4579376a3b3a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py
@@ -12,8 +12,10 @@
class UnityCatalogUsagePerfReport(Report):
get_queries_timer: PerfTimer = field(default_factory=PerfTimer)
sql_parsing_timer: PerfTimer = field(default_factory=PerfTimer)
+ spark_sql_parsing_timer: PerfTimer = field(default_factory=PerfTimer)
aggregator_add_event_timer: PerfTimer = field(default_factory=PerfTimer)
gen_operation_timer: PerfTimer = field(default_factory=PerfTimer)
+ query_fingerprinting_timer: PerfTimer = field(default_factory=PerfTimer)
@dataclass
@@ -32,6 +34,7 @@ class UnityCatalogReport(IngestionStageReport, ProfilingSqlReport):
num_external_upstreams_unsupported: int = 0
num_queries: int = 0
+ num_unique_queries: int = 0
num_queries_dropped_parse_failure: int = 0
num_queries_missing_table: int = 0 # Can be due to pattern filter
num_queries_duplicate_table: int = 0
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py
index 08482c9d2fa3b..8c42ac81b98cf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py
@@ -22,6 +22,7 @@
from datahub.ingestion.source.unity.report import UnityCatalogReport
from datahub.ingestion.source.usage.usage_common import UsageAggregator
from datahub.metadata.schema_classes import OperationClass
+from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
logger = logging.getLogger(__name__)
@@ -76,6 +77,7 @@ def _get_workunits_internal(
self, table_refs: Set[TableReference]
) -> Iterable[MetadataWorkUnit]:
table_map = defaultdict(list)
+ query_hashes = set()
for ref in table_refs:
table_map[ref.table].append(ref)
table_map[f"{ref.schema}.{ref.table}"].append(ref)
@@ -85,6 +87,13 @@ def _get_workunits_internal(
for query in self._get_queries():
self.report.num_queries += 1
with current_timer.pause():
+ with self.report.usage_perf_report.query_fingerprinting_timer:
+ query_hashes.add(
+ get_query_fingerprint(
+ query.query_text, "databricks", fast=True
+ )
+ )
+ self.report.num_unique_queries = len(query_hashes)
table_info = self._parse_query(query, table_map)
if table_info is not None:
if self.config.include_operational_stats:
@@ -166,7 +175,8 @@ def _parse_query(
with self.report.usage_perf_report.sql_parsing_timer:
table_info = self._parse_query_via_lineage_runner(query.query_text)
if table_info is None and query.statement_type == QueryStatementType.SELECT:
- table_info = self._parse_query_via_spark_sql_plan(query.query_text)
+ with self.report.usage_perf_report.spark_sql_parsing_timer:
+ table_info = self._parse_query_via_spark_sql_plan(query.query_text)
if table_info is None:
self.report.num_queries_dropped_parse_failure += 1
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_ownership.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_ownership.py
index 54be2e5fac1e3..b107a62c905b4 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_ownership.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_ownership.py
@@ -13,9 +13,7 @@
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.graph.client import DataHubGraph
-from datahub.ingestion.transformer.dataset_transformer import (
- DatasetOwnershipTransformer,
-)
+from datahub.ingestion.transformer.dataset_transformer import OwnershipTransformer
from datahub.metadata.schema_classes import (
BrowsePathsV2Class,
MetadataChangeProposalClass,
@@ -37,7 +35,7 @@ class AddDatasetOwnershipConfig(TransformerSemanticsConfigModel):
is_container: bool = False
-class AddDatasetOwnership(DatasetOwnershipTransformer):
+class AddDatasetOwnership(OwnershipTransformer):
"""Transformer that adds owners to datasets according to a callback function."""
ctx: PipelineContext
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py
index 42dd54f4a584a..00b3a9ba59f92 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py
@@ -27,6 +27,22 @@ def entity_types(self) -> List[str]:
return ["dataset"]
+class OwnershipTransformer(
+ DatasetTransformer, SingleAspectTransformer, metaclass=ABCMeta
+): # Abstract base for transformers that operate on the "ownership" aspect
+ def aspect_name(self) -> str:
+ return "ownership"
+
+ def entity_types(self) -> List[str]:
+ return [ # Entity types reported by this transformer; not limited to "dataset"
+ "dataset",
+ "dataJob",
+ "dataFlow",
+ "chart",
+ "dashboard",
+ ]
+
+
class TagTransformer(BaseTransformer, SingleAspectTransformer, metaclass=ABCMeta):
"""Transformer that does transform sequentially on each tag."""
@@ -47,11 +63,6 @@ def entity_types(self) -> List[str]:
return ["container"]
-class DatasetOwnershipTransformer(DatasetTransformer, metaclass=ABCMeta):
- def aspect_name(self) -> str:
- return "ownership"
-
-
class DatasetDomainTransformer(DatasetTransformer, metaclass=ABCMeta):
def aspect_name(self) -> str:
return "domains"
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py b/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py
index 8ef61ab9679e6..f17546d6f7299 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py
@@ -4,9 +4,7 @@
import datahub.emitter.mce_builder as builder
from datahub.configuration.common import ConfigModel
from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.transformer.dataset_transformer import (
- DatasetOwnershipTransformer,
-)
+from datahub.ingestion.transformer.dataset_transformer import OwnershipTransformer
from datahub.metadata.schema_classes import (
OwnerClass,
OwnershipClass,
@@ -20,7 +18,7 @@ class PatternCleanUpOwnershipConfig(ConfigModel):
pattern_for_cleanup: List[str]
-class PatternCleanUpOwnership(DatasetOwnershipTransformer):
+class PatternCleanUpOwnership(OwnershipTransformer):
"""Transformer that clean the ownership URN."""
ctx: PipelineContext
diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/remove_dataset_ownership.py b/metadata-ingestion/src/datahub/ingestion/transformer/remove_dataset_ownership.py
index f5d71a4340554..934e2a13d5631 100644
--- a/metadata-ingestion/src/datahub/ingestion/transformer/remove_dataset_ownership.py
+++ b/metadata-ingestion/src/datahub/ingestion/transformer/remove_dataset_ownership.py
@@ -3,9 +3,7 @@
from datahub.configuration.common import ConfigModel
from datahub.emitter.mce_builder import Aspect
from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.transformer.dataset_transformer import (
- DatasetOwnershipTransformer,
-)
+from datahub.ingestion.transformer.dataset_transformer import OwnershipTransformer
from datahub.metadata.schema_classes import OwnershipClass
@@ -13,7 +11,7 @@ class ClearDatasetOwnershipConfig(ConfigModel):
pass
-class SimpleRemoveDatasetOwnership(DatasetOwnershipTransformer):
+class SimpleRemoveDatasetOwnership(OwnershipTransformer):
"""Transformer that clears all owners on each dataset."""
def __init__(self, config: ClearDatasetOwnershipConfig, ctx: PipelineContext):
diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py
index 255d4c6e6bb74..5c3a6b5b533a0 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py
@@ -134,7 +134,7 @@ class QueryMetadata:
upstreams: List[UrnStr] # this is direct upstreams, which may be temp tables
column_lineage: List[ColumnLineageInfo]
- column_usage: Dict[UrnStr, Set[UrnStr]]
+ column_usage: Dict[UrnStr, Set[UrnStr]] # TODO: Change to an OrderedSet
confidence_score: float
used_temp_tables: bool = True
@@ -1426,7 +1426,7 @@ def _gen_query(
for upstream in query.upstreams:
query_subject_urns.add(upstream)
if self.generate_query_subject_fields:
- for column in query.column_usage.get(upstream, []):
+ for column in sorted(query.column_usage.get(upstream, [])):
query_subject_urns.add(
builder.make_schema_field_urn(upstream, column)
)
diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py
index d940dfd78a82e..dd2829ba0d236 100644
--- a/metadata-ingestion/src/datahub/upgrade/upgrade.py
+++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py
@@ -1,15 +1,14 @@
import asyncio
import contextlib
import logging
-import sys
from datetime import datetime, timedelta, timezone
from functools import wraps
from typing import Any, Callable, Optional, Tuple, TypeVar
+import click
import humanfriendly
from packaging.version import Version
from pydantic import BaseModel
-from termcolor import colored
from datahub import __version__
from datahub.cli.config_utils import load_client_config
@@ -277,8 +276,8 @@ def maybe_print_upgrade_message( # noqa: C901
if not version_stats:
log.debug("No version stats found")
return
- else:
- log.debug(f"Version stats found: {version_stats}")
+
+ log.debug(f"Version stats found: {version_stats}")
current_release_date = version_stats.client.current.release_date
latest_release_date = (
version_stats.client.latest.release_date
@@ -325,50 +324,54 @@ def maybe_print_upgrade_message( # noqa: C901
if client_server_compat < 0:
with contextlib.suppress(Exception):
assert version_stats
- print(
- colored("❗Client-Server Incompatible❗", "yellow"),
- colored(
+ click.echo(
+ click.style("❗Client-Server Incompatible❗", fg="yellow")
+ + " "
+ + click.style(
f"Your client version {version_stats.client.current.version} is newer than your server version {version_stats.server.current.version}. Downgrading the cli to {version_stats.server.current.version} is recommended.\n",
- "cyan",
- ),
- colored(
+ fg="cyan",
+ )
+ + click.style(
f"➡️ Downgrade via `\"pip install 'acryl-datahub=={version_stats.server.current.version}'\"",
- "cyan",
- ),
+ fg="cyan",
+ )
)
elif client_server_compat > 0:
with contextlib.suppress(Exception):
assert version_stats
- print(
- colored("❗Client-Server Incompatible❗", "red"),
- colored(
+ click.echo(
+ click.style("❗Client-Server Incompatible❗", fg="red")
+ + " "
+ + click.style(
f"Your client version {version_stats.client.current.version} is older than your server version {version_stats.server.current.version}. Upgrading the cli to {version_stats.server.current.version} is recommended.\n",
- "cyan",
- ),
- colored(
+ fg="cyan",
+ )
+ + click.style(
f"➡️ Upgrade via \"pip install 'acryl-datahub=={version_stats.server.current.version}'\"",
- "cyan",
- ),
+ fg="cyan",
+ )
)
elif client_server_compat == 0 and encourage_cli_upgrade:
with contextlib.suppress(Exception):
- print(
- colored("💡 Upgrade cli!", "yellow"),
- colored(
+ click.echo(
+ click.style("💡 Upgrade cli!", fg="yellow")
+ + " "
+ + click.style(
f"You seem to be running an old version of datahub cli: {current_version} {get_days(current_release_date)}. Latest version is {latest_version} {get_days(latest_release_date)}.\nUpgrade via \"pip install -U 'acryl-datahub'\"",
- "cyan",
- ),
+ fg="cyan",
+ )
)
elif encourage_quickstart_upgrade:
try:
assert version_stats
- print(
- colored("💡 Upgrade available!", "yellow"),
- colored(
+ click.echo(
+ click.style("💡 Upgrade available!", fg="yellow")
+ + " "
+ + click.style(
f'You seem to be running a slightly old quickstart image {get_days(version_stats.server.current.release_date)}. Run "datahub docker quickstart" to get the latest updates without losing any data!',
- "cyan",
+ fg="cyan",
),
- file=sys.stderr,
+ err=True,
)
except Exception as e:
log.debug(f"Failed to suggest quickstart upgrade due to {e}")
diff --git a/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json
index 29b186978a76a..0f8f4cc64e7ca 100644
--- a/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json
+++ b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json
@@ -17,6 +17,22 @@
"lastRunId": "no-run-id-provided"
}
},
+{
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
{
"entityType": "dataFlow",
"entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
@@ -62,9 +78,17 @@
"aspect": {
"json": {
"customProperties": {
+ "connector_id": "calendar_elected",
+ "connector_type": "postgres",
"paused": "False",
"sync_frequency": "1440",
- "destination_id": "'interval_unconstitutional'"
+ "destination_id": "interval_unconstitutional",
+ "source.platform": "postgres",
+ "source.env": "DEV",
+ "source.database": "postgres_db",
+ "destination.platform": "snowflake",
+ "destination.env": "PROD",
+ "destination.database": "test_database"
},
"name": "postgres",
"type": {
@@ -79,6 +103,22 @@
"lastRunId": "no-run-id-provided"
}
},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
{
"entityType": "dataJob",
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
@@ -150,13 +190,18 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "ownership",
"aspect": {
"json": {
- "removed": false
+ "owners": [],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:fivetran"
+ }
}
},
"systemMetadata": {
@@ -166,13 +211,13 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "globalTags",
"aspect": {
"json": {
- "removed": false
+ "tags": []
}
},
"systemMetadata": {
@@ -182,18 +227,13 @@
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
"changeType": "UPSERT",
- "aspectName": "ownership",
+ "aspectName": "status",
"aspect": {
"json": {
- "owners": [],
- "ownerTypes": {},
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:fivetran"
- }
+ "removed": false
}
},
"systemMetadata": {
@@ -203,13 +243,13 @@
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)",
"changeType": "UPSERT",
- "aspectName": "globalTags",
+ "aspectName": "status",
"aspect": {
"json": {
- "tags": []
+ "removed": false
}
},
"systemMetadata": {
@@ -304,8 +344,8 @@
"json": {
"timestampMillis": 1695191853000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED"
}
@@ -325,8 +365,8 @@
"json": {
"timestampMillis": 1695191885000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -427,8 +467,8 @@
"json": {
"timestampMillis": 1696343730000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED"
}
@@ -448,8 +488,8 @@
"json": {
"timestampMillis": 1696343732000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -550,8 +590,8 @@
"json": {
"timestampMillis": 1696343755000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED"
}
@@ -571,8 +611,8 @@
"json": {
"timestampMillis": 1696343790000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -587,38 +627,6 @@
"lastRunId": "no-run-id-provided"
}
},
-{
- "entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
{
"entityType": "dataProcessInstance",
"entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
diff --git a/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_golden.json
index 0cd3bb83f90f5..22933f3483e76 100644
--- a/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_golden.json
+++ b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_golden.json
@@ -17,6 +17,22 @@
"lastRunId": "no-run-id-provided"
}
},
+{
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
{
"entityType": "dataFlow",
"entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
@@ -62,9 +78,17 @@
"aspect": {
"json": {
"customProperties": {
+ "connector_id": "calendar_elected",
+ "connector_type": "postgres",
"paused": "False",
"sync_frequency": "1440",
- "destination_id": "'interval_unconstitutional'"
+ "destination_id": "interval_unconstitutional",
+ "source.platform": "postgres",
+ "source.env": "DEV",
+ "source.database": "postgres_db",
+ "destination.platform": "snowflake",
+ "destination.env": "PROD",
+ "destination.database": "test_database"
},
"name": "postgres",
"type": {
@@ -79,6 +103,22 @@
"lastRunId": "no-run-id-provided"
}
},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
{
"entityType": "dataJob",
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
@@ -150,13 +190,26 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "ownership",
"aspect": {
"json": {
- "removed": false
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:abc.xyz@email.com",
+ "type": "DEVELOPER",
+ "source": {
+ "type": "SERVICE"
+ }
+ }
+ ],
+ "ownerTypes": {},
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:fivetran"
+ }
}
},
"systemMetadata": {
@@ -166,13 +219,13 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "globalTags",
"aspect": {
"json": {
- "removed": false
+ "tags": []
}
},
"systemMetadata": {
@@ -182,26 +235,13 @@
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
"changeType": "UPSERT",
- "aspectName": "ownership",
+ "aspectName": "status",
"aspect": {
"json": {
- "owners": [
- {
- "owner": "urn:li:corpuser:abc.xyz@email.com",
- "type": "DEVELOPER",
- "source": {
- "type": "SERVICE"
- }
- }
- ],
- "ownerTypes": {},
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:fivetran"
- }
+ "removed": false
}
},
"systemMetadata": {
@@ -211,13 +251,13 @@
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)",
"changeType": "UPSERT",
- "aspectName": "globalTags",
+ "aspectName": "status",
"aspect": {
"json": {
- "tags": []
+ "removed": false
}
},
"systemMetadata": {
@@ -312,8 +352,8 @@
"json": {
"timestampMillis": 1695191853000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED"
}
@@ -333,8 +373,8 @@
"json": {
"timestampMillis": 1695191885000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -435,8 +475,8 @@
"json": {
"timestampMillis": 1696343730000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED"
}
@@ -456,8 +496,8 @@
"json": {
"timestampMillis": 1696343732000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -558,8 +598,8 @@
"json": {
"timestampMillis": 1696343755000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "STARTED"
}
@@ -579,8 +619,8 @@
"json": {
"timestampMillis": 1696343790000,
"partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
},
"status": "COMPLETE",
"result": {
@@ -595,38 +635,6 @@
"lastRunId": "no-run-id-provided"
}
},
-{
- "entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "powerbi-test",
- "lastRunId": "no-run-id-provided"
- }
-},
{
"entityType": "dataProcessInstance",
"entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_app_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_app_ingest.json
new file mode 100644
index 0000000000000..5988b14977552
--- /dev/null
+++ b/metadata-ingestion/tests/integration/powerbi/golden_test_app_ingest.json
@@ -0,0 +1,242 @@
+[
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,apps.2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6)",
+ "changeType": "UPSERT",
+ "aspectName": "dashboardInfo",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "title": "Finance",
+ "description": "The finance app",
+ "charts": [],
+ "datasets": [],
+ "dashboards": [
+ {
+ "destinationUrn": "urn:li:dashboard:(powerbi,dashboards.744B07E3-FAA7-4BD7-BD17-3220BF0F6301)"
+ },
+ {
+ "destinationUrn": "urn:li:dashboard:(powerbi,reports.455AB99B-E110-46E6-90D3-F015CABD1156)"
+ }
+ ],
+ "lastModified": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 1547372813,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,apps.2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePaths",
+ "aspect": {
+ "json": {
+ "paths": [
+ "/powerbi/Workspace For App Testing"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,apps.2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,apps.2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "App"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,apps.2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Workspace For App Testing"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A700E2C1-D008-42DF-AFCA-A70A87D0B2A3)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePaths",
+ "aspect": {
+ "json": {
+ "paths": [
+ "/powerbi/Workspace For App Testing"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A700E2C1-D008-42DF-AFCA-A70A87D0B2A3)",
+ "changeType": "PATCH",
+ "aspectName": "dashboardInfo",
+ "aspect": {
+ "json": [
+ {
+ "op": "add",
+ "path": "/customProperties/chartCount",
+ "value": "0"
+ },
+ {
+ "op": "add",
+ "path": "/customProperties/workspaceName",
+ "value": "Workspace For App Testing"
+ },
+ {
+ "op": "add",
+ "path": "/customProperties/workspaceId",
+ "value": "8F756DE6-26AD-45FF-A201-44276FF1F561"
+ },
+ {
+ "op": "add",
+ "path": "/title",
+ "value": "test_dashboard"
+ },
+ {
+ "op": "add",
+ "path": "/description",
+ "value": "Description of test dashboard"
+ },
+ {
+ "op": "add",
+ "path": "/dashboardUrl",
+ "value": "https://localhost/dashboards/web/1"
+ },
+ {
+ "op": "add",
+ "path": "/lastModified",
+ "value": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ }
+ ]
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A700E2C1-D008-42DF-AFCA-A70A87D0B2A3)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A700E2C1-D008-42DF-AFCA-A70A87D0B2A3)",
+ "changeType": "UPSERT",
+ "aspectName": "dashboardKey",
+ "aspect": {
+ "json": {
+ "dashboardTool": "powerbi",
+ "dashboardId": "powerbi.linkedin.com/dashboards/A700E2C1-D008-42DF-AFCA-A70A87D0B2A3"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.A700E2C1-D008-42DF-AFCA-A70A87D0B2A3)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "Workspace For App Testing"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/powerbi/mock_data/workspace_with_app_mock_response.json b/metadata-ingestion/tests/integration/powerbi/mock_data/workspace_with_app_mock_response.json
new file mode 100644
index 0000000000000..712982741ccaa
--- /dev/null
+++ b/metadata-ingestion/tests/integration/powerbi/mock_data/workspace_with_app_mock_response.json
@@ -0,0 +1,149 @@
+{
+ "https://api.powerbi.com/v1.0/myorg/groups?%24skip=0&%24top=1000": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "value": [
+ {
+ "id": "8F756DE6-26AD-45FF-A201-44276FF1F561",
+ "isReadOnly": true,
+ "name": "Workspace For App Testing",
+ "type": "Workspace",
+ "state": "Active"
+ }
+ ]
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/groups?%24skip=1000&%24top=1000": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "value": []
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanResult/6147FCEB-7531-4449-8FB6-1F7A5431BF2D": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "workspaces": [
+ {
+ "id": "8F756DE6-26AD-45FF-A201-44276FF1F561",
+ "name": "Workspace For App Testing",
+ "type": "Workspace",
+ "state": "Active",
+ "reports": [
+ {
+ "reportType": "PowerBIReport",
+ "id": "455AB99B-E110-46E6-90D3-F015CABD1156",
+ "name": "GitHub Progress",
+ "datasetId": "2F99BE64-673D-4DA8-BF4F-02629A1F2C8F",
+ "createdDateTime": "2024-10-01T06:27:00.51",
+ "modifiedDateTime": "2024-10-01T06:27:00.51",
+ "modifiedBy": "abc@fake.com",
+ "createdBy": "abc@fake.com",
+ "modifiedById": "97ABB057-CB8B-480D-AA1C-B3E7F0A16EC5",
+ "createdById": "97ABB057-CB8B-480D-AA1C-B3E7F0A16EC5",
+ "datasetWorkspaceId": "8F756DE6-26AD-45FF-A201-44276FF1F561",
+ "users": [
+ {
+ "reportUserAccessRight": "Owner",
+ "emailAddress": "abc@fake.com",
+ "displayName": "John Smith",
+ "identifier": "abc@fake.com",
+ "graphId": "97ABB057-CB8B-480D-AA1C-B3E7F0A16EC5",
+ "principalType": "User",
+ "userType": "Member"
+ }
+ ]
+ },
+ {
+ "reportType": "PowerBIReport",
+ "id": "ecc35189-e67a-4d8a-9037-403317bd5808",
+ "name": "[App] GitHub Progress",
+ "datasetId": "2F99BE64-673D-4DA8-BF4F-02629A1F2C8F",
+ "appId": "2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6",
+ "createdDateTime": "2024-10-01T06:29:34.75",
+ "modifiedDateTime": "2024-10-01T07:22:55.397",
+ "originalReportObjectId": "455AB99B-E110-46E6-90D3-F015CABD1156",
+ "modifiedBy": "abc@fake.com",
+ "createdBy": "abc@fake.com",
+ "modifiedById": "97ABB057-CB8B-480D-AA1C-B3E7F0A16EC5",
+ "createdById": "97ABB057-CB8B-480D-AA1C-B3E7F0A16EC5",
+ "datasetWorkspaceId": "8F756DE6-26AD-45FF-A201-44276FF1F561"
+ }
+ ],
+ "dashboards": [
+ {
+ "id": "744B07E3-FAA7-4BD7-BD17-3220BF0F6301",
+ "displayName": "Pet Overview",
+ "isReadOnly": false,
+ "tiles": [
+ ]
+ },
+ {
+ "id": "DB85B962-74BA-4821-900B-59AC5F70AADE",
+ "appId": "2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6",
+ "displayName": "[App] Pet Overview",
+ "isReadOnly": false,
+ "tiles": [
+ ]
+ }
+ ],
+ "datasets": []
+ }
+ ]
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/groups/8F756DE6-26AD-45FF-A201-44276FF1F561/dashboards": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "value": [
+ {
+ "id": "A700E2C1-D008-42DF-AFCA-A70A87D0B2A3",
+ "isReadOnly": true,
+ "displayName": "test_dashboard",
+ "description": "Description of test dashboard",
+ "embedUrl": "https://localhost/dashboards/embed/1",
+ "webUrl": "https://localhost/dashboards/web/1"
+ }
+ ]
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/admin/workspaces/scanStatus/6147FCEB-7531-4449-8FB6-1F7A5431BF2D": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "status": "SUCCEEDED"
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/groups/8F756DE6-26AD-45FF-A201-44276FF1F561/dashboards/A700E2C1-D008-42DF-AFCA-A70A87D0B2A3/tiles": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "value": []
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/admin/apps?%24skip=0&%24top=1000": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "value": [
+ {
+ "id": "2A4D0E82-E7A4-45B1-BD72-2A2CF82C9CB6",
+ "description": "The finance app",
+ "name": "Finance",
+ "publishedBy": "Bill",
+ "lastUpdate": "2024-09-26T04:20:34.513Z"
+ }
+ ]
+ }
+ },
+ "https://api.powerbi.com/v1.0/myorg/admin/apps?%24skip=1000&%24top=1000": {
+ "method": "GET",
+ "status_code": 200,
+ "json": {
+ "value": []
+ }
+ }
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index 78cf103107477..0f360d44c38cb 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -80,6 +80,9 @@ def scan_init_response(request, context):
"C5DA6EA8-625E-4AB1-90B6-CAEA0BF9F492": {
"id": "81B02907-E2A3-45C3-B505-3781839C8CAA",
},
+ "8F756DE6-26AD-45FF-A201-44276FF1F561": {
+ "id": "6147FCEB-7531-4449-8FB6-1F7A5431BF2D",
+ },
}
return w_id_vs_response[workspace_id]
@@ -1013,6 +1016,7 @@ def validate_pipeline(pipeline: Pipeline) -> None:
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
+ app=None,
)
# Fetch actual reports
reports: List[Report] = cast(
@@ -1490,3 +1494,122 @@ def test_powerbi_cross_workspace_reference_info_message(
output_path=f"{tmp_path}/powerbi_mces.json",
golden_path=f"{test_resources_dir}/{golden_file}",
)
+
+
+def common_app_ingest(
+ pytestconfig: pytest.Config,
+ requests_mock: Any,
+ output_mcp_path: str,
+ override_config: dict = {},
+) -> Pipeline:
+ enable_logging()
+
+ register_mock_api(
+ pytestconfig=pytestconfig,
+ request_mock=requests_mock,
+ override_data=read_mock_data(
+ path=pytestconfig.rootpath
+ / "tests/integration/powerbi/mock_data/workspace_with_app_mock_response.json"
+ ),
+ )
+
+ config = default_source_config()
+
+ del config["workspace_id"]
+
+ config["workspace_id_pattern"] = {
+ "allow": [
+ "8F756DE6-26AD-45FF-A201-44276FF1F561",
+ ]
+ }
+
+ config.update(override_config)
+
+ pipeline = Pipeline.create(
+ {
+ "run_id": "powerbi-test",
+ "source": {
+ "type": "powerbi",
+ "config": {
+ **config,
+ },
+ },
+ "sink": {
+ "type": "file",
+ "config": {
+ "filename": output_mcp_path,
+ },
+ },
+ }
+ )
+
+ pipeline.run()
+ pipeline.raise_from_status()
+
+ return pipeline
+
+
+@freeze_time(FROZEN_TIME)
+@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
+@pytest.mark.integration
+def test_powerbi_app_ingest(
+ mock_msal: MagicMock,
+ pytestconfig: pytest.Config,
+ tmp_path: str,
+ mock_time: datetime.datetime,
+ requests_mock: Any,
+) -> None:
+
+ common_app_ingest(
+ pytestconfig=pytestconfig,
+ requests_mock=requests_mock,
+ output_mcp_path=f"{tmp_path}/powerbi_mces.json",
+ override_config={
+ "extract_app": True,
+ },
+ )
+
+ golden_file = "golden_test_app_ingest.json"
+
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi"
+
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=f"{tmp_path}/powerbi_mces.json",
+ golden_path=f"{test_resources_dir}/{golden_file}",
+ )
+
+
+@freeze_time(FROZEN_TIME)
+@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca)
+@pytest.mark.integration
+def test_powerbi_app_ingest_info_message(
+ mock_msal: MagicMock,
+ pytestconfig: pytest.Config,
+ tmp_path: str,
+ mock_time: datetime.datetime,
+ requests_mock: Any,
+) -> None:
+
+ pipeline = common_app_ingest(
+ pytestconfig=pytestconfig,
+ requests_mock=requests_mock,
+ output_mcp_path=f"{tmp_path}/powerbi_mces.json",
+ )
+
+ assert isinstance(pipeline.source, PowerBiDashboardSource) # to silence the linter
+
+ info_entries: dict = pipeline.source.reporter._structured_logs._entries.get(
+ StructuredLogLevel.INFO, {}
+ ) # type :ignore
+
+ is_entry_present: bool = False
+ # Search the INFO entries for the "App Ingestion Is Disabled" message
+ for key, entry in info_entries.items():
+ if entry.title == "App Ingestion Is Disabled":
+ is_entry_present = True
+ break
+
+ assert (
+ is_entry_present
+ ), "The extract_app flag should be set to false by default. We need to keep this flag as false until all GMS instances are updated to the latest release."
diff --git a/metadata-ingestion/tests/test_helpers/mce_helpers.py b/metadata-ingestion/tests/test_helpers/mce_helpers.py
index 9ee4642bfe6eb..3b59481d8cb02 100644
--- a/metadata-ingestion/tests/test_helpers/mce_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/mce_helpers.py
@@ -82,6 +82,7 @@ def check_golden_file(
golden_path: Union[str, os.PathLike],
ignore_paths: Sequence[str] = (),
ignore_paths_v2: Sequence[str] = (),
+ ignore_order: bool = True,
) -> None:
update_golden = pytestconfig.getoption("--update-golden-files")
copy_output = pytestconfig.getoption("--copy-output-files")
@@ -92,6 +93,7 @@ def check_golden_file(
copy_output=copy_output,
ignore_paths=ignore_paths,
ignore_paths_v2=ignore_paths_v2,
+ ignore_order=ignore_order,
)
@@ -100,6 +102,7 @@ def check_goldens_stream(
outputs: List,
golden_path: Union[str, os.PathLike],
ignore_paths: Sequence[str] = (),
+ ignore_order: bool = True,
) -> None:
with tempfile.NamedTemporaryFile() as f:
write_metadata_file(pathlib.Path(f.name), outputs)
@@ -109,6 +112,7 @@ def check_goldens_stream(
output_path=f.name,
golden_path=golden_path,
ignore_paths=ignore_paths,
+ ignore_order=ignore_order,
)
diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py
index 7456f2fd1d91c..415977b0f8467 100644
--- a/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py
+++ b/metadata-ingestion/tests/unit/bigquery/test_bigquery_lineage.py
@@ -1,19 +1,25 @@
import datetime
-from typing import Dict, List, Set
+from typing import Dict, List, Optional, Set
import pytest
+import datahub.metadata.schema_classes as models
+from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.bigquery_v2.bigquery_audit import (
BigQueryTableRef,
QueryEvent,
)
-from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config
+from datahub.ingestion.source.bigquery_v2.bigquery_config import (
+ BigQueryV2Config,
+ GcsLineageProviderConfig,
+)
from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report
from datahub.ingestion.source.bigquery_v2.common import BigQueryIdentifierBuilder
from datahub.ingestion.source.bigquery_v2.lineage import (
BigqueryLineageExtractor,
LineageEdge,
)
+from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
from datahub.sql_parsing.schema_resolver import SchemaResolver
@@ -135,3 +141,220 @@ def test_column_level_lineage(lineage_entries: List[QueryEvent]) -> None:
upstream_lineage.fineGrainedLineages
and len(upstream_lineage.fineGrainedLineages) == 2
)
+
+
+def test_lineage_for_external_bq_table(mock_datahub_graph_instance):
+
+ pipeline_context = PipelineContext(run_id="bq_gcs_lineage")
+ pipeline_context.graph = mock_datahub_graph_instance
+
+ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass:
+ return models.SchemaMetadataClass(
+ schemaName="sample_schema",
+ platform="urn:li:dataPlatform:gcs", # important <- platform must be an urn
+ version=0,
+ hash="",
+ platformSchema=models.OtherSchemaClass(
+ rawSchema="__insert raw schema here__"
+ ),
+ fields=[
+ models.SchemaFieldClass(
+ fieldPath="age",
+ type=models.SchemaFieldDataTypeClass(type=models.NumberTypeClass()),
+ nativeDataType="int",
+ ),
+ models.SchemaFieldClass(
+ fieldPath="firstname",
+ type=models.SchemaFieldDataTypeClass(type=models.StringTypeClass()),
+ nativeDataType="VARCHAR(100)",
+ ),
+ models.SchemaFieldClass(
+ fieldPath="lastname",
+ type=models.SchemaFieldDataTypeClass(type=models.StringTypeClass()),
+ nativeDataType="VARCHAR(100)",
+ ),
+ ],
+ )
+
+ pipeline_context.graph.get_schema_metadata = fake_schema_metadata # type: ignore
+ path_specs: List[PathSpec] = [
+ PathSpec(include="gs://bigquery_data/{table}/*.parquet"),
+ PathSpec(include="gs://bigquery_data/customer3/{table}/*.parquet"),
+ ]
+ gcs_lineage_config: GcsLineageProviderConfig = GcsLineageProviderConfig(
+ path_specs=path_specs
+ )
+
+ config = BigQueryV2Config(
+ include_table_lineage=True,
+ include_column_lineage_with_gcs=True,
+ gcs_lineage_config=gcs_lineage_config,
+ )
+
+ report = BigQueryV2Report()
+ extractor: BigqueryLineageExtractor = BigqueryLineageExtractor(
+ config,
+ report,
+ schema_resolver=SchemaResolver(platform="bigquery"),
+ identifiers=BigQueryIdentifierBuilder(config, report),
+ )
+
+ upstream_lineage = extractor.get_lineage_for_external_table(
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.my_dataset.my_table,PROD)",
+ source_uris=[
+ "gs://bigquery_data/customer1/*.parquet",
+ "gs://bigquery_data/customer2/*.parquet",
+ "gs://bigquery_data/customer3/my_table/*.parquet",
+ ],
+ graph=pipeline_context.graph,
+ )
+
+ expected_schema_field_urns = [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer1,PROD),age)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer1,PROD),firstname)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer1,PROD),lastname)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer2,PROD),age)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer2,PROD),firstname)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer2,PROD),lastname)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer3/my_table,PROD),age)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer3/my_table,PROD),firstname)",
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer3/my_table,PROD),lastname)",
+ ]
+ assert upstream_lineage
+ assert len(upstream_lineage.upstreams) == 3
+ assert (
+ upstream_lineage.fineGrainedLineages
+ and len(upstream_lineage.fineGrainedLineages) == 9
+ )
+ # Extracting column URNs from upstream_lineage.fineGrainedLineages
+ actual_schema_field_urns = [
+ fine_grained_lineage.upstreams[0]
+ if fine_grained_lineage.upstreams is not None
+ else []
+ for fine_grained_lineage in upstream_lineage.fineGrainedLineages
+ ]
+ assert all(
+ urn in expected_schema_field_urns for urn in actual_schema_field_urns
+ ), "Some expected column URNs are missing from fine grained lineage."
+
+
+def test_lineage_for_external_bq_table_no_column_lineage(mock_datahub_graph_instance):
+
+ pipeline_context = PipelineContext(run_id="bq_gcs_lineage")
+ pipeline_context.graph = mock_datahub_graph_instance
+
+ def fake_schema_metadata(entity_urn: str) -> Optional[models.SchemaMetadataClass]:
+ return None
+
+ pipeline_context.graph.get_schema_metadata = fake_schema_metadata # type: ignore
+ path_specs: List[PathSpec] = [
+ PathSpec(include="gs://bigquery_data/{table}/*.parquet"),
+ PathSpec(include="gs://bigquery_data/customer3/{table}/*.parquet"),
+ ]
+ gcs_lineage_config: GcsLineageProviderConfig = GcsLineageProviderConfig(
+ path_specs=path_specs
+ )
+
+ config = BigQueryV2Config(
+ include_table_lineage=True,
+ include_column_lineage_with_gcs=True,
+ gcs_lineage_config=gcs_lineage_config,
+ )
+
+ report = BigQueryV2Report()
+ extractor: BigqueryLineageExtractor = BigqueryLineageExtractor(
+ config,
+ report,
+ schema_resolver=SchemaResolver(platform="bigquery"),
+ identifiers=BigQueryIdentifierBuilder(config, report),
+ )
+
+ upstream_lineage = extractor.get_lineage_for_external_table(
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.my_dataset.my_table,PROD)",
+ source_uris=[
+ "gs://bigquery_data/customer1/*.parquet",
+ "gs://bigquery_data/customer2/*.parquet",
+ "gs://bigquery_data/customer3/my_table/*.parquet",
+ ],
+ graph=pipeline_context.graph,
+ )
+
+ expected_dataset_urns = [
+ "urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer1,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer2,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:gcs,bigquery_data/customer3/my_table,PROD)",
+ ]
+ assert upstream_lineage
+ assert len(upstream_lineage.upstreams) == 3
+ # Extracting dataset URNs from upstream_lineage.upstreams
+ actual_dataset_urns = [upstream.dataset for upstream in upstream_lineage.upstreams]
+ assert all(
+ urn in actual_dataset_urns for urn in expected_dataset_urns
+ ), "Some expected dataset URNs are missing from upstream lineage."
+ assert upstream_lineage.fineGrainedLineages is None
+
+
+def test_lineage_for_external_table_with_non_gcs_uri(mock_datahub_graph_instance):
+ pipeline_context = PipelineContext(run_id="non_gcs_lineage")
+ pipeline_context.graph = mock_datahub_graph_instance
+
+ config = BigQueryV2Config(
+ include_table_lineage=True,
+ include_column_lineage_with_gcs=False, # Column lineage disabled for simplicity
+ )
+ report = BigQueryV2Report()
+ extractor: BigqueryLineageExtractor = BigqueryLineageExtractor(
+ config,
+ report,
+ schema_resolver=SchemaResolver(platform="bigquery"),
+ identifiers=BigQueryIdentifierBuilder(config, report),
+ )
+
+ upstream_lineage = extractor.get_lineage_for_external_table(
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.my_dataset.my_table,PROD)",
+ source_uris=[
+ "https://some_non_gcs_path/customer1/file.csv",
+ "https://another_path/file.txt",
+ ],
+ graph=pipeline_context.graph,
+ )
+
+ assert upstream_lineage is None
+
+
+def test_lineage_for_external_table_path_not_matching_specs(
+ mock_datahub_graph_instance,
+):
+ pipeline_context = PipelineContext(run_id="path_not_matching_lineage")
+ pipeline_context.graph = mock_datahub_graph_instance
+
+ path_specs: List[PathSpec] = [
+ PathSpec(include="gs://different_data/db2/db3/{table}/*.parquet"),
+ ]
+ gcs_lineage_config: GcsLineageProviderConfig = GcsLineageProviderConfig(
+ path_specs=path_specs, ignore_non_path_spec_path=True
+ )
+ config = BigQueryV2Config(
+ include_table_lineage=True,
+ include_column_lineage_with_gcs=False,
+ gcs_lineage_config=gcs_lineage_config,
+ )
+
+ report = BigQueryV2Report()
+ extractor: BigqueryLineageExtractor = BigqueryLineageExtractor(
+ config,
+ report,
+ schema_resolver=SchemaResolver(platform="bigquery"),
+ identifiers=BigQueryIdentifierBuilder(config, report),
+ )
+
+ upstream_lineage = extractor.get_lineage_for_external_table(
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.my_dataset.my_table,PROD)",
+ source_uris=[
+ "gs://bigquery_data/customer1/*.parquet",
+ "gs://bigquery_data/customer2/*.parquet",
+ ],
+ graph=pipeline_context.graph,
+ )
+
+ assert upstream_lineage is None
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json
index 94c8947dba9ff..0d8822736c95e 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json
@@ -85,26 +85,6 @@
}
}
},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)",
- "changeType": "UPSERT",
- "aspectName": "operation",
- "aspect": {
- "json": {
- "timestampMillis": 1707182625000,
- "partitionSpec": {
- "type": "FULL_TABLE",
- "partition": "FULL_TABLE_SNAPSHOT"
- },
- "operationType": "INSERT",
- "customProperties": {
- "query_urn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f"
- },
- "lastUpdatedTimestamp": 20000
- }
- }
-},
{
"entityType": "query",
"entityUrn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f",
@@ -142,5 +122,25 @@
"platform": "urn:li:dataPlatform:redshift"
}
}
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "operation",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1707182625000,
+ "partitionSpec": {
+ "partition": "FULL_TABLE_SNAPSHOT",
+ "type": "FULL_TABLE"
+ },
+ "operationType": "INSERT",
+ "customProperties": {
+ "query_urn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f"
+ },
+ "lastUpdatedTimestamp": 20000
+ }
+ }
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json
index d3ec384316818..290ee7091df49 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_column_lineage_deduplication.json
@@ -164,10 +164,10 @@
"entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),a)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),c)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),b)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD),c)"
},
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)"
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json
index f6d781b356ee9..750b2c4a92fd0 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json
@@ -227,59 +227,6 @@
}
}
},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc",
- "changeType": "UPSERT",
- "aspectName": "queryProperties",
- "aspect": {
- "json": {
- "statement": {
- "value": "CREATE TABLE foo_staging AS\nSELECT\n a,\n b\nFROM foo_dep",
- "language": "SQL"
- },
- "source": "SYSTEM",
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:_ingestion"
- },
- "lastModified": {
- "time": 1707182625000,
- "actor": "urn:li:corpuser:_ingestion"
- }
- }
- }
-},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc",
- "changeType": "UPSERT",
- "aspectName": "querySubjects",
- "aspect": {
- "json": {
- "subjects": [
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD),b)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD),a)"
- },
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),a)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),b)"
- }
- ]
- }
- }
-},
{
"entityType": "query",
"entityUrn": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c",
@@ -291,17 +238,6 @@
}
}
},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc",
- "changeType": "UPSERT",
- "aspectName": "dataPlatformInstance",
- "aspect": {
- "json": {
- "platform": "urn:li:dataPlatform:redshift"
- }
- }
-},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD)",
@@ -353,6 +289,70 @@
}
}
},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:e4b3b60ab99e0f0bc1629ea82a5d7705a30dbd98a3923d599b39fb68624ea58d",
+ "changeType": "UPSERT",
+ "aspectName": "queryProperties",
+ "aspect": {
+ "json": {
+ "statement": {
+ "value": "CREATE TABLE foo_downstream AS\nSELECT\n a,\n b\nFROM foo_staging",
+ "language": "SQL"
+ },
+ "source": "SYSTEM",
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:_ingestion"
+ },
+ "lastModified": {
+ "time": 1707182625000,
+ "actor": "urn:li:corpuser:_ingestion"
+ }
+ }
+ }
+},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:e4b3b60ab99e0f0bc1629ea82a5d7705a30dbd98a3923d599b39fb68624ea58d",
+ "changeType": "UPSERT",
+ "aspectName": "querySubjects",
+ "aspect": {
+ "json": {
+ "subjects": [
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),a)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),b)"
+ },
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD),a)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD),b)"
+ }
+ ]
+ }
+ }
+},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:e4b3b60ab99e0f0bc1629ea82a5d7705a30dbd98a3923d599b39fb68624ea58d",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:redshift"
+ }
+ }
+},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)",
@@ -406,13 +406,13 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:e4b3b60ab99e0f0bc1629ea82a5d7705a30dbd98a3923d599b39fb68624ea58d",
+ "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc",
"changeType": "UPSERT",
"aspectName": "queryProperties",
"aspect": {
"json": {
"statement": {
- "value": "CREATE TABLE foo_downstream AS\nSELECT\n a,\n b\nFROM foo_staging",
+ "value": "CREATE TABLE foo_staging AS\nSELECT\n a,\n b\nFROM foo_dep",
"language": "SQL"
},
"source": "SYSTEM",
@@ -429,43 +429,43 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:e4b3b60ab99e0f0bc1629ea82a5d7705a30dbd98a3923d599b39fb68624ea58d",
- "changeType": "UPSERT",
- "aspectName": "dataPlatformInstance",
- "aspect": {
- "json": {
- "platform": "urn:li:dataPlatform:redshift"
- }
- }
-},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:e4b3b60ab99e0f0bc1629ea82a5d7705a30dbd98a3923d599b39fb68624ea58d",
+ "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc",
"changeType": "UPSERT",
"aspectName": "querySubjects",
"aspect": {
"json": {
"subjects": [
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),b)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD),a)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),a)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD),b)"
},
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD),a)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),a)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_downstream,PROD),b)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD),b)"
}
]
}
}
+},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:234a2904c367a6cc02d76cf358cd86937ec9e14af03e5539b5edb0b6df5db3dc",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:redshift"
+ }
+ }
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json
index abae5da02135d..a4ac349c3c455 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json
@@ -85,10 +85,10 @@
"entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD),b)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD),a)"
},
{
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD),a)"
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.baz,PROD),b)"
},
{
"entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)"
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json
index 1992bced039be..171a1bd3753e2 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json
@@ -177,24 +177,6 @@
}
}
},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559",
- "changeType": "UPSERT",
- "aspectName": "querySubjects",
- "aspect": {
- "json": {
- "subjects": [
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
- },
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_backup,PROD)"
- }
- ]
- }
- }
-},
{
"entityType": "query",
"entityUrn": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500",
@@ -220,24 +202,28 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559",
+ "entityUrn": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500",
"changeType": "UPSERT",
- "aspectName": "queryProperties",
+ "aspectName": "querySubjects",
"aspect": {
"json": {
- "statement": {
- "value": "CREATE TABLE person_info_backup AS\nSELECT\n *\nFROM person_info_swap",
- "language": "SQL"
- },
- "source": "SYSTEM",
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:_ingestion"
- },
- "lastModified": {
- "time": 1707182625000,
- "actor": "urn:li:corpuser:_ingestion"
- }
+ "subjects": [
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
+ },
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),a)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),b)"
+ },
+ {
+ "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),c)"
+ }
+ ]
}
}
},
@@ -279,7 +265,30 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500",
+ "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559",
+ "changeType": "UPSERT",
+ "aspectName": "queryProperties",
+ "aspect": {
+ "json": {
+ "statement": {
+ "value": "CREATE TABLE person_info_backup AS\nSELECT\n *\nFROM person_info_swap",
+ "language": "SQL"
+ },
+ "source": "SYSTEM",
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:_ingestion"
+ },
+ "lastModified": {
+ "time": 1707182625000,
+ "actor": "urn:li:corpuser:_ingestion"
+ }
+ }
+ }
+},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559",
"changeType": "UPSERT",
"aspectName": "querySubjects",
"aspect": {
@@ -289,16 +298,7 @@
"entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
},
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),a)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),b)"
- },
- {
- "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),c)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_backup,PROD)"
}
]
}
@@ -342,17 +342,40 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f",
+ "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae",
+ "changeType": "UPSERT",
+ "aspectName": "queryProperties",
+ "aspect": {
+ "json": {
+ "statement": {
+ "value": "CREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep",
+ "language": "SQL"
+ },
+ "source": "SYSTEM",
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:_ingestion"
+ },
+ "lastModified": {
+ "time": 1707182625000,
+ "actor": "urn:li:corpuser:_ingestion"
+ }
+ }
+ }
+},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae",
"changeType": "UPSERT",
"aspectName": "querySubjects",
"aspect": {
"json": {
"subjects": [
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_incremental,PROD)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_dep,PROD)"
},
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_incremental,PROD)"
}
]
}
@@ -362,22 +385,10 @@
"entityType": "query",
"entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae",
"changeType": "UPSERT",
- "aspectName": "queryProperties",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "statement": {
- "value": "CREATE TABLE person_info_incremental AS\nSELECT\n *\nFROM person_info_dep",
- "language": "SQL"
- },
- "source": "SYSTEM",
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:_ingestion"
- },
- "lastModified": {
- "time": 1707182625000,
- "actor": "urn:li:corpuser:_ingestion"
- }
+ "platform": "urn:li:dataPlatform:snowflake"
}
}
},
@@ -421,31 +432,13 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae",
- "changeType": "UPSERT",
- "aspectName": "querySubjects",
- "aspect": {
- "json": {
- "subjects": [
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_dep,PROD)"
- },
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_incremental,PROD)"
- }
- ]
- }
- }
-},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f",
+ "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68",
"changeType": "UPSERT",
"aspectName": "queryProperties",
"aspect": {
"json": {
"statement": {
- "value": "INSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental",
+ "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap",
"language": "SQL"
},
"source": "SYSTEM",
@@ -462,7 +455,25 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae",
+ "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68",
+ "changeType": "UPSERT",
+ "aspectName": "querySubjects",
+ "aspect": {
+ "json": {
+ "subjects": [
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)"
+ },
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
+ }
+ ]
+ }
+ }
+},
+{
+ "entityType": "query",
+ "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -473,13 +484,13 @@
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68",
+ "entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f",
"changeType": "UPSERT",
"aspectName": "queryProperties",
"aspect": {
"json": {
"statement": {
- "value": "ALTER TABLE dev.public.person_info SWAP WITH dev.public.person_info_swap",
+ "value": "INSERT INTO person_info_swap\nSELECT\n *\nFROM person_info_incremental",
"language": "SQL"
},
"source": "SYSTEM",
@@ -498,16 +509,23 @@
"entityType": "query",
"entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f",
"changeType": "UPSERT",
- "aspectName": "dataPlatformInstance",
+ "aspectName": "querySubjects",
"aspect": {
"json": {
- "platform": "urn:li:dataPlatform:snowflake"
+ "subjects": [
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_incremental,PROD)"
+ },
+ {
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
+ }
+ ]
}
}
},
{
"entityType": "query",
- "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68",
+ "entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -515,23 +533,5 @@
"platform": "urn:li:dataPlatform:snowflake"
}
}
-},
-{
- "entityType": "query",
- "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68",
- "changeType": "UPSERT",
- "aspectName": "querySubjects",
- "aspect": {
- "json": {
- "subjects": [
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)"
- },
- {
- "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)"
- }
- ]
- }
- }
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json
index b348785d06431..bcd31b0aa0249 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_temp_table.json
@@ -281,10 +281,10 @@
"json": {
"subjects": [
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session3,PROD)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)"
},
{
- "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD)"
+ "entity": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_session3,PROD)"
}
]
}
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py
index eb64efd6693de..b1ad9eb5c15d7 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py
@@ -1,3 +1,4 @@
+import functools
import os
import pathlib
from datetime import datetime, timezone
@@ -31,6 +32,10 @@
RESOURCE_DIR = pathlib.Path(__file__).parent / "aggregator_goldens"
FROZEN_TIME = "2024-02-06T01:23:45Z"
+check_goldens_stream = functools.partial(  # module-wide wrapper: every golden check below runs with ignore_order=False
+ mce_helpers.check_goldens_stream, ignore_order=False
+)
+
def _ts(ts: int) -> datetime:
return datetime.fromtimestamp(ts, tz=timezone.utc)
@@ -56,7 +61,7 @@ def test_basic_lineage(pytestconfig: pytest.Config, tmp_path: pathlib.Path) -> N
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_basic_lineage.json",
@@ -108,7 +113,7 @@ def test_overlapping_inserts(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_overlapping_inserts.json",
@@ -167,7 +172,7 @@ def test_temp_table(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_temp_table.json",
@@ -229,7 +234,7 @@ def test_multistep_temp_table(pytestconfig: pytest.Config) -> None:
)
== 4
)
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_multistep_temp_table.json",
@@ -305,7 +310,7 @@ def test_overlapping_inserts_from_temp_tables(pytestconfig: pytest.Config) -> No
assert len(report.queries_with_non_authoritative_session) == 1
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_overlapping_inserts_from_temp_tables.json",
@@ -354,7 +359,7 @@ def test_aggregate_operations(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_aggregate_operations.json",
@@ -392,7 +397,7 @@ def test_view_lineage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_view_lineage.json",
@@ -423,7 +428,7 @@ def test_known_lineage_mapping(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_known_lineage_mapping.json",
@@ -461,7 +466,7 @@ def test_column_lineage_deduplication(pytestconfig: pytest.Config) -> None:
# not get any credit for a and b, as they are already covered by query 2,
# which came later and hence has higher precedence.
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_column_lineage_deduplication.json",
@@ -506,7 +511,7 @@ def test_add_known_query_lineage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_add_known_query_lineage.json",
@@ -564,7 +569,7 @@ def test_table_rename(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_rename.json",
@@ -624,7 +629,7 @@ def test_table_rename_with_temp(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_rename_with_temp.json",
@@ -711,7 +716,7 @@ def test_table_swap(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_swap.json",
@@ -881,7 +886,7 @@ def test_table_swap_with_temp(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_table_swap_with_temp.json",
@@ -908,7 +913,7 @@ def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_create_table_query_mcps.json",
@@ -943,7 +948,7 @@ def test_table_lineage_via_temp_table_disordered_add(
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR
@@ -993,7 +998,7 @@ def test_basic_usage(pytestconfig: pytest.Config) -> None:
mcps = list(aggregator.gen_metadata())
- mce_helpers.check_goldens_stream(
+ check_goldens_stream(
pytestconfig,
outputs=mcps,
golden_path=RESOURCE_DIR / "test_basic_usage.json",
diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py
index 4e9a38cb37ae6..389f7b70b3311 100644
--- a/metadata-ingestion/tests/unit/test_transform_dataset.py
+++ b/metadata-ingestion/tests/unit/test_transform_dataset.py
@@ -220,7 +220,7 @@ def make_dataset_with_properties() -> models.MetadataChangeEventClass:
)
-def test_simple_dataset_ownership_transformation(mock_time):
+def test_dataset_ownership_transformation(mock_time):
no_owner_aspect = make_generic_dataset()
with_owner_aspect = make_dataset_with_owner()
@@ -254,7 +254,7 @@ def test_simple_dataset_ownership_transformation(mock_time):
transformer.transform([RecordEnvelope(input, metadata={}) for input in inputs])
)
- assert len(outputs) == len(inputs) + 1
+ assert len(outputs) == len(inputs) + 2
# Check the first entry.
first_ownership_aspect = builder.get_aspect_if_available(
@@ -287,11 +287,21 @@ def test_simple_dataset_ownership_transformation(mock_time):
]
)
+ third_ownership_aspect = outputs[4].record.aspect
+ assert third_ownership_aspect
+ assert len(third_ownership_aspect.owners) == 2
+ assert all(
+ [
+ owner.type == models.OwnershipTypeClass.DATAOWNER and owner.typeUrn is None
+ for owner in third_ownership_aspect.owners  # check the aspect fetched above, not second_ownership_aspect
+ ]
+ )
+
# Verify that the third entry is unchanged.
assert inputs[2] == outputs[2].record
# Verify that the last entry is EndOfStream
- assert inputs[3] == outputs[4].record
+ assert inputs[-1] == outputs[-1].record
def test_simple_dataset_ownership_with_type_transformation(mock_time):
@@ -1003,6 +1013,7 @@ def test_pattern_dataset_ownership_transformation(mock_time):
"rules": {
".*example1.*": [builder.make_user_urn("person1")],
".*example2.*": [builder.make_user_urn("person2")],
+ ".*dag_abc.*": [builder.make_user_urn("person2")],
}
},
"ownership_type": "DATAOWNER",
@@ -1014,7 +1025,9 @@ def test_pattern_dataset_ownership_transformation(mock_time):
transformer.transform([RecordEnvelope(input, metadata={}) for input in inputs])
)
- assert len(outputs) == len(inputs) + 1 # additional MCP due to the no-owner MCE
+ assert (
+ len(outputs) == len(inputs) + 2
+ ) # additional MCP due to the no-owner MCE + datajob
# Check the first entry.
assert inputs[0] == outputs[0].record
@@ -1042,6 +1055,16 @@ def test_pattern_dataset_ownership_transformation(mock_time):
]
)
+ third_ownership_aspect = outputs[4].record.aspect
+ assert third_ownership_aspect
+ assert len(third_ownership_aspect.owners) == 1
+ assert all(
+ [
+ owner.type == models.OwnershipTypeClass.DATAOWNER
+ for owner in third_ownership_aspect.owners
+ ]
+ )
+
# Verify that the third entry is unchanged.
assert inputs[2] == outputs[2].record
@@ -1122,14 +1145,14 @@ def fake_get_aspect(
pipeline_context.graph.get_aspect = fake_get_aspect # type: ignore
# No owner aspect for the first dataset
- no_owner_aspect = models.MetadataChangeEventClass(
+ no_owner_aspect_dataset = models.MetadataChangeEventClass(
proposedSnapshot=models.DatasetSnapshotClass(
urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,example1,PROD)",
aspects=[models.StatusClass(removed=False)],
),
)
# Dataset with an existing owner
- with_owner_aspect = models.MetadataChangeEventClass(
+ with_owner_aspect_dataset = models.MetadataChangeEventClass(
proposedSnapshot=models.DatasetSnapshotClass(
urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,example2,PROD)",
aspects=[
@@ -1148,8 +1171,7 @@ def fake_get_aspect(
),
)
- # Not a dataset, should be ignored
- not_a_dataset = models.MetadataChangeEventClass(
+ datajob = models.MetadataChangeEventClass(
proposedSnapshot=models.DataJobSnapshotClass(
urn="urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)",
aspects=[
@@ -1163,9 +1185,9 @@ def fake_get_aspect(
)
inputs = [
- no_owner_aspect,
- with_owner_aspect,
- not_a_dataset,
+ no_owner_aspect_dataset,
+ with_owner_aspect_dataset,
+ datajob,
EndOfStream(),
]
@@ -1176,6 +1198,7 @@ def fake_get_aspect(
"rules": {
".*example1.*": [builder.make_user_urn("person1")],
".*example2.*": [builder.make_user_urn("person2")],
+ ".*dag_abc.*": [builder.make_user_urn("person3")],
}
},
"ownership_type": "DATAOWNER",
@@ -1188,9 +1211,9 @@ def fake_get_aspect(
transformer.transform([RecordEnvelope(input, metadata={}) for input in inputs])
)
- assert len(outputs) == len(inputs) + 3
+ assert len(outputs) == len(inputs) + 4
- # Check the first entry.
+ # Check that DatasetSnapshotClass has not changed
assert inputs[0] == outputs[0].record
# Check the ownership for the first dataset (example1)
@@ -1217,12 +1240,16 @@ def fake_get_aspect(
]
)
+ third_ownership_aspect = outputs[4].record.aspect
+ assert third_ownership_aspect
+ assert len(third_ownership_aspect.owners) == 1 # new for datajob
+
# Check container ownerships
for i in range(2):
- container_ownership_aspect = outputs[i + 4].record.aspect
+ container_ownership_aspect = outputs[i + 5].record.aspect
assert container_ownership_aspect
ownership = json.loads(container_ownership_aspect.value.decode("utf-8"))
- assert len(ownership) == 2
+ assert len(ownership) == 3
assert ownership[0]["value"]["owner"] == builder.make_user_urn("person1")
assert ownership[1]["value"]["owner"] == builder.make_user_urn("person2")