diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 9545bcf34e3145..14656b6ca907d8 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -64,7 +64,7 @@ jobs: echo "full_tag=$(get_tag)-full" >> $GITHUB_OUTPUT echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT - echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT + echo "unique_full_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT - name: Check whether publishing enabled id: publish @@ -507,7 +507,7 @@ jobs: platforms: linux/amd64,linux/arm64/v8 - name: Compute DataHub Ingestion (Base-Slim) Tag id: tag - run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT datahub_ingestion_base_full_build: name: Build and Push DataHub Ingestion (Base-Full) Docker Image runs-on: ubuntu-latest @@ -573,13 +573,13 @@ jobs: datahub-ingestion: - 'docker/datahub-ingestion/**' - name: Build codegen - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }} with: - image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }} + image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} - name: Build and push Slim Image if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} uses: ./.github/actions/docker-custom-build-and-push @@ -589,7 +589,7 @@ jobs: ${{ env.DATAHUB_INGESTION_IMAGE }} build-args: | BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} - DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }} + DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }} RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} APP_ENV=slim tags: ${{ needs.setup.outputs.slim_tag }} @@ -601,7 +601,7 @@ jobs: platforms: linux/amd64,linux/arm64/v8 - name: Compute Tag id: tag - run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT datahub_ingestion_slim_scan: permissions: contents: read # for actions/checkout to fetch code @@ -656,7 +656,7 @@ jobs: datahub-ingestion: - 'docker/datahub-ingestion/**' - name: 
Build codegen - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} run: ./gradlew :metadata-ingestion:codegen - name: Download Base Image uses: ishworkh/docker-image-artifact-download@v1 diff --git a/build.gradle b/build.gradle index 07a0e6ad1f49fd..0a94991b131aac 100644 --- a/build.gradle +++ b/build.gradle @@ -289,6 +289,11 @@ subprojects { } // https://docs.gradle.org/current/userguide/performance.html maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 + + if (project.configurations.getByName("testImplementation").getDependencies() + .any{ it.getName() == "testng" }) { + useTestNG() + } } afterEvaluate { diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java index 54b8d23bab301d..52d06f73dcfab9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.auth; +import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.TestUtils; @@ -8,6 +9,10 @@ import com.linkedin.datahub.graphql.generated.ListAccessTokenResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; import graphql.schema.DataFetchingEnvironment; import java.util.Collections; import org.mockito.Mockito; @@ -36,14 +41,17 @@ public void testGetSuccess() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); final EntityClient mockClient = Mockito.mock(EntityClient.class); - Mockito.when(Mockito.eq(mockClient.filter( - Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), - Mockito.eq(buildFilter(filters, Collections.emptyList())), - Mockito.notNull(), - Mockito.eq(input.getStart()), - Mockito.eq(input.getCount()), - Mockito.eq(getAuthentication(mockEnv))))) - .thenReturn(null); + final Authentication testAuth = getAuthentication(mockEnv); + Mockito.when(mockClient.search( + Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), + Mockito.eq(""), + Mockito.eq(buildFilter(filters, Collections.emptyList())), + Mockito.any(SortCriterion.class), + Mockito.eq(input.getStart()), + Mockito.eq(input.getCount()), + Mockito.eq(testAuth), + Mockito.any(SearchFlags.class))) + .thenReturn(new SearchResult().setFrom(0).setNumEntities(0).setPageSize(0).setEntities(new SearchEntityArray())); final ListAccessTokensResolver resolver = new ListAccessTokensResolver(mockClient); final ListAccessTokenResult listAccessTokenResult = resolver.get(mockEnv).get(); diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts index 61667a941765c3..dba8e8bb1dce6b 100644 --- 
a/datahub-web-react/src/app/ingest/source/builder/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts @@ -27,6 +27,7 @@ import powerbiLogo from '../../../../images/powerbilogo.png'; import modeLogo from '../../../../images/modelogo.png'; import databricksLogo from '../../../../images/databrickslogo.png'; import verticaLogo from '../../../../images/verticalogo.png'; +import mlflowLogo from '../../../../images/mlflowlogo.png'; import dynamodbLogo from '../../../../images/dynamodblogo.png'; export const ATHENA = 'athena'; @@ -64,6 +65,8 @@ export const MARIA_DB = 'mariadb'; export const MARIA_DB_URN = `urn:li:dataPlatform:${MARIA_DB}`; export const METABASE = 'metabase'; export const METABASE_URN = `urn:li:dataPlatform:${METABASE}`; +export const MLFLOW = 'mlflow'; +export const MLFLOW_URN = `urn:li:dataPlatform:${MLFLOW}`; export const MODE = 'mode'; export const MODE_URN = `urn:li:dataPlatform:${MODE}`; export const MONGO_DB = 'mongodb'; @@ -119,6 +122,7 @@ export const PLATFORM_URN_TO_LOGO = { [LOOKER_URN]: lookerLogo, [MARIA_DB_URN]: mariadbLogo, [METABASE_URN]: metabaseLogo, + [MLFLOW_URN]: mlflowLogo, [MODE_URN]: modeLogo, [MONGO_DB_URN]: mongodbLogo, [MSSQL_URN]: mssqlLogo, diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index b4ea2db018bd84..1bd5b6f1f768b5 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -181,6 +181,13 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/metabase/", "recipe": "source:\n type: metabase\n config:\n # Coordinates\n connect_uri:\n\n # Credentials\n username: root\n password: example" }, + { + "urn": "urn:li:dataPlatform:mlflow", + "name": "mlflow", + "displayName": "MLflow", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mlflow/", + "recipe": "source:\n type: mlflow\n config:\n tracking_uri: tracking_uri" + }, { "urn": "urn:li:dataPlatform:mode", "name": "mode", diff --git a/datahub-web-react/src/images/mlflowlogo.png b/datahub-web-react/src/images/mlflowlogo.png new file mode 100644 index 00000000000000..e724d1affbc14d Binary files /dev/null and b/datahub-web-react/src/images/mlflowlogo.png differ diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 3d47f796173704..564cc19cc9a5f9 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -1,7 +1,7 @@ ARG APP_ENV=full ARG BASE_IMAGE=base -FROM golang:1-alpine3.17 AS binary +FROM golang:1-alpine3.17 AS dockerize-binary ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder @@ -41,7 +41,7 @@ RUN apt-get update && apt-get install -y -qq \ && rm -rf /var/lib/apt/lists/* /var/cache/apk/* # compiled against newer golang for security fixes -COPY --from=binary /go/bin/dockerize /usr/local/bin +COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 8b726df5e88420..0132ceaa9b1a95 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -1,7 +1,7 @@ # Defining environment ARG APP_ENV=full ARG BASE_IMAGE=acryldata/datahub-ingestion-base -ARG DOCKER_VERSION=latest +ARG DOCKER_VERSION=head 
FROM $BASE_IMAGE:$DOCKER_VERSION as base USER 0 diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index 9ae116f839aa07..cb8c27ab463c48 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -1,6 +1,6 @@ # Defining environment ARG BASE_IMAGE=acryldata/datahub-ingestion-base -ARG DOCKER_VERSION=latest +ARG DOCKER_VERSION=head-slim FROM $BASE_IMAGE:$DOCKER_VERSION as base USER 0 diff --git a/docker/postgres-setup/init.sh b/docker/postgres-setup/init.sh index 6c0adc8c69bddf..afc9bdfe4c6688 100755 --- a/docker/postgres-setup/init.sh +++ b/docker/postgres-setup/init.sh @@ -1,8 +1,13 @@ #!/bin/sh export PGPASSWORD=$POSTGRES_PASSWORD +POSTGRES_CREATE_DB=${POSTGRES_CREATE_DB:-true} +POSTGRES_CREATE_DB_CONNECTION_DB=${POSTGRES_CREATE_DB_CONNECTION_DB:-postgres} + # workaround create database if not exists, check https://stackoverflow.com/a/36591842 -psql -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT -tc "SELECT 1 FROM pg_database WHERE datname = '${DATAHUB_DB_NAME}'" | grep -q 1 || psql -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT -c "CREATE DATABASE ${DATAHUB_DB_NAME}" +if [ "$POSTGRES_CREATE_DB" = true ]; then + psql -d "$POSTGRES_CREATE_DB_CONNECTION_DB" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -tc "SELECT 1 FROM pg_database WHERE datname = '${DATAHUB_DB_NAME}'" | grep -q 1 || psql -d "$POSTGRES_CREATE_DB_CONNECTION_DB" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -c "CREATE DATABASE ${DATAHUB_DB_NAME}" +fi sed -e "s/DATAHUB_DB_NAME/${DATAHUB_DB_NAME}/g" /init.sql | tee -a /tmp/init-final.sql -psql -d $DATAHUB_DB_NAME -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT < /tmp/init-final.sql +psql -d "$DATAHUB_DB_NAME" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" < /tmp/init-final.sql diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index a6779424aa5811..fb0a73a6c35692 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -12,13 +12,11 @@ module.exports = { organizationName: "datahub-project", // Usually your GitHub org/user name. projectName: "datahub", // Usually your repo name. staticDirectories: ["static", "genStatic"], - stylesheets: [ - "https://fonts.googleapis.com/css2?family=Manrope:wght@400;600&display=swap", - ], + stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], noIndex: isSaas, customFields: { isSaas: isSaas, - markpromptProjectKey: process.env.DOCUSAURUS_MARKPROMPT_PROJECT_KEY || 'IeF3CUFCUQWuouZ8MP5Np9nES52QAtaA', + markpromptProjectKey: process.env.DOCUSAURUS_MARKPROMPT_PROJECT_KEY || "IeF3CUFCUQWuouZ8MP5Np9nES52QAtaA", }, themeConfig: { ...(!isSaas && { @@ -35,12 +33,8 @@ module.exports = { title: null, logo: { alt: "DataHub Logo", - src: `img/${ - isSaas ? "acryl" : "datahub" - }-logo-color-light-horizontal.svg`, - srcDark: `img/${ - isSaas ? "acryl" : "datahub" - }-logo-color-dark-horizontal.svg`, + src: `img/${isSaas ? "acryl" : "datahub"}-logo-color-light-horizontal.svg`, + srcDark: `img/${isSaas ? 
"acryl" : "datahub"}-logo-color-dark-horizontal.svg`, }, items: [ { @@ -50,7 +44,8 @@ module.exports = { position: "right", }, { - href: "/integrations", + to: "/integrations", + activeBasePath: "integrations", label: "Integrations", position: "right", }, @@ -70,8 +65,8 @@ module.exports = { position: "right", }, { - type: 'docsVersionDropdown', - position: 'right', + type: "docsVersionDropdown", + position: "right", dropdownActiveClassDisabled: true, }, { @@ -201,9 +196,7 @@ module.exports = { blog: false, theme: { customCss: [ - isSaas - ? require.resolve("./src/styles/acryl.scss") - : require.resolve("./src/styles/datahub.scss"), + isSaas ? require.resolve("./src/styles/acryl.scss") : require.resolve("./src/styles/datahub.scss"), require.resolve("./src/styles/global.scss"), require.resolve("./src/styles/sphinx.scss"), require.resolve("./src/styles/config-table.scss"), @@ -220,10 +213,7 @@ module.exports = { ], ], plugins: [ - [ - "@docusaurus/plugin-ideal-image", - { quality: 100, sizes: [320, 640, 1280, 1440, 1600] }, - ], + ["@docusaurus/plugin-ideal-image", { quality: 100, sizes: [320, 640, 1280, 1440, 1600] }], "docusaurus-plugin-sass", [ "docusaurus-graphql-plugin", diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 03ea38fd622d4d..b07cd0b03ce118 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -81,6 +81,13 @@ module.exports = { "docs/quick-ingestion-guides/powerbi/configuration", ], }, + { + Looker: [ + "docs/quick-ingestion-guides/looker/overview", + "docs/quick-ingestion-guides/looker/setup", + "docs/quick-ingestion-guides/looker/configuration", + ], + }, ], }, { @@ -437,7 +444,7 @@ module.exports = { Observability: [ "docs/managed-datahub/observe/freshness-assertions", "docs/managed-datahub/observe/volume-assertions", - "docs/managed-datahub/observe/custom-assertions", + "docs/managed-datahub/observe/custom-sql-assertions", ], }, ], diff --git a/docs-website/src/components/Feedback/styles.module.scss b/docs-website/src/components/Feedback/styles.module.scss index b0fa3d7d1bd2bc..ee22f6b0550126 100644 --- a/docs-website/src/components/Feedback/styles.module.scss +++ b/docs-website/src/components/Feedback/styles.module.scss @@ -37,11 +37,11 @@ } .feedbackText { + font-family: var(--ifm-font-family-base); width: 100%; border: var(--ifm-hr-border-color) 1px solid; border-radius: 0.4rem; padding: 0.4rem; - font-family: "Manrope", sans-serif; } .feedbackButton { diff --git a/docs-website/src/components/MarkpromptHelp/markprompthelp.module.scss b/docs-website/src/components/MarkpromptHelp/markprompthelp.module.scss index 270877cd04a9ff..0d874cad11790f 100644 --- a/docs-website/src/components/MarkpromptHelp/markprompthelp.module.scss +++ b/docs-website/src/components/MarkpromptHelp/markprompthelp.module.scss @@ -325,7 +325,6 @@ button { padding-left: 1.5714286em; } .MarkpromptAnswer ol > li::marker { - font-weight: 400; color: var(--markprompt-foreground); } .MarkpromptAnswer ul > li::marker { @@ -454,7 +453,6 @@ button { background-color: var(--markprompt-muted); border: 1px solid var(--markprompt-border); overflow-x: auto; - font-weight: 400; font-size: 0.8571429em; line-height: 1.6666667; margin-top: 1.6666667em; diff --git a/docs-website/src/pages/_components/Hero/hero.module.scss b/docs-website/src/pages/_components/Hero/hero.module.scss index c2103bb0782bd2..6e4a623f469d51 100644 --- a/docs-website/src/pages/_components/Hero/hero.module.scss +++ b/docs-website/src/pages/_components/Hero/hero.module.scss @@ -74,3 +74,26 @@ margin-right: 
0.5rem; } } + +.quickstartContent { + text-align: center; + padding: 2rem 0; + height: 100%; + margin: 2rem 0; + background: #34394d; + border-radius: var(--ifm-card-border-radius); +} + +.quickstartTitle { + color: #fafafa; +} + +.quickstartSubtitle { + font-size: 1.1rem; + color: gray; +} + +.quickstartCodeblock { + text-align: left; + padding: 0 20vh; +} diff --git a/docs-website/src/pages/_components/Hero/index.js b/docs-website/src/pages/_components/Hero/index.js index 22b406dce037ef..ffa298b27a8225 100644 --- a/docs-website/src/pages/_components/Hero/index.js +++ b/docs-website/src/pages/_components/Hero/index.js @@ -34,12 +34,11 @@ const Hero = ({}) => { complexity of your data ecosystem.
- Built with ❤️ by{" "} - {" "} + Built with ❤️ by {" "} Acryl Data {" "} - and LinkedIn. + and LinkedIn.
Get Started → @@ -51,10 +50,10 @@ const Hero = ({}) => {Run the following command to get started with DataHub.
-Run the following command to get started with DataHub.
+ -- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Custom Assertion is failing. This +- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Custom SQL Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when an incident is created due to an Assertion failure. -- **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Custom Assertion. Note that +- **Resolve incident**: Automatically resolve any incidents that were raised due to failures in this Custom SQL Assertion. Note that any other incidents will not be impacted. 1. Click **Save**. -And that's it! DataHub will now begin to monitor your Custom Assertion for the table. +And that's it! DataHub will now begin to monitor your Custom SQL Assertion for the table. -To view the time of the next Custom Assertion evaluation, simply click **Custom** and then click on your +To view the time of the next Custom SQL Assertion evaluation, simply click **Custom** and then click on your new Assertion:
@@ -198,12 +198,12 @@ Once your assertion has run, you will begin to see Success or Failure status for
-## Stopping a Custom Assertion +## Stopping a Custom SQL Assertion -In order to temporarily stop the evaluation of a Custom Assertion: +In order to temporarily stop the evaluation of a Custom SQL Assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Custom** to open the Custom Assertions list +2. Click **Custom** to open the Custom SQL Assertions list 3. Click the three-dot menu on the right side of the assertion you want to disable 4. Click **Stop** @@ -211,16 +211,16 @@ In order to temporarily stop the evaluation of a Custom Assertion: -To resume the Custom Assertion, simply click **Turn On**. +To resume the Custom SQL Assertion, simply click **Turn On**.-## Creating Custom Assertions via API +## Creating Custom SQL Assertions via API -Under the hood, Acryl DataHub implements Custom Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Custom SQL Assertion Monitoring using two "entity" concepts: - **Assertion**: The specific expectation for the custom assertion, e.g. "The table was changed in the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -233,15 +233,15 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Custom Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Custom Assertion entity and create an Assertion Monitor entity responsible for evaluating it. +In order to create a Custom SQL Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 +GraphQL mutation queries to create a Custom SQL Assertion entity and create an Assertion Monitor entity responsible for evaluating it. -Start by creating the Custom Assertion entity using the `createSqlAssertion` query and hang on to the 'urn' field of the Assertion entity +Start by creating the Custom SQL Assertion entity using the `createSqlAssertion` query and hang on to the 'urn' field of the Assertion entity you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. ##### Examples -To create a Custom Assertion Entity that checks whether a query result is greater than 100: +To create a Custom SQL Assertion Entity that checks whether a query result is greater than 100: ```json mutation createSqlAssertion { @@ -265,7 +265,7 @@ mutation createSqlAssertion { } ``` -The supported custom assertion types are `METRIC` and `METRIC_CHANGE`. If you choose `METRIC_CHANGE`, +The supported assertion types are `METRIC` and `METRIC_CHANGE`. If you choose `METRIC_CHANGE`, you will need to provide a `changeType` parameter with either `ABSOLUTE` or `PERCENTAGE` values. The supported operator types are `EQUAL_TO`, `NOT_EQUAL_TO`, `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). The supported parameter types are `NUMBER`. diff --git a/docs/managed-datahub/observe/freshness-assertions.md b/docs/managed-datahub/observe/freshness-assertions.md index c5d4ca9081b43d..82de423f6f2de2 100644 --- a/docs/managed-datahub/observe/freshness-assertions.md +++ b/docs/managed-datahub/observe/freshness-assertions.md @@ -125,7 +125,7 @@ Change Source types vary by the platform, but generally fall into these categori - **DataHub Operation**: A DataHub "Operation" aspect contains timeseries information used to describe changes made to an entity. 
Using this option avoids contacting your data platform, and instead uses the DataHub Operation metadata to evaluate Freshness Assertions. This relies on Operations being reported to DataHub, either via ingestion or via use of the DataHub APIs (see [Report Operation via API](#reporting-operations-via-api)). - Note if you have not configured an ingestion source through DataHub, then this may be the only option available. + Note if you have not configured an ingestion source through DataHub, then this may be the only option available. By default, any operation type found will be considered a valid change. Use the **Operation Types** dropdown when selecting this option to specify which operation types should be considered valid changes. You may choose from one of DataHub's standard Operation Types, or specify a "Custom" Operation Type by typing in the name of the Operation Type. Using either of the column value approaches (**Last Modified Column** or **High Watermark Column**) to determine whether a Table has changed can be useful because it can be customized to determine whether specific types of important changes have been made to a given Table. Because it does not involve system warehouse tables, it is also easily portable across Data Warehouse and Data Lake providers. diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index be2d7d795de701..ba101be16b98e7 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -16,7 +16,6 @@ An important question that will arise once you've decided to extend the metadata - The green lines represent pathways that will lead to lesser friction for you to maintain your code long term. The red lines represent higher risk of conflicts in the future. We are working hard to move the majority of model extension use-cases to no-code / low-code pathways to ensure that you can extend the core metadata model without having to maintain a custom fork of DataHub. We will refer to the two options as the **open-source fork** and **custom repository** approaches in the rest of the document below. @@ -92,10 +91,11 @@ the annotation model. Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step-4-choose-a-place-to-store-your-model-extension) and [5](#step-5-attaching-your-non-key-aspects-to-the-entity). Example: + ```yaml - - name: dashboard - doc: A container of related data assets. - keyAspect: dashboardKey +- name: dashboard + doc: A container of related data assets. + keyAspect: dashboardKey ``` - name: The entity name/type, this will be present as a part of the Urn. @@ -196,8 +196,8 @@ The Aspect has four key components: its properties, the @Aspect annotation, the can be defined as PDL primitives, enums, records, or collections ( see [pdl schema documentation](https://linkedin.github.io/rest.li/pdl_schema)) references to other entities, of type Urn or optionally `
Then navigate to the `Ownership Types` tab under the `Management` section. @@ -61,7 +61,7 @@ Inside the form, you can choose a name for your Ownership Type. You can also add Don't worry, this can be changed later.
Once you've chosen a name and a description, click 'Save' to create the new Ownership Type. @@ -162,13 +162,13 @@ You can assign an owner with a custom ownership type to an entity either using t On an Entity's profile page, use the right sidebar to locate the Owners section.
Click 'Add Owners', select the owner you want and then search for the Custom Ownership Type you'd like to add this asset to. When you're done, click 'Add'.
To remove ownership from an asset, click the 'x' icon on the Owner label. diff --git a/docs/quick-ingestion-guides/looker/configuration.md b/docs/quick-ingestion-guides/looker/configuration.md new file mode 100644 index 00000000000000..d9ba1907b006ef --- /dev/null +++ b/docs/quick-ingestion-guides/looker/configuration.md @@ -0,0 +1,212 @@ +--- +title: Configuration +--- +# Configuring Looker & LookML Connector + +Now that you have created a DataHub-specific API key with the relevant access in [the prior step](setup.md), it's time to set up a connection via the DataHub UI. + +## Configure Secrets + +You must create two secrets to configure a connection with Looker or LookML. + +* `LOOKER_CLIENT_ID` +* `LOOKER_CLIENT_SECRET` + +On your DataHub instance, navigate to the **Ingestion** tab in your screen's top right corner. + +
+ +:::note +If you do not see the Ingestion tab, please get in touch with your DataHub admin to grant you the correct permissions. +::: + +Navigate to the **Secrets** tab and click **Create new secret**. + ++ +
+ +First, create a secret for the **Client ID**. The value should be the **Client ID** of the API key created in the [prior step](setup.md#create-an-api-key). + +
+ +Then, create a secret for the **Client Secret**. The value should be the **Client Secret** of the API key created in the [prior step](setup.md#create-an-api-key). + +
+ + +## Configure Looker Ingestion + +### Configure Recipe + +Navigate to the **Sources** tab and click **Create new source**. + ++ +
+ +Choose `Looker`. + ++ +
+ +Enter the details into the Looker Recipe. + +* **Base URL:** This is your Looker instance URL. (i.e. `https://abc.cloud.looker.com`) +* **Client ID:** Use the secret `LOOKER_CLIENT_ID` with the format `${LOOKER_CLIENT_ID}`. +* **Client Secret:** Use the secret `LOOKER_CLIENT_SECRET` with the format `${LOOKER_CLIENT_SECRET}`.
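For reference, a minimal sketch of the recipe those fields produce, assuming the two secrets created above and a placeholder instance URL (swap in your own values; field names follow the Looker source documentation):

```yml
source:
  type: looker
  config:
    # Coordinates: your Looker instance URL (placeholder value)
    base_url: https://abc.cloud.looker.com
    # Credentials: resolved from the DataHub secrets created above
    client_id: "${LOOKER_CLIENT_ID}"
    client_secret: "${LOOKER_CLIENT_SECRET}"
```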
+ + After completing the recipe, click **Next**. + +### Schedule Execution + +Now, it's time to schedule a recurring ingestion pipeline to extract metadata from your Looker instance regularly. + +Decide how regularly you want this ingestion to run-- day, month, year, hour, minute, etc. Select from the dropdown. + ++ +
+ +Ensure you've configured your correct timezone. + ++ +
+ +Finally, click **Next** when you are done. + +### Finish Up + +Name your ingestion source, then click **Save and Run**. + ++ +
+ +You will now find your new ingestion source running. + ++ +
+ +## Configure LookML Connector + +Now that you have created a DataHub-specific API key and Deploy Key with the relevant access in [the prior step](setup.md), it's time to set up a connection via the DataHub UI. + +### Configure Recipe + +Navigate to the **Sources** tab and click **Create new source**. + ++ +
+ +Choose `LookML`. + +
+ +Enter the details into the LookML Recipe. You need to set at least 5 fields in the recipe for this quick ingestion guide: + +* **GitHub Repository:** This is your GitHub repository where LookML models are stored. You can provide the full URL (example: `https://github.com/your-org/your-repo`) or organization/repo; in this case, the connector assumes it is a GitHub repo. +* **GitHub Deploy Key:** Copy the content of `looker_datahub_deploy_key` and paste it into this field. +* **Looker Base URL:** This is your Looker instance URL. (i.e. https://abc.cloud.looker.com) +* **Looker Client ID:** Use the secret LOOKER_CLIENT_ID with the format `${LOOKER_CLIENT_ID}`. +* **Looker Client Secret:** Use the secret LOOKER_CLIENT_SECRET with the format `${LOOKER_CLIENT_SECRET}`. + +Your recipe should look something like this: + +
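A rough sketch of that LookML recipe, assuming the private key from the setup step was stored as a DataHub secret named `LOOKER_DATAHUB_DEPLOY_KEY` (a name chosen here for illustration) and using the `github_info`/`api` field names from the LookML source documentation:

```yml
source:
  type: lookml
  config:
    # GitHub repository holding the LookML models (org/repo shorthand; placeholder value)
    github_info:
      repo: your-org/your-lookml-repo
      deploy_key: "${LOOKER_DATAHUB_DEPLOY_KEY}"
    # Looker API coordinates and credentials, reusing the secrets created earlier
    api:
      base_url: https://abc.cloud.looker.com
      client_id: "${LOOKER_CLIENT_ID}"
      client_secret: "${LOOKER_CLIENT_SECRET}"
```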
+ + +After completing the recipe, click **Next**. + +### Schedule Execution + +Now, it's time to schedule a recurring ingestion pipeline to extract metadata from your Looker instance regularly. + +Decide how regularly you want this ingestion to run-- day, month, year, hour, minute, etc. Select from the dropdown. + ++ +
+ +Ensure you've configured your correct timezone. ++ +
+ +Click **Next** when you are done. + +### Finish Up + +Name your ingestion source, then click **Save and Run**. ++ +
+ +You will now find your new ingestion source running. + ++ +
+ +## Validate Ingestion Runs + +View the latest status of ingestion runs on the Ingestion page. + ++ +
+ +Click the `+` sign to expand the complete list of historical runs and outcomes; click **Details** to see the results of a specific run. + ++ +
+ +From the Ingestion Run Details page, pick **View All** to see which entities were ingested. + ++ +
+ +Pick an entity from the list to manually validate if it contains the detail you expected. + ++ +
+ +**Congratulations!** You've successfully set up Looker & LookML as an ingestion source for DataHub! + +*Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!* diff --git a/docs/quick-ingestion-guides/looker/overview.md b/docs/quick-ingestion-guides/looker/overview.md new file mode 100644 index 00000000000000..843d704526bcc4 --- /dev/null +++ b/docs/quick-ingestion-guides/looker/overview.md @@ -0,0 +1,52 @@ +--- +title: Overview +--- +# Looker & LookML Ingestion Guide: Overview + +## What You Will Get Out of This Guide + +This guide will help you set up the Looker & LookML connectors to begin ingesting metadata into DataHub. +Upon completing this guide, you will have a recurring ingestion pipeline to extract metadata from Looker & LookML and load it into DataHub. + +### Looker + +The Looker connector will ingest the following Looker asset types: + +* [Dashboards](https://cloud.google.com/looker/docs/dashboards) +* [Charts](https://cloud.google.com/looker/docs/creating-visualizations) +* [Explores](https://cloud.google.com/looker/docs/reference/param-explore-explore) +* [Schemas](https://developers.looker.com/api/explorer/4.0/methods/Metadata/connection_schemas) +* [Owners of Dashboards](https://cloud.google.com/looker/docs/creating-user-defined-dashboards) + +:::note + +To get complete Looker metadata integration (including Looker views and lineage to the underlying warehouse tables), you must also use the [lookml](https://datahubproject.io/docs/generated/ingestion/sources/looker#module-lookml) connector. + +::: + + +### LookML + +The LookML connector will ingest the following LookML asset types: + +* [LookML views from model files in a project](https://cloud.google.com/looker/docs/reference/param-view-view) +* [Metadata for dimensions](https://cloud.google.com/looker/docs/reference/param-field-dimension) +* [Metadata for measures](https://cloud.google.com/looker/docs/reference/param-measure-types) +* [Dimension Groups as tags](https://cloud.google.com/looker/docs/reference/param-field-dimension-group) + +:::note + +To get complete Looker metadata integration (including Looker views and lineage to the underlying warehouse tables), you must also use the [looker](https://datahubproject.io/docs/generated/ingestion/sources/looker#module-looker) connector. + +::: + +## Next Steps +Please continue to the [setup guide](setup.md), where we'll describe the prerequisites. + +### Reference + +If you want to ingest metadata from Looker using the DataHub CLI, check out the following resources: +* Learn about CLI Ingestion in the [Introduction to Metadata Ingestion](../../../metadata-ingestion/README.md) +* [Looker Ingestion Source](https://datahubproject.io/docs/generated/ingestion/sources/Looker) + +*Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!* diff --git a/docs/quick-ingestion-guides/looker/setup.md b/docs/quick-ingestion-guides/looker/setup.md new file mode 100644 index 00000000000000..c08de116895ea5 --- /dev/null +++ b/docs/quick-ingestion-guides/looker/setup.md @@ -0,0 +1,156 @@ +--- +title: Setup +--- + +# Looker & LookML Ingestion Guide: Setup + +## Looker Prerequisites + +To configure ingestion from Looker, you'll first have to ensure you have an API key to access the Looker resources. + +### Login To Looker Instance + +Log in to your Looker instance (e.g. `https://
+ +### Create A New Permission Set + +On the **Roles** Panel, click **New Permission Set**. + +
+ +Set a name for the new permission set (e.g., *DataHub Connector Permission Set*) and select the following permissions. + ++ +
+ +### Create A Role + +On the **Roles** Panel, click **New Role**. + ++ +
+ +Set the name for the new role (e.g., *DataHub Extractor*) and set the following fields in this window. + +- Set **Permission Set** to the permission set created in the previous step (i.e., *DataHub Connector Permission Set*) +- Set **Model Set** to `All` + +Finally, click **New Role** at the bottom of the page. + +
+ +### Create A New User + +On the **Admin** Panel, click **Users** to open the users panel. + ++ +
+ +Click **Add Users**. + ++ +
+ +On **Adding a new user**, fill in the following fields. + +- Add the user's **Email Addresses**. +- Set **Roles** to the role created in the previous step (e.g. *DataHub Extractor*) + +Finally, click **Save**. + +
+ +### Create An API Key + +On the **User** Panel, click on the newly created user. + ++ +
+ +Click **Edit Keys** to open the **API Key** Panel. + ++ +
+ +On the **API Key** Panel, click **New API Key** to generate a new **Client ID** and **Client Secret**. ++ +
+ +## LookML Prerequisites + +Follow the steps below to create a GitHub Deploy Key. + +### Generate a private-public SSH key pair + +```bash + ssh-keygen -t rsa -f looker_datahub_deploy_key +``` + +This will typically generate the two files below. +* `looker_datahub_deploy_key` (private key) +* `looker_datahub_deploy_key.pub` (public key) + + +### Add Deploy Key to GitHub Repository + +First, log in to [GitHub](https://github.com). + +Navigate to **GitHub Repository** -> **Settings** -> **Deploy Keys** and add the public key (e.g. `looker_datahub_deploy_key.pub`) as a deploy key with read access. + +
+ +Make a note of the private key file. You must paste the file's contents into the GitHub Deploy Key field later while [configuring](./configuration.md) ingestion on the DataHub Portal. + +## Next Steps + +Once you've done all the above steps, it's time to move on to [configuring the actual ingestion source](configuration.md) within DataHub. + +_Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!_ \ No newline at end of file diff --git a/metadata-ingestion/.gitignore b/metadata-ingestion/.gitignore index 673c8e0995872b..acc15c45988698 100644 --- a/metadata-ingestion/.gitignore +++ b/metadata-ingestion/.gitignore @@ -8,6 +8,7 @@ bq_credentials.json junit.*.xml /tmp *.bak +custom-package/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 3b1aae0b24f885..a0fef614528cbe 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -176,7 +176,7 @@ The `deploy` subcommand of the `ingest` command tree allows users to upload thei datahub ingest deploy -n