Skip to content

Commit

Permalink
Merge branch 'master' into ps-ingestion-memory-profile
Browse files Browse the repository at this point in the history
  • Loading branch information
pedro93 authored Sep 26, 2023
2 parents e434e7a + 0a869dd commit fec63b5
Show file tree
Hide file tree
Showing 44 changed files with 1,633 additions and 224 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
echo "full_tag=$(get_tag)-full" >> $GITHUB_OUTPUT
echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
- name: Check whether publishing enabled
id: publish
Expand Down Expand Up @@ -501,7 +501,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute DataHub Ingestion (Base-Slim) Tag
id: tag
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
datahub_ingestion_base_full_build:
name: Build and Push DataHub Ingestion (Base-Full) Docker Image
runs-on: ubuntu-latest
Expand Down Expand Up @@ -567,13 +567,13 @@ jobs:
datahub-ingestion:
- 'docker/datahub-ingestion/**'
- name: Build codegen
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
run: ./gradlew :metadata-ingestion:codegen
- name: Download Base Image
uses: ishworkh/docker-image-artifact-download@v1
if: ${{ needs.setup.outputs.publish != 'true' && steps.filter.outputs.datahub-ingestion-base == 'true' }}
with:
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}
- name: Build and push Slim Image
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
uses: ./.github/actions/docker-custom-build-and-push
Expand All @@ -583,7 +583,7 @@ jobs:
${{ env.DATAHUB_INGESTION_IMAGE }}
build-args: |
BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
APP_ENV=slim
tags: ${{ needs.setup.outputs.slim_tag }}
Expand All @@ -595,7 +595,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute Tag
id: tag
run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> $GITHUB_OUTPUT
datahub_ingestion_slim_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down Expand Up @@ -650,7 +650,7 @@ jobs:
datahub-ingestion:
- 'docker/datahub-ingestion/**'
- name: Build codegen
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
run: ./gradlew :metadata-ingestion:codegen
- name: Download Base Image
uses: ishworkh/docker-image-artifact-download@v1
Expand Down
5 changes: 5 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,11 @@ subprojects {
}
// https://docs.gradle.org/current/userguide/performance.html
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1

if (project.configurations.getByName("testImplementation").getDependencies()
.any{ it.getName() == "testng" }) {
useTestNG()
}
}

afterEvaluate {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.linkedin.datahub.graphql.resolvers.auth;

import com.datahub.authentication.Authentication;
import com.google.common.collect.ImmutableList;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.TestUtils;
Expand All @@ -8,6 +9,10 @@
import com.linkedin.datahub.graphql.generated.ListAccessTokenResult;
import com.linkedin.entity.client.EntityClient;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.SortCriterion;
import com.linkedin.metadata.search.SearchEntityArray;
import com.linkedin.metadata.search.SearchResult;
import graphql.schema.DataFetchingEnvironment;
import java.util.Collections;
import org.mockito.Mockito;
Expand Down Expand Up @@ -36,14 +41,17 @@ public void testGetSuccess() throws Exception {
Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input);

final EntityClient mockClient = Mockito.mock(EntityClient.class);
Mockito.when(Mockito.eq(mockClient.filter(
Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME),
Mockito.eq(buildFilter(filters, Collections.emptyList())),
Mockito.notNull(),
Mockito.eq(input.getStart()),
Mockito.eq(input.getCount()),
Mockito.eq(getAuthentication(mockEnv)))))
.thenReturn(null);
final Authentication testAuth = getAuthentication(mockEnv);
Mockito.when(mockClient.search(
Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME),
Mockito.eq(""),
Mockito.eq(buildFilter(filters, Collections.emptyList())),
Mockito.any(SortCriterion.class),
Mockito.eq(input.getStart()),
Mockito.eq(input.getCount()),
Mockito.eq(testAuth),
Mockito.any(SearchFlags.class)))
.thenReturn(new SearchResult().setFrom(0).setNumEntities(0).setPageSize(0).setEntities(new SearchEntityArray()));

final ListAccessTokensResolver resolver = new ListAccessTokensResolver(mockClient);
final ListAccessTokenResult listAccessTokenResult = resolver.get(mockEnv).get();
Expand Down
4 changes: 2 additions & 2 deletions docker/datahub-ingestion-base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG APP_ENV=full
ARG BASE_IMAGE=base

FROM golang:1-alpine3.17 AS binary
FROM golang:1-alpine3.17 AS dockerize-binary

ENV DOCKERIZE_VERSION v0.6.1
WORKDIR /go/src/github.com/jwilder
Expand Down Expand Up @@ -41,7 +41,7 @@ RUN apt-get update && apt-get install -y -qq \
&& rm -rf /var/lib/apt/lists/* /var/cache/apk/*

# compiled against newer golang for security fixes
COPY --from=binary /go/bin/dockerize /usr/local/bin
COPY --from=dockerize-binary /go/bin/dockerize /usr/local/bin

COPY ./docker/datahub-ingestion-base/base-requirements.txt requirements.txt
COPY ./docker/datahub-ingestion-base/entrypoint.sh /entrypoint.sh
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Defining environment
ARG APP_ENV=full
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=latest
ARG DOCKER_VERSION=head

FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile-slim-only
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Defining environment
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=latest
ARG DOCKER_VERSION=head-slim

FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0
Expand Down
9 changes: 7 additions & 2 deletions docker/postgres-setup/init.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
#!/bin/sh
export PGPASSWORD=$POSTGRES_PASSWORD

POSTGRES_CREATE_DB=${POSTGRES_CREATE_DB:-true}
POSTGRES_CREATE_DB_CONNECTION_DB=${POSTGRES_CREATE_DB_CONNECTION_DB:-postgres}

# workaround create database if not exists, check https://stackoverflow.com/a/36591842
psql -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT -tc "SELECT 1 FROM pg_database WHERE datname = '${DATAHUB_DB_NAME}'" | grep -q 1 || psql -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT -c "CREATE DATABASE ${DATAHUB_DB_NAME}"
if [ "$POSTGRES_CREATE_DB" = true ]; then
psql -d "$POSTGRES_CREATE_DB_CONNECTION_DB" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -tc "SELECT 1 FROM pg_database WHERE datname = '${DATAHUB_DB_NAME}'" | grep -q 1 || psql -d "$POSTGRES_CREATE_DB_CONNECTION_DB" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -c "CREATE DATABASE ${DATAHUB_DB_NAME}"
fi

sed -e "s/DATAHUB_DB_NAME/${DATAHUB_DB_NAME}/g" /init.sql | tee -a /tmp/init-final.sql
psql -d $DATAHUB_DB_NAME -U $POSTGRES_USERNAME -h $POSTGRES_HOST -p $POSTGRES_PORT < /tmp/init-final.sql
psql -d "$DATAHUB_DB_NAME" -U "$POSTGRES_USERNAME" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" < /tmp/init-final.sql
4 changes: 2 additions & 2 deletions docs-website/src/pages/docs/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ const quickLinkContent = [
{
title: "Developer Guides",
icon: <CodeTwoTone />,
description: "Interact with DataHub programmatically ",
to: "/docs/cli",
description: "Interact with DataHub programmatically",
to: "/docs/api/datahub-apis",
},
{
title: "Feature Guides",
Expand Down
41 changes: 22 additions & 19 deletions docs/authorization/policies.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,28 +145,31 @@ For example, the following resource filter will apply the policy to datasets, ch

```json
{
"resource": {
"criteria": [
{
"field": "resource_type",
"values": [
"dataset",
"chart",
"dashboard"
],
"condition": "EQUALS"
},
{
"field": "domain",
"values": [
"urn:li:domain:domain1"
],
"condition": "EQUALS"
"resources": {
"filter": {
"criteria": [
{
"field": "RESOURCE_TYPE",
"condition": "EQUALS",
"values": [
"dataset",
"chart",
"dashboard"
]
},
{
"field": "DOMAIN",
"values": [
"urn:li:domain:domain1"
],
"condition": "EQUALS"
}
]
}
]
}
}
}
```
Here, `resources` is a field inside the `info` aspect of a Policy.

Supported fields are as follows:

Expand Down
1 change: 0 additions & 1 deletion docs/datahub_lite.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import TabItem from '@theme/TabItem';

DataHub Lite is a lightweight embeddable version of DataHub with no external dependencies. It is intended to enable local developer tooling use-cases such as simple access to metadata for scripts and other tools.
DataHub Lite is compatible with the DataHub metadata format and all the ingestion connectors that DataHub supports.
It was built as a reaction to [recap](https://github.com/recap-cloud/recap) to prove that a similar lightweight system could be built within DataHub quite easily.
Currently DataHub Lite uses DuckDB under the covers as its default storage layer, but that might change in the future.

## Features
Expand Down
Loading

0 comments on commit fec63b5

Please sign in to comment.