Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Nov 22, 2023
2 parents d6b848a + 15e68bb commit 1f74fca
Show file tree
Hide file tree
Showing 97 changed files with 1,587 additions and 2,565 deletions.
11 changes: 10 additions & 1 deletion .github/actions/docker-custom-build-and-push/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,20 @@ runs:
push: false
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
cache-to: type=inline
# Picks the first whitespace-separated entry from the docker_meta tag list and
# exposes it as the SINGLE_TAG output of this step (id: single_tag).
# Only runs when the `publish` input is not 'true'.
- name: Single Tag
if: ${{ inputs.publish != 'true' }}
shell: bash
run: |
TAGS="""
${{ steps.docker_meta.outputs.tags }}
"""
echo "SINGLE_TAG=$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT
id: single_tag
- name: Upload image locally for testing (if not publishing)
uses: ishworkh/docker-image-artifact-upload@v1
if: ${{ inputs.publish != 'true' }}
with:
image: ${{ steps.docker_meta.outputs.tags }}
image: ${{ steps.single_tag.outputs.SINGLE_TAG }}

# Code for building multi-platform images and pushing to Docker Hub.
- name: Set up QEMU
Expand Down
6 changes: 3 additions & 3 deletions .github/scripts/docker_helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ export SHORT_SHA=$(get_short_sha)
echo "SHORT_SHA: $SHORT_SHA"

# Prints the image tag(s) derived from GITHUB_REF using sed:
#   refs/heads/${MAIN_BRANCH} -> ${MAIN_BRANCH_TAG}
#   refs/tags/<name>          -> <name>
#   refs/pull/<number>...     -> pr<number>
function get_tag {
# This form adds ",${SHORT_SHA}" (comma escaped because "," is the sed delimiter)
# only as part of the main-branch substitution.
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
# This form appends ",${SHORT_SHA}" after the sed output, for every kind of ref.
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g'),${SHORT_SHA}
}

# Prints the "-slim" image tag(s) derived from GITHUB_REF using sed:
#   refs/heads/${MAIN_BRANCH} -> ${MAIN_BRANCH_TAG}-slim
#   refs/tags/<name>          -> <name>   (no -slim suffix is added by this rule)
#   refs/pull/<number>...     -> pr<number>-slim
function get_tag_slim {
# This form adds ",${SHORT_SHA}-slim" (comma escaped because "," is the sed delimiter)
# only as part of the main-branch substitution.
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim\,${SHORT_SHA}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
# This form appends ",${SHORT_SHA}-slim" after the sed output, for every kind of ref.
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g'),${SHORT_SHA}-slim
}

# Prints the "-full" image tag(s) derived from GITHUB_REF using sed:
#   refs/heads/${MAIN_BRANCH} -> ${MAIN_BRANCH_TAG}-full
#   refs/tags/<name>          -> <name>   (no -full suffix is added by this rule)
#   refs/pull/<number>...     -> pr<number>-full
function get_tag_full {
# This form adds ",${SHORT_SHA}-full" (comma escaped because "," is the sed delimiter)
# only as part of the main-branch substitution.
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full\,${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
# This form appends ",${SHORT_SHA}-full" after the sed output, for every kind of ref.
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g'),${SHORT_SHA}-full
}

function get_python_docker_release_v {
Expand Down
26 changes: 26 additions & 0 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,26 @@ jobs:
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
publish: ${{ steps.publish.outputs.publish }}
python_release_version: ${{ steps.tag.outputs.python_release_version }}
short_sha: ${{ steps.tag.outputs.short_sha }}
branch_name: ${{ steps.tag.outputs.branch_name }}
repository_name: ${{ steps.tag.outputs.repository_name }}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Compute Tag
id: tag
run: |
source .github/scripts/docker_helpers.sh
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "tag=$(get_tag)" >> $GITHUB_OUTPUT
echo "slim_tag=$(get_tag_slim)" >> $GITHUB_OUTPUT
echo "full_tag=$(get_tag_full)" >> $GITHUB_OUTPUT
echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "unique_slim_tag=$(get_unique_tag_slim)" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag_full)" >> $GITHUB_OUTPUT
echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
echo "repository_name=${GITHUB_REPOSITORY#*/}" >> $GITHUB_OUTPUT
- name: Check whether publishing enabled
id: publish
env:
Expand Down Expand Up @@ -866,3 +872,23 @@ jobs:
job-status: ${{ job.status }}
slack-bot-token: ${{ secrets.SLACK_BOT_TOKEN }}
channel: github-activities
# Job that runs after `setup` and `smoke_test` and publishes a "git-sync" command
# message to the SQS queue given by the DATAHUB_HEAD_SYNC_QUEUE secret.
# Both steps are skipped when the setup job's `publish` output equals 'false'.
# The message carries the repository name, owner, branch and short SHA taken from
# the setup job outputs and the github context.
deploy_datahub_head:
name: Deploy to Datahub HEAD
runs-on: ubuntu-latest
needs:
[
setup,
smoke_test
]
steps:
- uses: aws-actions/configure-aws-credentials@v1
if: ${{ needs.setup.outputs.publish != 'false' }}
with:
aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SQS_ACCESS_KEY }}
aws-region: us-west-2
- uses: isbang/[email protected]
if: ${{ needs.setup.outputs.publish != 'false' }}
with:
sqs-url: ${{ secrets.DATAHUB_HEAD_SYNC_QUEUE }}
message: '{ "command": "git-sync", "args" : {"repoName": "${{ needs.setup.outputs.repository_name }}", "repoOrg": "${{ github.repository_owner }}", "repoBranch": "${{ needs.setup.outputs.branch_name }}", "repoShaShort": "${{ needs.setup.outputs.short_sha }}" }}'
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ venv.bak/
dmypy.json
MANIFEST
*.pyc
.python-version

# Generated files
**/bin
Expand Down
28 changes: 15 additions & 13 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ buildscript {
ext.logbackClassic = '1.2.12'
ext.hadoop3Version = '3.3.5'
ext.kafkaVersion = '2.3.0'
ext.hazelcastVersion = '5.3.1'
ext.hazelcastVersion = '5.3.6'
ext.ebeanVersion = '12.16.1'

ext.docker_registry = 'linkedin'
Expand Down Expand Up @@ -53,7 +53,7 @@ project.ext.spec = [
'pegasus' : [
'd2' : 'com.linkedin.pegasus:d2:' + pegasusVersion,
'data' : 'com.linkedin.pegasus:data:' + pegasusVersion,
'dataAvro1_6' : 'com.linkedin.pegasus:data-avro-1_6:' + pegasusVersion,
'dataAvro': 'com.linkedin.pegasus:data-avro:' + pegasusVersion,
'generator': 'com.linkedin.pegasus:generator:' + pegasusVersion,
'restliCommon' : 'com.linkedin.pegasus:restli-common:' + pegasusVersion,
'restliClient' : 'com.linkedin.pegasus:restli-client:' + pegasusVersion,
Expand All @@ -71,22 +71,22 @@ project.ext.externalDependency = [
'assertJ': 'org.assertj:assertj-core:3.11.1',
'avro': 'org.apache.avro:avro:1.11.3',
'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3',
'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10',
'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1',
'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8',
'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:1.0.0',
'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.17',
'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.9',
'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.13',
'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:1.0.2',
'awsRds':'software.amazon.awssdk:rds:2.18.24',
'cacheApi' : 'javax.cache:cache-api:1.1.0',
'cacheApi': 'javax.cache:cache-api:1.1.0',
'commonsCli': 'commons-cli:commons-cli:1.5.0',
'commonsIo': 'commons-io:commons-io:2.4',
'commonsLang': 'commons-lang:commons-lang:2.6',
'commonsText': 'org.apache.commons:commons-text:1.10.0',
'commonsCollections': 'commons-collections:commons-collections:3.2.2',
'data' : 'com.linkedin.pegasus:data:' + pegasusVersion,
'caffeine': 'com.github.ben-manes.caffeine:caffeine:3.1.8',
'datastaxOssNativeProtocol': 'com.datastax.oss:native-protocol:1.5.1',
'datastaxOssCore': 'com.datastax.oss:java-driver-core:4.14.1',
'datastaxOssQueryBuilder': 'com.datastax.oss:java-driver-query-builder:4.14.1',
'dgraph4j' : 'io.dgraph:dgraph4j:21.03.1',
'dgraph4j' : 'io.dgraph:dgraph4j:21.12.0',
'dropwizardMetricsCore': 'io.dropwizard.metrics:metrics-core:4.2.3',
'dropwizardMetricsJmx': 'io.dropwizard.metrics:metrics-jmx:4.2.3',
'ebean': 'io.ebean:ebean:' + ebeanVersion,
Expand Down Expand Up @@ -131,7 +131,7 @@ project.ext.externalDependency = [
'jsonPatch': 'com.github.java-json-tools:json-patch:1.13',
'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1',
'jsonSmart': 'net.minidev:json-smart:2.4.9',
'json': 'org.json:json:20230227',
'json': 'org.json:json:20231013',
'junit': 'junit:junit:4.13.2',
'junitJupiterApi': "org.junit.jupiter:junit-jupiter-api:$junitJupiterVersion",
'junitJupiterParams': "org.junit.jupiter:junit-jupiter-params:$junitJupiterVersion",
Expand All @@ -140,7 +140,7 @@ project.ext.externalDependency = [
'kafkaAvroSerde': 'io.confluent:kafka-streams-avro-serde:5.5.1',
'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4',
'kafkaClients': "org.apache.kafka:kafka-clients:$kafkaVersion",
'snappy': 'org.xerial.snappy:snappy-java:1.1.10.3',
'snappy': 'org.xerial.snappy:snappy-java:1.1.10.4',
'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic",
'slf4jApi': "org.slf4j:slf4j-api:$slf4jVersion",
'log4jCore': "org.apache.logging.log4j:log4j-core:$log4jVersion",
Expand All @@ -164,6 +164,7 @@ project.ext.externalDependency = [
'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion,
'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15',
'parquet': 'org.apache.parquet:parquet-avro:1.12.3',
'parquetHadoop': 'org.apache.parquet:parquet-hadoop:1.13.1',
'picocli': 'info.picocli:picocli:4.5.0',
'playCache': "com.typesafe.play:play-cache_2.12:$playVersion",
'playWs': 'com.typesafe.play:play-ahc-ws-standalone_2.12:2.1.10',
Expand All @@ -178,6 +179,7 @@ project.ext.externalDependency = [
'playPac4j': 'org.pac4j:play-pac4j_2.12:9.0.2',
'postgresql': 'org.postgresql:postgresql:42.3.8',
'protobuf': 'com.google.protobuf:protobuf-java:3.19.6',
'grpcProtobuf': 'io.grpc:grpc-protobuf:1.53.0',
'rangerCommons': 'org.apache.ranger:ranger-plugins-common:2.3.0',
'reflections': 'org.reflections:reflections:0.9.9',
'resilience4j': 'io.github.resilience4j:resilience4j-retry:1.7.1',
Expand All @@ -201,7 +203,7 @@ project.ext.externalDependency = [
'springBootStarterJetty': "org.springframework.boot:spring-boot-starter-jetty:$springBootVersion",
'springBootStarterCache': "org.springframework.boot:spring-boot-starter-cache:$springBootVersion",
'springBootStarterValidation': "org.springframework.boot:spring-boot-starter-validation:$springBootVersion",
'springKafka': 'org.springframework.kafka:spring-kafka:2.8.11',
'springKafka': 'org.springframework.kafka:spring-kafka:2.9.13',
'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion",
'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.2.15',
'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.46',
Expand Down Expand Up @@ -263,7 +265,7 @@ subprojects {
plugins.withType(JavaPlugin) {
dependencies {
constraints {
implementation('io.netty:netty-all:4.1.86.Final')
implementation('io.netty:netty-all:4.1.100.Final')
implementation('org.apache.commons:commons-compress:1.21')
implementation('org.apache.velocity:velocity-engine-core:2.3')
implementation('org.hibernate:hibernate-validator:6.0.20.Final')
Expand Down
16 changes: 12 additions & 4 deletions datahub-frontend/app/auth/NativeAuthenticationConfigs.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,26 @@
public class NativeAuthenticationConfigs {

// Config path of the flag that turns native authentication on or off.
public static final String NATIVE_AUTHENTICATION_ENABLED_CONFIG_PATH = "auth.native.enabled";
// Config path of the flag that controls email-format enforcement at sign-up.
public static final String NATIVE_AUTHENTICATION_ENFORCE_VALID_EMAIL_ENABLED_CONFIG_PATH = "auth.native.signUp.enforceValidEmail";

// Both flags default to true and keep that value when their config path is absent.
private Boolean _isEnabled = true;
private Boolean _isEnforceValidEmailEnabled = true;

/**
 * Reads the native-authentication flags from the supplied config.
 *
 * @param configs configuration to read; a flag is only overridden when its path is present
 */
public NativeAuthenticationConfigs(final com.typesafe.config.Config configs) {
// This form only disables native authentication: path present and value parsed as false.
if (configs.hasPath(NATIVE_AUTHENTICATION_ENABLED_CONFIG_PATH)
&& Boolean.FALSE.equals(
Boolean.parseBoolean(configs.getValue(NATIVE_AUTHENTICATION_ENABLED_CONFIG_PATH).toString()))) {
_isEnabled = false;
// This form assigns the parsed boolean directly whenever the path is present.
if (configs.hasPath(NATIVE_AUTHENTICATION_ENABLED_CONFIG_PATH)) {
_isEnabled = Boolean.parseBoolean(configs.getValue(NATIVE_AUTHENTICATION_ENABLED_CONFIG_PATH).toString());
}
// Same pattern for the email-enforcement flag.
if (configs.hasPath(NATIVE_AUTHENTICATION_ENFORCE_VALID_EMAIL_ENABLED_CONFIG_PATH)) {
_isEnforceValidEmailEnabled =
Boolean.parseBoolean(configs.getValue(NATIVE_AUTHENTICATION_ENFORCE_VALID_EMAIL_ENABLED_CONFIG_PATH).toString());
}
}

/** Returns whether native authentication is enabled. */
public boolean isNativeAuthenticationEnabled() {
return _isEnabled;
}

/** Returns whether sign-up must reject emails that fail format validation. */
public boolean isEnforceValidEmailEnabled() {
return _isEnforceValidEmailEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.pac4j.play.store.PlaySessionStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import play.data.validation.Constraints;
import play.libs.Json;
import play.mvc.Controller;
import play.mvc.Http;
Expand Down Expand Up @@ -203,6 +204,13 @@ public Result signUp(Http.Request request) {
JsonNode invalidCredsJson = Json.newObject().put("message", "Email must not be empty.");
return Results.badRequest(invalidCredsJson);
}
// When email enforcement is enabled, reject sign-ups whose email fails the
// Play email-format validator.
if (_nativeAuthenticationConfigs.isEnforceValidEmailEnabled()) {
Constraints.EmailValidator emailValidator = new Constraints.EmailValidator();
if (!emailValidator.isValid(email)) {
// The preceding branch already returns "Email must not be empty."; this branch
// handles a malformed address, so the message says so instead of repeating it.
JsonNode invalidCredsJson = Json.newObject().put("message", "Email must be a valid email address.");
return Results.badRequest(invalidCredsJson);
}
}

if (StringUtils.isBlank(password)) {
JsonNode invalidCredsJson = Json.newObject().put("message", "Password must not be empty.");
Expand Down
4 changes: 4 additions & 0 deletions datahub-frontend/conf/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ auth.oidc.preferredJwsAlgorithm = ${?AUTH_OIDC_PREFERRED_JWS_ALGORITHM} # Which
#
auth.jaas.enabled = ${?AUTH_JAAS_ENABLED}
auth.native.enabled = ${?AUTH_NATIVE_ENABLED}

# Enforces the usage of a valid email for user sign up
auth.native.signUp.enforceValidEmail = true
auth.native.signUp.enforceValidEmail = ${?ENFORCE_VALID_EMAIL}
#
# To disable all authentication to the app, and proxy all users through a master "datahub" account, make sure that,
# jaas, native and oidc auth are disabled:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ public CompletableFuture<SearchResults> get(DataFetchingEnvironment environment)
}

// 2. Get list of entities that we should query based on filters or assets from aspect.
List<String> entitiesToQuery = assetUrns.stream().map(Urn::getEntityType).collect(Collectors.toList());
List<String> entitiesToQuery = assetUrns.stream().map(Urn::getEntityType).distinct().collect(Collectors.toList());


final List<EntityType> inputEntityTypes = (input.getTypes() == null || input.getTypes().isEmpty()) ? ImmutableList.of() : input.getTypes();
final List<String> inputEntityNames = inputEntityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
final List<String> inputEntityNames = inputEntityTypes.stream().map(EntityTypeMapper::getName).distinct().collect(Collectors.toList());

final List<String> finalEntityNames = inputEntityNames.size() > 0 ? inputEntityNames : entitiesToQuery;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static List<MatchedField> getMatchedFieldEntry(List<com.linkedin.metadata
Urn urn = Urn.createFromString(field.getValue());
matchedField.setEntity(UrnToEntityMapper.map(urn));
} catch (URISyntaxException e) {
log.warn("Failed to create urn from MatchedField value: {}", field.getValue(), e);
log.debug("Failed to create urn from MatchedField value: {}", field.getValue());
}
}
return matchedField;
Expand Down
10 changes: 9 additions & 1 deletion datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ dependencies {
runtimeOnly externalDependency.mysqlConnector
runtimeOnly externalDependency.postgresql

implementation externalDependency.awsMskIamAuth
implementation(externalDependency.awsMskIamAuth) {
exclude group: 'software.amazon.awssdk', module: 'third-party-jackson-core'
}

annotationProcessor externalDependency.lombok
annotationProcessor externalDependency.picocli
Expand All @@ -75,6 +77,12 @@ dependencies {
testImplementation externalDependency.mockito
testImplementation externalDependency.testng
testRuntimeOnly externalDependency.logbackClassic

constraints {
implementation(implementation externalDependency.parquetHadoop) {
because("CVE-2022-42003")
}
}
}

bootJar {
Expand Down
3 changes: 2 additions & 1 deletion datahub-web-react/.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
PUBLIC_URL=/assets
REACT_APP_THEME_CONFIG=theme_light.config.json
SKIP_PREFLIGHT_CHECK=true
BUILD_PATH=build/yarn
BUILD_PATH=build/yarn
REACT_APP_PROXY_TARGET=http://localhost:9002
8 changes: 8 additions & 0 deletions datahub-web-react/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ need to be deployed, still at `http://localhost:9002`, to service GraphQL API re

Optionally you could also start the app with the mock server without running the docker containers by executing `yarn start:mock`. See [here](src/graphql-mock/fixtures/searchResult/userSearchResult.ts#L6) for available login users.

### Testing your customizations

There are two options to test your customizations:
* **Option 1**: Initialize the docker containers with the `quickstart.sh` script (or with a custom docker-compose file, if you have one) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at `http://localhost:9002` to fetch real data.
* **Option 2**: Change the environment variable `REACT_APP_PROXY_TARGET` in the `.env` file to point to your `datahub-frontend` server (ex: https://my_datahub_host.com) and then run `yarn start` in this directory. This will start a forwarding server at `localhost:3000` that will use the `datahub-frontend` server at some domain to fetch real data.

Option 2 is useful if you want to test your React customizations without having to run the whole DataHub stack locally. However, if you changed other components of the DataHub stack, you will need to run the whole stack locally (building the docker images) and use option 1.

### Functional testing

In order to start a server and run frontend unit tests using react-testing-framework, run:
Expand Down
6 changes: 3 additions & 3 deletions datahub-web-react/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ node {
}

// Version of node to use.
version = '16.8.0'
version = '21.2.0'

// Version of Yarn to use.
yarnVersion = '1.22.0'
yarnVersion = '1.22.1'

// Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror).
if (project.hasProperty('nodeDistBaseUrl')) {
Expand Down Expand Up @@ -73,7 +73,7 @@ task yarnBuild(type: YarnTask, dependsOn: [yarnInstall, yarnTest, yarnLint]) {
}

task yarnQuickBuild(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
environment = [NODE_OPTIONS: "--max-old-space-size=3072"]
environment = [NODE_OPTIONS: "--max-old-space-size=3072 --openssl-legacy-provider"]
args = ['run', 'build']
}

Expand Down
2 changes: 1 addition & 1 deletion datahub-web-react/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"start:mock": "yarn run generate && BROWSER=none REACT_APP_MOCK=true craco start",
"start:e2e": "REACT_APP_MOCK=cy BROWSER=none PORT=3010 craco start",
"ec2-dev": "yarn run generate && CI=true;export CI;BROWSER=none craco start",
"build": "yarn run generate && CI=false REACT_APP_MOCK=false craco build && rm -rf dist/ && cp -r build/yarn/ dist/ && rm -r build/yarn/",
"build": "yarn run generate && NODE_OPTIONS='--max-old-space-size=3072 --openssl-legacy-provider' CI=false REACT_APP_MOCK=false craco build && rm -rf dist/ && cp -r build/yarn/ dist/ && rm -r build/yarn/",
"test": "craco test",
"pretest:e2e:ci": "yarn generate",
"test:e2e": "start-server-and-test start:e2e 3010",
Expand Down
Loading

0 comments on commit 1f74fca

Please sign in to comment.