Skip to content

Commit

Permalink
Merge branch 'linkedin:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
shirshanka authored Dec 20, 2021
2 parents bb61e48 + 77e3641 commit 2a9370a
Show file tree
Hide file tree
Showing 96 changed files with 6,189 additions and 1,114 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Geotab](https://www.geotab.com)
- [Grofers](https://grofers.com)
- [hipages](https://hipages.com.au/)
- [IOMED](https://iomed.health)
- [Klarna](https://www.klarna.com)
- [LinkedIn](http://linkedin.com)
- [Peloton](https://www.onepeloton.com)
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ project.ext.externalDependency = [
// avro-serde includes dependencies for `kafka-avro-serializer` `kafka-schema-registry-client` and `avro`
'kafkaAvroSerde': 'io.confluent:kafka-streams-avro-serde:5.5.1',
'kafkaClients': 'org.apache.kafka:kafka-clients:2.3.0',
'logbackClassic': 'ch.qos.logback:logback-classic:1.2.3',
'logbackClassic': 'ch.qos.logback:logback-classic:1.2.9',
'lombok': 'org.projectlombok:lombok:1.18.12',
'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0',
'mavenArtifact': "org.apache.maven:maven-artifact:$mavenVersion",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public CompletableFuture<List<TimeSeriesAspect>> get(DataFetchingEnvironment env
// Step 1: Get aspects.
List<EnvelopedAspect> aspects =
_client.getTimeseriesAspectValues(urn, _entityName, _aspectName, maybeStartTimeMillis, maybeEndTimeMillis,
maybeLimit, context.getAuthentication());
maybeLimit, null, null, context.getAuthentication());

// Step 2: Bind profiles into GraphQL strong types.
return aspects.stream().map(_aspectMapper::apply).collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ services:
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- ES_JAVA_OPTS=-Xms256m -Xmx256m
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
retries: 4
start_period: 2m
Expand Down
8 changes: 4 additions & 4 deletions metadata-dao-impl/kafka-producer/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ dependencies {
testCompile externalDependency.mockito

constraints {
implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
}
}
8 changes: 4 additions & 4 deletions metadata-events/mxe-registration/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ dependencies {
avroOriginal project(path: ':metadata-models', configuration: 'avroSchema')

constraints {
implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions metadata-events/mxe-utils-avro-1.7/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ dependencies {
testCompile project(':metadata-testing:metadata-test-utils')

constraints {
implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions metadata-ingestion-examples/common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ dependencies {
runtime externalDependency.logbackClassic

constraints {
implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
}
}
8 changes: 4 additions & 4 deletions metadata-ingestion-examples/kafka-etl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ dependencies {
runtime externalDependency.logbackClassic

constraints {
implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions metadata-ingestion-examples/mce-cli/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ dependencies {
annotationProcessor externalDependency.picocli

constraints {
implementation("org.apache.logging.log4j:log4j-core:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-core:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
implementation("org.apache.logging.log4j:log4j-api:2.15.0") {
because("previous versions are vulnerable to CVE-2021-44228")
implementation("org.apache.logging.log4j:log4j-api:2.17.0") {
because("previous versions are vulnerable to CVE-2021-45105")
}
}

Expand Down
9 changes: 5 additions & 4 deletions metadata-ingestion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,6 @@ sink:
server: "http://localhost:8080"
```
We automatically expand environment variables in the config,
similar to variable substitution in GNU bash or in docker-compose files. For details, see
https://docs.docker.com/compose/compose-file/compose-file-v2/#variable-substitution.
Running a recipe is quite easy.
```shell
Expand All @@ -177,6 +173,11 @@ datahub ingest -c ./examples/recipes/mssql_to_datahub.yml

A number of recipes are included in the [examples/recipes](./examples/recipes) directory. For full info and context on each source and sink, see the pages described in the [table of plugins](#installing-plugins).

### Handling sensitive information in recipes

We automatically expand environment variables in the config (e.g. `${MSSQL_PASSWORD}`),
similar to variable substitution in GNU bash or in docker-compose files. For details, see
https://docs.docker.com/compose/compose-file/compose-file-v2/#variable-substitution. This environment variable substitution should be used to mask sensitive information in recipe files. As long as you can pass environment variables securely to the ingestion process, there is no need to store sensitive information in recipes.
## Transformations

If you'd like to modify data before it reaches the ingestion sinks – for instance, adding additional owners or tags – you can use a transformer to write your own module and integrate it with DataHub.
Expand Down
10 changes: 7 additions & 3 deletions metadata-ingestion/examples/recipes/mode_to_datahub.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
source:
type: "mode"
config:
token: 9fa6a90fcd33
password: a03bcbc011d6f77c585f5682
token: token
password: password
connect_uri: https://app.mode.com/
workspace: "petabloc"
workspace: "workspace"
default_schema: "public"
owner_username_instead_of_email: False
api_options:
retry_backoff_multiplier: 2
max_retry_interval: 10
max_attempts: 5

# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
Expand Down
15 changes: 9 additions & 6 deletions metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_long_description():
base_requirements = {
    # Compatibility.
"dataclasses>=0.6; python_version < '3.7'",
"typing_extensions>=3.10.0.2",
"typing_extensions>=3.10.0.2,<4",
"mypy_extensions>=0.4.3",
# Actual dependencies.
"typing-inspect",
Expand Down Expand Up @@ -97,7 +97,7 @@ def get_long_description():
"bigquery": sql_common | bigquery_common | {"pybigquery >= 0.6.0"},
"bigquery-usage": bigquery_common | {"cachetools"},
"datahub-business-glossary": set(),
"dbt": set(),
"dbt": {"requests"},
"druid": sql_common | {"pydruid>=0.6.2"},
"feast": {"docker"},
"glue": aws_common,
Expand All @@ -124,7 +124,8 @@ def get_long_description():
"oracle": sql_common | {"cx_Oracle"},
"postgres": sql_common | {"psycopg2-binary", "GeoAlchemy2"},
"redash": {"redash-toolbelt", "sql-metadata"},
"redshift": sql_common | {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2", "sqllineage"},
"redshift": sql_common
| {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2", "sqllineage"},
"redshift-usage": sql_common
| {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
"sagemaker": aws_common,
Expand All @@ -147,7 +148,6 @@ def get_long_description():
"sqlalchemy-trino"
},
"nifi": {"requests"},

}

all_exclude_plugins: Set[str] = {
Expand Down Expand Up @@ -183,7 +183,8 @@ def get_long_description():
"flake8>=3.8.3",
"flake8-tidy-imports>=4.3.0",
"isort>=5.7.0",
"mypy>=0.901",
# Waiting for https://github.com/samuelcolvin/pydantic/pull/3175 before allowing mypy 0.920.
"mypy>=0.901,<0.920",
"pytest>=6.2.2",
"pytest-cov>=2.8.1",
"pytest-docker>=0.10.3",
Expand Down Expand Up @@ -303,14 +304,16 @@ def get_long_description():
"trino = datahub.ingestion.source.sql.trino:TrinoSource",
"starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource",
"nifi = datahub.ingestion.source.nifi:NifiSource",

],
"datahub.ingestion.sink.plugins": [
"file = datahub.ingestion.sink.file:FileSink",
"console = datahub.ingestion.sink.console:ConsoleSink",
"datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink",
"datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink",
],
"datahub.ingestion.state_provider.plugins": [
"datahub = datahub.ingestion.source.state_provider.datahub_ingestion_state_provider:DatahubIngestionStateProvider",
],
"apache_airflow_provider": ["provider_info=datahub_provider:get_provider_info"],
}

Expand Down
Loading

0 comments on commit 2a9370a

Please sign in to comment.