Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Dec 3, 2024
2 parents 23988df + e19af9e commit 629db58
Show file tree
Hide file tree
Showing 85 changed files with 1,753 additions and 902 deletions.
1 change: 1 addition & 0 deletions datahub-graphql-core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ plugins {
id "io.github.kobylynskyi.graphql.codegen" version "4.1.1"
}

apply from: '../gradle/coverage/java-coverage.gradle'

dependencies {
implementation project(':metadata-service:restli-client-api')
Expand Down
1 change: 1 addition & 0 deletions datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ plugins {
}

apply from: "../gradle/versioning/versioning.gradle"
apply from: "../gradle/coverage/java-coverage.gradle"

ext {
docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
Expand Down
7 changes: 7 additions & 0 deletions docs/managed-datahub/release-notes/v_0_3_7.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ If you are using an older CLI/SDK version, then please upgrade it. This applies
## Release Changelog
---

### v0.3.7.5

- [GMS] Fix upstream lineage patching when the path contains an encoded slash
- [UI] Fix merging siblings schema with v1 and v2 fields
- [UI] Fix the display of nullability in the schema field drawer
- [Ingestion] Reduce Data Product write volume from unset side-effect

### v0.3.7.4

- [#11935](https://github.com/datahub-project/datahub/pull/11935) - Added environment variable for enabling stricter URN validation rules `STRICT_URN_VALIDATION_ENABLED` [[1](https://datahubproject.io/docs/what/urn/#restrictions)].
Expand Down
3 changes: 3 additions & 0 deletions entity-registry/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ plugins {
id 'java-test-fixtures'
}

apply from: "../gradle/coverage/java-coverage.gradle"

dependencies {
implementation spec.product.pegasus.data
implementation spec.product.pegasus.generator
Expand Down Expand Up @@ -53,3 +55,4 @@ dependencies {
testFixturesAnnotationProcessor externalDependency.lombok
}
compileTestJava.dependsOn tasks.getByPath(':entity-registry:custom-test-model:modelDeploy')

Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ public void testLargePatchStandard() throws Exception {
UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch);
long end = System.currentTimeMillis();
assertTrue(
end - start < 10000,
String.format("Expected less then 10 seconds patch actual %s ms", end - start));
end - start < 20000,
String.format("Expected less then 20 seconds patch actual %s ms", end - start));

assertEquals(result.getUpstreams().size(), 187, "Expected 1 less upstream");
assertEquals(result.getFineGrainedLineages().size(), 607);
Expand Down
31 changes: 31 additions & 0 deletions gradle/coverage/java-coverage.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Shared JaCoCo coverage setup, pulled into Java subprojects via `apply from:`.
apply plugin: "jacoco"

jacoco {
    toolVersion = "0.8.12"
}

/*
  Deferred until after evaluation: the jacoco plugin alters the test task based on the test
  library (junit or testNG) that each subproject declares in its dependencies.
 */
afterEvaluate {
    // Generate the coverage report after every test run.
    test.finalizedBy(jacocoTestReport)

    jacocoTestReport {
        dependsOn test
        reports {
            // Only the XML report is enabled; CSV and HTML are switched off.
            csv.required = false
            html.required = false
            xml.required = true
            /*
              Coverage aggregation/analysis tools search for the coverage result files. Writing
              all of them into one folder under the root build directory minimizes the time spent
              searching through the full source tree.
             */
            xml.outputLocation = rootProject.layout.buildDirectory.file("coverage-reports/jacoco-${project.name}.xml")
        }
    }
}
18 changes: 18 additions & 0 deletions gradle/coverage/python-coverage.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
 * Builds the coverage-related arguments to be passed to pytest.
 *
 * @param test_name optional suffix appended (without a separator) to the report file name;
 *                  defaults to "" (callers pass values such as 'quick' or 'full').
 * @return a string of pytest options: `--cov-report xml:<root build dir>/coverage-reports/<file>`
 *         followed by `--cov=src`.
 */
ext.get_coverage_args = { test_name = "" ->

    def coverage_file_name = "pycov-${project.name}${test_name}.xml"

    /*
      Tools that aggregate and analyse coverage search for the coverage result files. Keeping them
      under one folder will minimize the time spent searching through the full source tree.

      The root build directory is resolved through `layout.buildDirectory` rather than the
      deprecated `Project.buildDir`; the resulting path is the same.
     */
    def base_path = "${rootProject.layout.buildDirectory.get().asFile}/coverage-reports"

    /*
      --cov=src was added via setup.cfg in many of the python projects but for some reason, was not
      getting picked up consistently, so it is added explicitly. Centralizing these params makes it
      easier to adjust them for all python projects (with overrides being in the sub-project build
      script.)
     */
    return "--cov-report xml:${base_path}/${coverage_file_name} --cov=src"
}
2 changes: 2 additions & 0 deletions ingestion-scheduler/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'java'
}

apply from: "../gradle/coverage/java-coverage.gradle"

dependencies {
implementation project(path: ':metadata-models')
implementation project(path: ':metadata-io')
Expand Down
3 changes: 3 additions & 0 deletions li-utils/build.gradle
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

plugins {
id 'java-library'
id 'pegasus'
}

apply from: "../gradle/coverage/java-coverage.gradle"


dependencies {
api spec.product.pegasus.data
Expand Down
1 change: 1 addition & 0 deletions metadata-auth/auth-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ apply plugin: 'signing'
apply plugin: 'maven-publish'
apply plugin: 'io.codearte.nexus-staging'
apply from: '../../metadata-integration/java/versioning.gradle'
apply from: '../../gradle/coverage/java-coverage.gradle'

jar {
archiveClassifier = "lib"
Expand Down
2 changes: 2 additions & 0 deletions metadata-dao-impl/kafka-producer/build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
apply plugin: 'java'

apply from: '../../gradle/coverage/java-coverage.gradle'

dependencies {
implementation project(':metadata-events:mxe-avro')
implementation project(':metadata-events:mxe-registration')
Expand Down
2 changes: 2 additions & 0 deletions metadata-events/mxe-utils-avro/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ plugins {
id 'pegasus'
}

apply from: "../../gradle/coverage/java-coverage.gradle"

dependencies {
api project(':metadata-events:mxe-avro')
api project(':metadata-models')
Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/airflow-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -97,7 +99,7 @@ task testQuick(type: Exec, dependsOn: installTest) {
inputs.files(project.fileTree(dir: "tests/"))
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"pytest --cov-config=setup.cfg ${get_coverage_args('quick')} -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import random
import signal
import subprocess
import textwrap
import time
from typing import Any, Iterator, Sequence

Expand Down Expand Up @@ -110,6 +111,48 @@ def _wait_for_dag_finish(
raise NotReadyError(f"DAG has not finished yet: {dag_run['state']}")


def _dump_dag_logs(airflow_instance: AirflowInstance, dag_id: str) -> None:
    """Print the task logs of a DAG run to stdout, as a debugging aid.

    Takes the first DAG run that the Airflow REST API lists for ``dag_id``,
    lists its task instances, sorts them by ``start_date`` and prints each
    task's log for the try number reported on the task instance.

    Args:
        airflow_instance: Supplies the HTTP ``session`` and the ``airflow_url``
            base URL used for all API calls.
        dag_id: ID of the DAG whose logs should be dumped.

    Raises:
        Any error raised by the DAG-run or task-instance listing requests
        (including ``raise_for_status()`` failures) propagates to the caller.
        Errors while fetching an individual task's log are caught and printed,
        so one failing task does not hide the logs of the others.
    """
    dag_runs_url = f"{airflow_instance.airflow_url}/api/v1/dags/{dag_id}/dagRuns"

    # Get the dag run info
    res = airflow_instance.session.get(dag_runs_url, timeout=5)
    res.raise_for_status()
    dag_runs = res.json()["dag_runs"]
    if not dag_runs:
        # Without this guard an empty listing surfaces as an opaque IndexError.
        print(f"\nNo DAG runs found for DAG {dag_id}; no task logs to dump.")
        return
    dag_run_id = dag_runs[0]["dag_run_id"]

    # List the tasks in the dag run
    res = airflow_instance.session.get(
        f"{dag_runs_url}/{dag_run_id}/taskInstances",
        timeout=5,
    )
    res.raise_for_status()
    task_instances = res.json()["task_instances"]

    # Sort tasks by start_date to maintain execution order; tasks without a
    # start_date (None) sort first because they are keyed as "".
    task_instances.sort(key=lambda x: x["start_date"] or "")

    print(f"\nTask execution order for DAG {dag_id}:")
    for task in task_instances:
        task_id = task["task_id"]
        state = task["state"]
        try_number = task.get("try_number", 1)

        task_header = f"Task: {task_id} (State: {state}; Try: {try_number})"

        # Get logs for the try number reported on the task instance. This is
        # best-effort: a failure is reported and the loop moves on.
        try:
            res = airflow_instance.session.get(
                f"{dag_runs_url}/{dag_run_id}"
                f"/taskInstances/{task_id}/logs/{try_number}",
                params={"full_content": "true"},
                timeout=5,
            )
            res.raise_for_status()
            print(f"\n=== {task_header} ===\n{textwrap.indent(res.text, ' ')}")
        except Exception as e:
            print(f"Failed to fetch logs for {task_header}: {e}")


@contextlib.contextmanager
def _run_airflow(
tmp_path: pathlib.Path,
Expand Down Expand Up @@ -377,6 +420,11 @@ def test_airflow_plugin(
print("Sleeping for a few seconds to let the plugin finish...")
time.sleep(10)

try:
_dump_dag_logs(airflow_instance, dag_id)
except Exception as e:
print(f"Failed to dump DAG logs: {e}")

if dag_id == DAG_TO_SKIP_INGESTION:
# Verify that no MCPs were generated.
assert not os.path.exists(airflow_instance.metadata_file)
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion-modules/airflow-plugin/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,6 @@ commands =
[testenv:py310-airflow24]
extras = dev,integration-tests,plugin-v2,test-airflow24

[testenv:py310-airflow{26,27,28},py311-airflow{29,210}]
[testenv:py3{10,11}-airflow{26,27,28,29,210}]
extras = dev,integration-tests,plugin-v2

4 changes: 3 additions & 1 deletion metadata-ingestion-modules/dagster-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -84,7 +86,7 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
outputs.dir("${venv_name}")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"pytest -vv ${get_coverage_args('quick')} --continue-on-collection-errors --junit-xml=junit.quick.xml"
}

task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/gx-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -84,7 +86,7 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
outputs.dir("${venv_name}")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"pytest -vv ${get_coverage_args('quick')} --continue-on-collection-errors --junit-xml=junit.quick.xml"
}

task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
Expand Down
6 changes: 4 additions & 2 deletions metadata-ingestion-modules/prefect-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -82,14 +84,14 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
outputs.dir("${venv_name}")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml -s"
"pytest --cov-config=setup.cfg ${get_coverage_args('quick')} -vv --continue-on-collection-errors --junit-xml=junit.quick.xml -s"
}


task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
"pytest -m 'not slow_integration' -vv ${get_coverage_args('full')} --continue-on-collection-errors --junit-xml=junit.full.xml"
}


Expand Down
14 changes: 6 additions & 8 deletions metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand All @@ -11,10 +13,6 @@ if (!project.hasProperty("extra_pip_requirements")) {
ext.extra_pip_requirements = ""
}

def get_coverage_arg(test_name) {
return "--cov-report xml:coverage_${test_name}.xml "
}

task checkPythonVersion(type: Exec) {
commandLine python_executable, '-c',
'import sys; sys.version_info >= (3, 8), f"Python version {sys.version_info[:2]} not allowed"'
Expand Down Expand Up @@ -134,7 +132,7 @@ task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJso
inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
inputs.files(project.fileTree(dir: "tests/"))
outputs.dir("${venv_name}")
def cvg_arg = get_coverage_arg("quick")
def cvg_arg = get_coverage_args("quick")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
Expand Down Expand Up @@ -166,19 +164,19 @@ task testSingle(dependsOn: [installDevTest]) {
}

task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch0")
def cvg_arg = get_coverage_args("intBatch0")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml"
}
task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch1")
def cvg_arg = get_coverage_args("intBatch1")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml"
}
task testIntegrationBatch2(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch2")
def cvg_arg = get_coverage_args("intBatch2")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def fqn(self) -> str:
return (
self.qualified_name
or self.id
or Urn.create_from_string(self.urn).get_entity_id()[0]
or Urn.from_string(self.urn).get_entity_id()[0]
)

@validator("urn", pre=True, always=True)
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/cli/put_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def platform(
"""

if name.startswith(f"urn:li:{DataPlatformUrn.ENTITY_TYPE}"):
platform_urn = DataPlatformUrn.create_from_string(name)
platform_urn = DataPlatformUrn.from_string(name)
platform_name = platform_urn.get_entity_id_as_string()
else:
platform_name = name.lower()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _get_owner_urn(maybe_urn: str) -> str:

def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) -> None:
try:
parsed_urn: Urn = Urn.create_from_string(urn)
parsed_urn: Urn = Urn.from_string(urn)
entity_type = parsed_urn.get_type()
except Exception:
click.secho(f"Provided urn {urn} does not seem valid", fg="red")
Expand Down
Loading

0 comments on commit 629db58

Please sign in to comment.