Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker authored Dec 5, 2024
2 parents b31d8b1 + cb7d687 commit 1b1cea0
Show file tree
Hide file tree
Showing 22 changed files with 398 additions and 185 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ jobs:
path: |
~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
Expand Down
6 changes: 3 additions & 3 deletions datahub-web-react/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5043,9 +5043,9 @@ cross-inspect@1.0.0:
tslib "^2.4.0"

cross-spawn@^7.0.2:
version "7.0.3"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
version "7.0.6"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.6.tgz#8a58fe78f00dcd70c370451759dfbfaf03e8ee9f"
integrity sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==
dependencies:
path-key "^3.1.0"
shebang-command "^2.0.0"
Expand Down
2 changes: 1 addition & 1 deletion entity-registry/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ apply from: "../gradle/coverage/java-coverage.gradle"

dependencies {
implementation spec.product.pegasus.data
implementation spec.product.pegasus.generator
compileOnly spec.product.pegasus.generator
api project(path: ':metadata-models')
api project(path: ':metadata-models', configuration: "dataTemplate")
api externalDependency.classGraph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/** Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}. */
public class FieldExtractor {
Expand All @@ -30,15 +30,34 @@ private static long getNumArrayWildcards(PathSpec pathSpec) {

// Extract the value of each field in the field specs from the input record
public static <T extends FieldSpec> Map<T, List<Object>> extractFields(
@Nonnull RecordTemplate record, List<T> fieldSpecs) {
return extractFields(record, fieldSpecs, MAX_VALUE_LENGTH);
@Nullable RecordTemplate record, List<T> fieldSpecs) {
return extractFields(record, fieldSpecs, false);
}

public static <T extends FieldSpec> Map<T, List<Object>> extractFields(
@Nonnull RecordTemplate record, List<T> fieldSpecs, int maxValueLength) {
@Nullable RecordTemplate record, List<T> fieldSpecs, boolean requiredFieldExtract) {
return extractFields(record, fieldSpecs, MAX_VALUE_LENGTH, requiredFieldExtract);
}

public static <T extends FieldSpec> Map<T, List<Object>> extractFields(
@Nullable RecordTemplate record, List<T> fieldSpecs, int maxValueLength) {
return extractFields(record, fieldSpecs, maxValueLength, false);
}

public static <T extends FieldSpec> Map<T, List<Object>> extractFields(
@Nullable RecordTemplate record,
List<T> fieldSpecs,
int maxValueLength,
boolean requiredFieldExtract) {
final Map<T, List<Object>> extractedFields = new HashMap<>();
for (T fieldSpec : fieldSpecs) {
Optional<Object> value = RecordUtils.getFieldValue(record, fieldSpec.getPath());
if (requiredFieldExtract && record == null) {
throw new IllegalArgumentException(
"Field extraction is required and the RecordTemplate is null");
}
Optional<Object> value =
Optional.ofNullable(record)
.flatMap(maybeRecord -> RecordUtils.getFieldValue(maybeRecord, fieldSpec.getPath()));
if (!value.isPresent()) {
extractedFields.put(fieldSpec, Collections.emptyList());
} else {
Expand Down
40 changes: 40 additions & 0 deletions metadata-ingestion/tests/unit/urns/invalid_urns.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Basic URN format tests
urn:li:abc
urn:li:abc:
urn:li:abc:()
urn:li:abc:(abc,)
urn:li:corpuser:abc)

# Reserved characters
urn:li:corpuser:foo␟bar
urn:li:tag:a,b,c

# CorpUser URN tests
urn:li:corpuser:(part1,part2)

# Dataset URN tests
urn:li:dataset:(urn:li:user:abc,dataset,prod)
urn:li:dataset:(urn:li:user:abc,dataset)
urn:li:dataset:(urn:li:user:abc,dataset,invalidEnv)

# DataFlow URN tests
urn:li:dataFlow:(airflow,flow_id)

# DataJob URN tests
urn:li:dataJob:(urn:li:user:abc,job_id)
urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod))

# Domain URN tests
urn:li:domain:(part1,part2)

# Tag URN tests
urn:li:tag:(part1,part2)

# Notebook URN tests
urn:li:notebook:(part1,part2,part3)

# CorpGroup URN tests
urn:li:corpGroup:(part1,part2)

# DataProcessInstance URN tests
urn:li:dataProcessInstance:(part1,part2)
10 changes: 0 additions & 10 deletions metadata-ingestion/tests/unit/urns/test_corp_group_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest

from datahub.utilities.urns.corp_group_urn import CorpGroupUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -17,12 +16,3 @@ def test_parse_urn(self) -> None:
assert str(corp_group_urn) == corp_group_urn_str
assert corp_group_urn == CorpGroupUrn(name="abc")
assert corp_group_urn == CorpGroupUrn.create_from_id("abc")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
CorpGroupUrn.create_from_string(
"urn:li:abc:(urn:li:dataPlatform:abc,def,prod)"
)

with self.assertRaises(InvalidUrnError):
CorpGroupUrn.create_from_string("urn:li:corpGroup:(part1,part2)")
10 changes: 0 additions & 10 deletions metadata-ingestion/tests/unit/urns/test_corpuser_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest

from datahub.utilities.urns.corpuser_urn import CorpuserUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -17,12 +16,3 @@ def test_parse_urn(self) -> None:
assert str(corpuser_urn) == corpuser_urn_str
assert corpuser_urn == CorpuserUrn("abc")
assert corpuser_urn == CorpuserUrn.create_from_id("abc")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
CorpuserUrn.create_from_string(
"urn:li:abc:(urn:li:dataPlatform:abc,def,prod)"
)

with self.assertRaises(InvalidUrnError):
CorpuserUrn.create_from_string("urn:li:corpuser:(part1,part2)")
8 changes: 0 additions & 8 deletions metadata-ingestion/tests/unit/urns/test_data_flow_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest

from datahub.utilities.urns.data_flow_urn import DataFlowUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -16,10 +15,3 @@ def test_parse_urn(self) -> None:
assert data_flow_urn.get_env() == "prod"
assert data_flow_urn.__str__() == "urn:li:dataFlow:(airflow,def,prod)"
assert data_flow_urn == DataFlowUrn("airflow", "def", "prod")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
DataFlowUrn.create_from_string("urn:li:abc:(airflow,def,prod)")

with self.assertRaises(InvalidUrnError):
DataFlowUrn.create_from_string("urn:li:dataFlow:(airflow,flow_id)")
15 changes: 0 additions & 15 deletions metadata-ingestion/tests/unit/urns/test_data_job_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from datahub.utilities.urns.data_flow_urn import DataFlowUrn
from datahub.utilities.urns.data_job_urn import DataJobUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -22,17 +21,3 @@ def test_parse_urn(self) -> None:
assert data_job_urn == DataJobUrn(
"urn:li:dataFlow:(airflow,flow_id,prod)", "job_id"
)

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
DataJobUrn.create_from_string(
"urn:li:abc:(urn:li:dataFlow:(airflow,flow_id,prod),job_id)"
)

with self.assertRaises(InvalidUrnError):
DataJobUrn.create_from_string("urn:li:dataJob:(urn:li:user:abc,job_id)")

with self.assertRaises(InvalidUrnError):
DataJobUrn.create_from_string(
"urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod))"
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest

from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -20,12 +19,3 @@ def test_parse_urn(self) -> None:
assert dataprocessinstance_urn == DataProcessInstanceUrn("abc")
assert dataprocessinstance_urn == DataProcessInstanceUrn.create_from_id("abc")
assert "abc" == dataprocessinstance_urn.get_dataprocessinstance_id()

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
DataProcessInstanceUrn.create_from_string("urn:li:abc:dataProcessInstance")

with self.assertRaises(InvalidUrnError):
DataProcessInstanceUrn.create_from_string(
"urn:li:dataProcessInstance:(part1,part2)"
)
20 changes: 0 additions & 20 deletions metadata-ingestion/tests/unit/urns/test_dataset_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from datahub.utilities.urns.data_platform_urn import DataPlatformUrn
from datahub.utilities.urns.dataset_urn import DatasetUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -20,22 +19,3 @@ def test_parse_urn(self) -> None:
assert dataset_urn.get_env() == "PROD"
assert dataset_urn.__str__() == dataset_urn_str
assert dataset_urn == DatasetUrn("urn:li:dataPlatform:abc", "def", "prod")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
DatasetUrn.create_from_string(
"urn:li:abc:(urn:li:dataPlatform:abc,def,prod)"
)

with self.assertRaises(InvalidUrnError):
DatasetUrn.create_from_string(
"urn:li:dataset:(urn:li:user:abc,dataset,prod)"
)

with self.assertRaises(InvalidUrnError):
DatasetUrn.create_from_string("urn:li:dataset:(urn:li:user:abc,dataset)")

with self.assertRaises(InvalidUrnError):
DatasetUrn.create_from_string(
"urn:li:dataset:(urn:li:user:abc,dataset,invalidEnv)"
)
8 changes: 0 additions & 8 deletions metadata-ingestion/tests/unit/urns/test_domain_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest

from datahub.utilities.urns.domain_urn import DomainUrn
from datahub.utilities.urns.error import InvalidUrnError


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand All @@ -17,10 +16,3 @@ def test_parse_urn(self) -> None:
assert str(domain_urn) == domain_urn_str
assert domain_urn == DomainUrn("abc")
assert domain_urn == DomainUrn.create_from_id("abc")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
DomainUrn.create_from_string("urn:li:abc:domain")

with self.assertRaises(InvalidUrnError):
DomainUrn.create_from_string("urn:li:domain:(part1,part2)")
10 changes: 0 additions & 10 deletions metadata-ingestion/tests/unit/urns/test_notebook_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import pytest

from datahub.utilities.urns.error import InvalidUrnError
from datahub.utilities.urns.notebook_urn import NotebookUrn


Expand All @@ -16,12 +15,3 @@ def test_parse_urn(self) -> None:
assert str(notebook_urn) == notebook_urn_str

assert notebook_urn == NotebookUrn("querybook", "123")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
NotebookUrn.create_from_string(
"urn:li:abc:(urn:li:dataPlatform:abc,def,prod)"
)

with self.assertRaises(InvalidUrnError):
NotebookUrn.create_from_string("urn:li:notebook:(part1,part2,part3)")
8 changes: 0 additions & 8 deletions metadata-ingestion/tests/unit/urns/test_tag_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import pytest

from datahub.utilities.urns.error import InvalidUrnError
from datahub.utilities.urns.tag_urn import TagUrn


Expand All @@ -17,10 +16,3 @@ def test_parse_urn(self) -> None:
assert str(tag_urn) == tag_urn_str
assert tag_urn == TagUrn("abc")
assert tag_urn == TagUrn.create_from_id("abc")

def test_invalid_urn(self) -> None:
with self.assertRaises(InvalidUrnError):
TagUrn.create_from_string("urn:li:abc:tag_id")

with self.assertRaises(InvalidUrnError):
TagUrn.create_from_string("urn:li:tag:(part1,part2)")
Loading

0 comments on commit 1b1cea0

Please sign in to comment.