Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Oct 3, 2024
2 parents 2de9ecb + fa67e3a commit b40167a
Show file tree
Hide file tree
Showing 33 changed files with 1,153 additions and 1,043 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,100 @@ describe('filterSchemaRows', () => {
expect(filteredRows).toMatchObject([{ fieldPath: 'shipment' }]);
expect(expandedRowsFromFilter).toMatchObject(new Set());
});

it('should properly filter schema rows based on business attribute properties description', () => {
    // Builds a row whose only searchable metadata is the description of its business attribute.
    function rowWithDescription(fieldPath: string, description: string) {
        return {
            fieldPath,
            schemaFieldEntity: {
                businessAttributes: {
                    businessAttribute: {
                        businessAttribute: { properties: { description } },
                    },
                },
            },
        };
    }

    const schemaRows = [
        rowWithDescription('customer', 'customer description'),
        rowWithDescription('testing', 'testing description'),
        rowWithDescription('shipment', 'shipment description'),
    ] as SchemaField[];

    // No editable metadata, so any match must come from the business attribute itself.
    const result = filterSchemaRows(
        schemaRows,
        { editableSchemaFieldInfo: [] },
        'testing description',
        testEntityRegistry,
    );

    // Only the row whose business attribute description contains the filter text is kept.
    expect(result.filteredRows).toMatchObject([{ fieldPath: 'testing' }]);
    // The asserted expansion set is empty.
    expect(result.expandedRowsFromFilter).toMatchObject(new Set());
});

it('should properly filter schema rows based on business attribute properties tags', () => {
    // Builds a row whose business attribute carries exactly one tag association.
    function rowWithTag<TTag>(fieldPath: string, tag: TTag) {
        return {
            fieldPath,
            schemaFieldEntity: {
                businessAttributes: {
                    businessAttribute: {
                        businessAttribute: {
                            properties: { tags: { tags: [{ tag }] } },
                        },
                    },
                },
            },
        };
    }

    const schemaRows = [
        rowWithTag('customer', sampleTag),
        rowWithTag('testing', { properties: { name: 'otherTag' } }),
        rowWithTag('shipment', { properties: { name: 'anotherTag' } }),
    ] as SchemaField[];

    // Search by the name of the tag attached to the 'customer' row only.
    const result = filterSchemaRows(
        schemaRows,
        { editableSchemaFieldInfo: [] },
        sampleTag.properties.name,
        testEntityRegistry,
    );

    // Only the row tagged with sampleTag is expected to remain.
    expect(result.filteredRows).toMatchObject([{ fieldPath: 'customer' }]);
    // The asserted expansion set is empty.
    expect(result.expandedRowsFromFilter).toMatchObject(new Set());
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@ function matchesTagsOrTermsOrDescription(field: SchemaField, filterText: string,
);
}

/**
 * Checks whether the business attribute attached to a schema field matches the filter text.
 *
 * A field matches when the business attribute's description or name contains `filterText`, or when
 * the display name of any of its glossary terms or tags contains it. Property values and display
 * names are lower-cased before the comparison; `filterText` is used exactly as given, so callers
 * should pass already lower-cased text.
 *
 * @param field schema row whose `schemaFieldEntity.businessAttributes` is inspected
 * @param filterText text to look for
 * @param entityRegistry used to resolve the display names of glossary terms and tags
 * @returns `true` when any business attribute property matches; `false` otherwise, including when
 * the field has no business attribute properties at all
 */
function matchesBusinessAttributesProperties(
    field: SchemaField,
    filterText: string,
    entityRegistry: EntityRegistry,
): boolean {
    const businessAttributeProperties =
        field.schemaFieldEntity?.businessAttributes?.businessAttribute?.businessAttribute?.properties;
    // One guard covers every missing link in the chain (entity, attribute association, properties).
    if (!businessAttributeProperties) return false;

    // Always yields a strict boolean so the predicate never leaks `undefined` to its callers.
    const containsFilterText = (value?: string | null): boolean =>
        !!value?.toLocaleLowerCase().includes(filterText);

    return (
        containsFilterText(businessAttributeProperties.description) ||
        containsFilterText(businessAttributeProperties.name) ||
        // `some` (rather than `find`) returns a boolean instead of the matching association object.
        !!businessAttributeProperties.glossaryTerms?.terms?.some((termAssociation) =>
            containsFilterText(entityRegistry.getDisplayName(EntityType.GlossaryTerm, termAssociation.term)),
        ) ||
        !!businessAttributeProperties.tags?.tags?.some((tagAssociation) =>
            containsFilterText(entityRegistry.getDisplayName(EntityType.Tag, tagAssociation.tag)),
        )
    );
}

// returns list of fieldPaths for fields that have Terms or Tags or Descriptions matching the filterText
function getFilteredFieldPathsByMetadata(editableSchemaMetadata: any, entityRegistry, filterText) {
return (
Expand Down Expand Up @@ -56,7 +75,8 @@ export function filterSchemaRows(
if (
matchesFieldName(row.fieldPath, formattedFilterText) ||
matchesEditableTagsOrTermsOrDescription(row, filteredFieldPathsByEditableMetadata) ||
matchesTagsOrTermsOrDescription(row, formattedFilterText, entityRegistry) // non-editable tags, terms and description
matchesTagsOrTermsOrDescription(row, formattedFilterText, entityRegistry) || // non-editable tags, terms and description
matchesBusinessAttributesProperties(row, formattedFilterText, entityRegistry)
) {
finalFieldPaths.add(row.fieldPath);
}
Expand All @@ -65,7 +85,8 @@ export function filterSchemaRows(
if (
matchesFieldName(fieldName, formattedFilterText) ||
matchesEditableTagsOrTermsOrDescription(row, filteredFieldPathsByEditableMetadata) ||
matchesTagsOrTermsOrDescription(row, formattedFilterText, entityRegistry) // non-editable tags, terms and description
matchesTagsOrTermsOrDescription(row, formattedFilterText, entityRegistry) || // non-editable tags, terms and description
matchesBusinessAttributesProperties(row, formattedFilterText, entityRegistry)
) {
// if we match specifically on this field (not just its parent), add and expand all parents
splitFieldPath.reduce((previous, current) => {
Expand Down
4 changes: 1 addition & 3 deletions docs-website/adoptionStoriesIndexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
{
"name": "Visa",
"slug": "visa",
"imageUrl": "/img/logos/companies/visa.png",
"imageSize": "large",
"link": "https://blog.datahubproject.io/how-visa-uses-datahub-to-scale-data-governance-cace052d61c5",
"linkType": "blog",
"tagline": "How Visa uses DataHub to scale data governance",
Expand Down Expand Up @@ -374,4 +372,4 @@
"category": "And More"
}
]
}
}
20 changes: 10 additions & 10 deletions docs-website/src/pages/_components/CaseStudy/caseStudyContent.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ const caseStudyData = [
image: "https://datahubproject.io/img/logos/companies/netflix.png",
link: "https://datahubproject.io/adoption-stories/#netflix",
},
{
title: "Scaling Data Governance",
description:
"How VISA Uses DataHub to Scale Data Governance.",
tag: "Finance",
backgroundImage:
"https://miro.medium.com/v2/resize:fit:2000/format:webp/1*[email protected]",
image: "https://datahubproject.io/img/logos/companies/visa.png",
link: "https://datahubproject.io/adoption-stories/#visa",
},
// {
// title: "Scaling Data Governance",
// description:
// "How VISA Uses DataHub to Scale Data Governance.",
// tag: "Finance",
// backgroundImage:
// "https://miro.medium.com/v2/resize:fit:2000/format:webp/1*[email protected]",
// image: "https://datahubproject.io/img/logos/companies/visa.png",
// link: "https://datahubproject.io/adoption-stories/#visa",
// },
{
title: "Ensuring Data Reliability",
description:
Expand Down
2 changes: 1 addition & 1 deletion docs-website/src/pages/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import CloseButton from "@ant-design/icons/CloseCircleFilled";

const companyIndexes = require("../../adoptionStoriesIndexes.json");
const companies = companyIndexes.companies;
const keyCompanySlugs = ["netflix", "visa", "pinterest", "airtel", "optum"];
const keyCompanySlugs = ["netflix", "pinterest", "airtel", "notion", "optum"];
const keyCompanies = keyCompanySlugs
.map((slug) => companies.find((co) => co.slug === slug))
.filter((isDefined) => isDefined);
Expand Down
Binary file removed docs-website/static/img/logos/companies/visa.png
Binary file not shown.
26 changes: 26 additions & 0 deletions docs/how/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,32 @@ queryConfigurations:
boost_mode: multiply
```

##### Example 4: Entity Ranking

Alter the ranking of entities. For example, you may want dashboards to appear above charts.
This can be done with the following function score, which uses a prefix match on the entity type
portion of the URN. The weight may need to be adjusted for your data and the entities involved,
since matches on multiple fields can shift the score towards one entity type over another.

```yaml
queryConfigurations:
- queryRegex: .*
simpleQuery: true
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
prefix:
urn:
value: 'urn:li:dashboard:'
weight: 1.5
score_mode: multiply
boost_mode: multiply
```

### Search Autocomplete Configuration

Similar to the options provided in the previous section for search configuration, there are autocomplete specific options
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ def extract():
ins={
"data": In(
dagster_type=PythonObjectDagsterType(list),
metadata={"datahub.inputs": [DatasetUrn("snowflake", "tableA").urn]},
metadata={"datahub.inputs": [DatasetUrn("snowflake", "tableA").urn()]},
)
},
out={
"result": Out(
metadata={"datahub.outputs": [DatasetUrn("snowflake", "tableB").urn]}
metadata={"datahub.outputs": [DatasetUrn("snowflake", "tableB").urn()]}
)
},
)
Expand Down Expand Up @@ -101,6 +101,5 @@ def asset_lineage_extractor(
dagster_url="http://localhost:3000",
asset_lineage_extractor=asset_lineage_extractor,
)

datahub_sensor = make_datahub_sensor(config=config)
defs = Definitions(jobs=[do_stuff], sensors=[datahub_sensor])
13 changes: 5 additions & 8 deletions metadata-ingestion-modules/dagster-plugin/examples/assets_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
define_asset_job,
multi_asset,
)
from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.utilities.urns.dataset_urn import DatasetUrn

from datahub_dagster_plugin.sensors.datahub_sensors import (
Expand All @@ -18,7 +19,7 @@
@multi_asset(
outs={
"extract": AssetOut(
metadata={"datahub.outputs": [DatasetUrn("snowflake", "tableD").urn]}
metadata={"datahub.outputs": [DatasetUrn("snowflake", "tableD").urn()]}
),
}
)
Expand Down Expand Up @@ -47,13 +48,9 @@ def transform(extract):

assets_job = define_asset_job(name="assets_job")

config = DatahubDagsterSourceConfig.parse_obj(
{
"rest_sink_config": {
"server": "http://localhost:8080",
},
"dagster_url": "http://localhost:3000",
}
config = DatahubDagsterSourceConfig(
datahub_client_config=DatahubClientConfig(server="http://localhost:8080"),
dagster_url="http://localhost:3000",
)

datahub_sensor = make_datahub_sensor(config=config)
Expand Down
15 changes: 6 additions & 9 deletions metadata-ingestion-modules/dagster-plugin/examples/ops_job.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dagster import Definitions, In, Out, PythonObjectDagsterType, job, op
from datahub.ingestion.graph.config import DatahubClientConfig
from datahub.utilities.urns.dataset_urn import DatasetUrn

from datahub_dagster_plugin.sensors.datahub_sensors import (
Expand All @@ -17,12 +18,12 @@ def extract():
ins={
"data": In(
dagster_type=PythonObjectDagsterType(list),
metadata={"datahub.inputs": [DatasetUrn("snowflake", "tableA").urn]},
metadata={"datahub.inputs": [DatasetUrn("snowflake", "tableA").urn()]},
)
},
out={
"result": Out(
metadata={"datahub.outputs": [DatasetUrn("snowflake", "tableB").urn]}
metadata={"datahub.outputs": [DatasetUrn("snowflake", "tableB").urn()]}
)
},
)
Expand All @@ -38,13 +39,9 @@ def do_stuff():
transform(extract())


config = DatahubDagsterSourceConfig.parse_obj(
{
"rest_sink_config": {
"server": "http://localhost:8080",
},
"dagster_url": "http://localhost:3000",
}
config = DatahubDagsterSourceConfig(
datahub_client_config=DatahubClientConfig(server="http://localhost:8080"),
dagster_url="http://localhost:3000",
)

datahub_sensor = make_datahub_sensor(config=config)
Expand Down
Loading

0 comments on commit b40167a

Please sign in to comment.