Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Dec 9, 2024
2 parents a7afedb + 8a1c180 commit e449254
Show file tree
Hide file tree
Showing 33 changed files with 115,176 additions and 168 deletions.
16 changes: 4 additions & 12 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,21 @@ jobs:
include:
# Note: this should be kept in sync with tox.ini.
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.1.4"
extra_pip_extras: plugin-v1
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.2.4"
extra_pip_extras: plugin-v1
extra_pip_requirements: "apache-airflow~=2.3.4"
extra_pip_extras: test-airflow23
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.4.3"
extra_pip_extras: plugin-v2,test-airflow24
extra_pip_extras: test-airflow24
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: "apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: "apache-airflow~=2.10.2 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.11.txt"
extra_pip_extras: plugin-v2
extra_pip_requirements: "apache-airflow~=2.10.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.10.3/constraints-3.11.txt"
fail-fast: false
steps:
- name: Set up JDK 17
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { useGetSearchResultsLazyQuery } from '../../../../../../../graphql/searc
import { Container, Entity, EntityType } from '../../../../../../../types.generated';
import { useEnterKeyListener } from '../../../../../../shared/useEnterKeyListener';
import { useEntityRegistry } from '../../../../../../useEntityRegistry';
import { getParentEntities } from '../../../../../../search/filters/utils';
import ParentEntities from '../../../../../../search/filters/ParentEntities';

type Props = {
onCloseModal: () => void;
Expand All @@ -26,14 +28,18 @@ const StyleTag = styled(Tag)`
align-items: center;
`;

const PreviewImage = styled.img`
export const PreviewImage = styled.img`
max-height: 18px;
width: auto;
object-fit: contain;
background-color: transparent;
margin-right: 4px;
`;

export const ParentWrapper = styled.div`
max-width: 400px;
`;

export const ContainerSelectModal = ({ onCloseModal, defaultValues, onOkOverride, titleOverride }: Props) => {
const [containerSearch, { data: platforSearchData }] = useGetSearchResultsLazyQuery();
const entityRegistry = useEntityRegistry();
Expand Down Expand Up @@ -65,10 +71,16 @@ export const ContainerSelectModal = ({ onCloseModal, defaultValues, onOkOverride
// Renders a search result in the select dropdown.
const renderSearchResult = (entity: Container) => {
const displayName = entityRegistry.getDisplayName(EntityType.Container, entity);
const parentEntities: Entity[] = getParentEntities(entity as Entity) || [];

const truncatedDisplayName = displayName.length > 25 ? `${displayName.slice(0, 25)}...` : displayName;
return (
<Tooltip title={displayName}>
{parentEntities.length > 0 && (
<ParentWrapper>
<ParentEntities parentEntities={parentEntities} />
</ParentWrapper>
)}
<PreviewImage src={entity.platform?.properties?.logoUrl || undefined} alt={entity.properties?.name} />
<span>{truncatedDisplayName}</span>
</Tooltip>
Expand Down
11 changes: 10 additions & 1 deletion datahub-web-react/src/app/search/SearchFilterLabel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ import CustomAvatar from '../shared/avatar/CustomAvatar';
import { IconStyleType } from '../entity/Entity';
import { formatNumber } from '../shared/formatNumber';
import useGetBrowseV2LabelOverride from './filters/useGetBrowseV2LabelOverride';
import { getParentEntities } from './filters/utils';
import { ParentWrapper } from '../entity/shared/containers/profile/sidebar/Container/ContainerSelectModal';
import ParentEntities from './filters/ParentEntities';

type Props = {
field: string;
Expand Down Expand Up @@ -157,11 +160,17 @@ export const SearchFilterLabel = ({ field, value, entity, count, hideCount }: Pr
if (entity?.type === EntityType.Container) {
const container = entity as Container;
const displayName = entityRegistry.getDisplayName(EntityType.Container, container);
const parentEntities: Entity[] = getParentEntities(container as Entity) || [];
const truncatedDisplayName = displayName.length > 25 ? `${displayName.slice(0, 25)}...` : displayName;
return (
<Tooltip title={displayName}>
{!!container.platform?.properties?.logoUrl && (
<PreviewImage src={container.platform?.properties?.logoUrl} alt={container.properties?.name} />
<>
<ParentWrapper style={{ width: '200px' }}>
<ParentEntities parentEntities={parentEntities} />
</ParentWrapper>
<PreviewImage src={container.platform?.properties?.logoUrl} alt={container.properties?.name} />
</>
)}
<span>
{truncatedDisplayName}
Expand Down
16 changes: 15 additions & 1 deletion datahub-web-react/src/app/search/SimpleSearchFilters.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ import { FacetFilterInput, FacetMetadata } from '../../types.generated';
import { FilterScenarioType } from './filters/render/types';
import { useFilterRendererRegistry } from './filters/render/useFilterRenderer';
import { SimpleSearchFilter } from './SimpleSearchFilter';
import { ENTITY_FILTER_NAME, ENTITY_INDEX_FILTER_NAME, LEGACY_ENTITY_FILTER_NAME } from './utils/constants';
import {
DEGREE_FILTER_NAME,
ENTITY_FILTER_NAME,
ENTITY_INDEX_FILTER_NAME,
LEGACY_ENTITY_FILTER_NAME,
} from './utils/constants';

const TOP_FILTERS = ['degree', ENTITY_FILTER_NAME, 'platform', 'tags', 'glossaryTerms', 'domains', 'owners'];

Expand Down Expand Up @@ -43,6 +48,15 @@ export const SimpleSearchFilters = ({ facets, selectedFilters, onFilterSelect, l
: filter,
)
.filter((filter) => filter.field !== field || !(filter.values?.length === 0));

// Do not let user unselect all degree filters
if (field === DEGREE_FILTER_NAME && !selected) {
const hasDegreeFilter = newFilters.find((filter) => filter.field === DEGREE_FILTER_NAME);
if (!hasDegreeFilter) {
return;
}
}

setCachedProps({ ...cachedProps, selectedFilters: newFilters });
onFilterSelect(newFilters);
};
Expand Down
3 changes: 2 additions & 1 deletion datahub-web-react/src/app/search/filters/FilterOption.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { generateColor } from '../../entity/shared/components/styled/StyledTag';
import { ANTD_GRAY } from '../../entity/shared/constants';
import { useEntityRegistry } from '../../useEntityRegistry';
import {
CONTAINER_FILTER_NAME,
ENTITY_SUB_TYPE_FILTER_NAME,
MAX_COUNT_VAL,
PLATFORM_FILTER_NAME,
Expand Down Expand Up @@ -125,7 +126,7 @@ export default function FilterOption({
const { field, value, count, entity } = filterOption;
const entityRegistry = useEntityRegistry();
const { icon, label } = getFilterIconAndLabel(field, value, entityRegistry, entity || null, 14);
const shouldShowIcon = field === PLATFORM_FILTER_NAME && icon !== null;
const shouldShowIcon = (field === PLATFORM_FILTER_NAME || field === CONTAINER_FILTER_NAME) && icon !== null;
const shouldShowTagColor = field === TAGS_FILTER_NAME && entity?.type === EntityType.Tag;
const isSubTypeFilter = field === TYPE_NAMES_FILTER_NAME;
const parentEntities: Entity[] = getParentEntities(entity as Entity) || [];
Expand Down
14 changes: 14 additions & 0 deletions datahub-web-react/src/app/search/filters/utils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
FacetFilterInput,
FacetMetadata,
GlossaryTerm,
Container,
} from '../../../types.generated';
import { IconStyleType } from '../../entity/Entity';
import {
Expand Down Expand Up @@ -186,6 +187,15 @@ export function getFilterIconAndLabel(
entityRegistry.getIcon(EntityType.DataPlatform, size || 12, IconStyleType.ACCENT, ANTD_GRAY[9])
);
label = filterEntity ? entityRegistry.getDisplayName(EntityType.DataPlatform, filterEntity) : filterValue;
} else if (filterField === CONTAINER_FILTER_NAME) {
// Scenario where the filter entity exists and filterField is container
const logoUrl = (filterEntity as Container)?.platform?.properties?.logoUrl;
icon = logoUrl ? (
<PlatformIcon src={logoUrl} size={size} />
) : (
entityRegistry.getIcon(EntityType.DataPlatform, size || 12, IconStyleType.ACCENT, ANTD_GRAY[9])
);
label = entityRegistry.getDisplayName(EntityType.Container, filterEntity);
} else if (filterField === BROWSE_PATH_V2_FILTER_NAME) {
icon = <FolderFilled size={size} color="black" />;
label = getLastBrowseEntryFromFilterValue(filterValue);
Expand All @@ -196,6 +206,7 @@ export function getFilterIconAndLabel(
filterEntity,
size,
);

icon = newIcon;
label = newLabel;
} else {
Expand Down Expand Up @@ -344,6 +355,9 @@ export function getParentEntities(entity: Entity): Entity[] | null {
if (entity.type === EntityType.Domain) {
return (entity as Domain).parentDomains?.domains || [];
}
if (entity.type === EntityType.Container) {
return (entity as Container).parentContainers?.containers || [];
}
return null;
}

Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/src/graphql/fragments.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,7 @@ fragment entityContainer on Container {

fragment parentContainerFields on Container {
urn
type
properties {
name
}
Expand Down
3 changes: 3 additions & 0 deletions datahub-web-react/src/graphql/search.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,9 @@ fragment facetFields on FacetMetadata {
properties {
name
}
parentContainers {
...parentContainersFields
}
}
... on CorpUser {
username
Expand Down
2 changes: 2 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

- #11701: The Fivetran `sources_to_database` field is deprecated in favor of setting directly within `sources_to_platform_instance.<key>.database`.
- #11742: For PowerBi ingestion, `use_powerbi_email` is now enabled by default when extracting ownership information.
- #12056: The DataHub Airflow plugin no longer supports Airflow 2.1 and Airflow 2.2.
- #12056: The DataHub Airflow plugin now defaults to the v2 plugin implementation.

### Breaking Changes

Expand Down
24 changes: 13 additions & 11 deletions docs/lineage/airflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ The DataHub Airflow plugin supports:
- Task run information, including task successes and failures.
- Manual lineage annotations using `inlets` and `outlets` on Airflow operators.

There's two actively supported implementations of the plugin, with different Airflow version support.
There's two implementations of the plugin, with different Airflow version support.

| Approach | Airflow Version | Notes |
| --------- | --------------- | --------------------------------------------------------------------------- |
| Plugin v2 | 2.3.4+ | Recommended. Requires Python 3.8+ |
| Plugin v1 | 2.1 - 2.8 | No automatic lineage extraction; may not extract lineage if the task fails. |
| Approach | Airflow Versions | Notes |
| --------- | ---------------- | --------------------------------------------------------------------------------------- |
| Plugin v2 | 2.3.4+ | Recommended. Requires Python 3.8+ |
| Plugin v1 | 2.3 - 2.8 | Deprecated. No automatic lineage extraction; may not extract lineage if the task fails. |

If you're using Airflow older than 2.1, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details.
If you're using Airflow older than 2.3, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details.

<!-- TODO: Update the local Airflow guide and link to it here. -->
<!-- If you are looking to run Airflow and DataHub using docker locally, follow the guide [here](../../docker/airflow/local_airflow.md). -->
Expand All @@ -29,7 +29,7 @@ If you're using Airflow older than 2.1, it's possible to use the v1 plugin with

### Installation

The v2 plugin requires Airflow 2.3+ and Python 3.8+. If you don't meet these requirements, use the v1 plugin instead.
The v2 plugin requires Airflow 2.3+ and Python 3.8+. If you don't meet these requirements, see the [compatibility section](#compatibility) for other options.

```shell
pip install 'acryl-datahub-airflow-plugin[plugin-v2]'
Expand Down Expand Up @@ -84,9 +84,10 @@ enabled = True # default

### Installation

The v1 plugin requires Airflow 2.1 - 2.8 and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details.
The v1 plugin requires Airflow 2.3 - 2.8 and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details.

If you're using Airflow 2.3+, we recommend using the v2 plugin instead. If you need to use the v1 plugin with Airflow 2.3+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`.
Note that the v1 plugin is less featureful than the v2 plugin, and is overall not actively maintained.
Since datahub v0.15.0, the v2 plugin has been the default. If you need to use the v1 plugin with `acryl-datahub-airflow-plugin` v0.15.0+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`.

```shell
pip install 'acryl-datahub-airflow-plugin[plugin-v1]'
Expand Down Expand Up @@ -340,11 +341,12 @@ The solution is to upgrade `acryl-datahub-airflow-plugin>=0.12.0.4` or upgrade `

## Compatibility

We no longer officially support Airflow <2.1. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow.
Both of these options support Python 3.7+.
We no longer officially support Airflow <2.3. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow.
The first two options support Python 3.7+, and the last option supports Python 3.8+.

- Airflow 1.10.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.9.1.0.
- Airflow 2.0.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.11.0.1.
- Airflow 2.2.x, use DataHub plugin v2 with acryl-datahub-airflow-plugin <= 0.14.1.5.

DataHub also previously supported an Airflow [lineage backend](https://airflow.apache.org/docs/apache-airflow/2.2.0/lineage.html#lineage-backend) implementation. While the implementation is still in our codebase, it is deprecated and will be removed in a future release.
Note that the lineage backend did not support automatic lineage extraction, did not capture task failures, and did not work in AWS MWAA.
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion-modules/airflow-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if (!project.hasProperty("extra_pip_requirements")) {
ext.extra_pip_requirements = ""
}
if (!project.hasProperty("extra_pip_extras")) {
ext.extra_pip_extras = "plugin-v2"
ext.extra_pip_extras = ""
}
// If extra_pip_extras is non-empty, we need to add a comma to the beginning of the string.
if (extra_pip_extras != "") {
Expand Down
14 changes: 10 additions & 4 deletions metadata-ingestion-modules/airflow-plugin/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def get_long_description():

base_requirements = {
f"acryl-datahub[datahub-rest]{_self_pin}",
# Actual dependencies.
"apache-airflow >= 2.0.2",
# We require Airflow 2.3.x, since we need the new DAG listener API.
"apache-airflow>=2.3.0",
}

plugins: Dict[str, Set[str]] = {
Expand All @@ -44,12 +44,13 @@ def get_long_description():
# We remain restrictive on the versions allowed here to prevent
# us from being broken by backwards-incompatible changes in the
# underlying package.
"openlineage-airflow>=1.2.0,<=1.22.0",
"openlineage-airflow>=1.2.0,<=1.25.0",
},
}

# Include datahub-rest in the base requirements.
# Require some plugins by default.
base_requirements.update(plugins["datahub-rest"])
base_requirements.update(plugins["plugin-v2"])


mypy_stubs = {
Expand Down Expand Up @@ -109,6 +110,11 @@ def get_long_description():
"apache-airflow-providers-sqlite",
}
per_version_test_requirements = {
"test-airflow23": {
"pendulum<3.0",
"Flask-Session<0.6.0",
"connexion<3.0",
},
"test-airflow24": {
"pendulum<3.0",
"Flask-Session<0.6.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def get_task_inlets(operator: "Operator") -> List:
return operator._inlets # type: ignore[attr-defined, union-attr]
if hasattr(operator, "get_inlet_defs"):
return operator.get_inlet_defs() # type: ignore[attr-defined]
return operator.inlets
return operator.inlets or []


def get_task_outlets(operator: "Operator") -> List:
Expand All @@ -56,7 +56,7 @@ def get_task_outlets(operator: "Operator") -> List:
return operator._outlets # type: ignore[attr-defined, union-attr]
if hasattr(operator, "get_outlet_defs"):
return operator.get_outlet_defs()
return operator.outlets
return operator.outlets or []


__all__ = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def hookimpl(f: _F) -> _F: # type: ignore[misc] # noqa: F811
"1",
)
_RUN_IN_THREAD_TIMEOUT = float(
os.getenv("DATAHUB_AIRFLOW_PLUGIN_RUN_IN_THREAD_TIMEOUT", 15)
os.getenv("DATAHUB_AIRFLOW_PLUGIN_RUN_IN_THREAD_TIMEOUT", 10)
)
_DATAHUB_CLEANUP_DAG = "Datahub_Cleanup"

Expand Down Expand Up @@ -102,6 +102,7 @@ def get_airflow_plugin_listener() -> Optional["DataHubListener"]:
"capture_tags": plugin_config.capture_tags_info,
"capture_ownership": plugin_config.capture_ownership_info,
"enable_extractors": plugin_config.enable_extractors,
"render_templates": plugin_config.render_templates,
"disable_openlineage_plugin": plugin_config.disable_openlineage_plugin,
},
)
Expand Down
Loading

0 comments on commit e449254

Please sign in to comment.