Commit 98a82b8
Merge branch 'datahub-project:master' into master
anshbansal authored Jul 9, 2024
2 parents 8b5ec06 + b6c7fe8
Showing 88 changed files with 6,741 additions and 2,106 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/pr-labeler.yml
@@ -40,12 +40,12 @@ jobs:
"RyanHolstien",
"Kunal-kankriya",
"purnimagarg1",
"gaurav2733",
"dushayntAW",
"AvaniSiddhapuraAPT",
"akarsh991",
"shubhamjagtap639",
"mayurinehate"
"sagar-salvi-apptware",
"kushagra-apptware",
"Salman-Apptware",
"mayurinehate",
"noggi"
]'),
github.actor
)
5 changes: 5 additions & 0 deletions docs-website/docusaurus.config.js
@@ -19,6 +19,11 @@ module.exports = {
async: true,
defer: true,
},
+ {
+   src: "/scripts/rb2b.js",
+   async: true,
+   defer: true,
+ }
],
noIndex: isSaas,
customFields: {
1 change: 1 addition & 0 deletions docs-website/static/scripts/rb2b.js

(Generated file; contents not rendered.)

2 changes: 1 addition & 1 deletion docs/actions/actions/slack.md
@@ -190,7 +190,7 @@ Similar to the quickstart scenario, there are no specific software installation
If you are using the `datahub-actions` library directly from Python, or the `datahub-actions` cli directly, then you need to first install the `slack` action plugin in your Python virtualenv.

```
pip install "datahub-actions[slack]"
pip install "acryl-datahub-actions[slack]"
```
Then run the action with a configuration file that you have modified to capture your credentials and configuration.
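For reference, running the installed action is typically a one-liner; the config filename below is illustrative:

```
datahub actions -c slack_action.yaml
```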
2 changes: 1 addition & 1 deletion docs/actions/actions/teams.md
@@ -95,7 +95,7 @@ Similar to the quickstart scenario, there are no specific software installation
If you are using the `datahub-actions` library directly from Python, or the `datahub-actions` cli directly, then you need to first install the `teams` action plugin in your Python virtualenv.

```
pip install "datahub-actions[teams]"
pip install "acryl-datahub-actions[teams]"
```

Then run the action with a configuration file that you have modified to capture your credentials and configuration.
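As with the Slack plugin, the action could then be launched with something along the lines of `datahub actions -c teams_action.yaml` (filename illustrative).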
6 changes: 4 additions & 2 deletions metadata-ingestion/setup.py
@@ -166,6 +166,7 @@
"sql-metadata==2.2.2",
*sqllineage_lib,
"GitPython>2",
"python-liquid",
}

bigquery_common = {
@@ -343,6 +344,7 @@
"feast": {
"feast>=0.34.0,<1",
"flask-openid>=1.3.0",
"dask[dataframe]<2024.7.0",
},
"glue": aws_common,
# hdbcli is supported officially by SAP, sqlalchemy-hana is built on top but not officially supported
@@ -370,7 +372,7 @@
"kafka-connect": sql_common | {"requests", "JPype1"},
"ldap": {"python-ldap>=2.4"},
"looker": looker_common,
"lookml": looker_common,
"lookml": looker_common | sqlglot_lib,
"metabase": {"requests"} | sqlglot_lib,
"mlflow": {
"mlflow-skinny>=2.3.0",
@@ -511,7 +513,7 @@
"flake8-tidy-imports>=4.3.0",
"flake8-bugbear==23.3.12",
"isort>=5.7.0",
"mypy==1.0.0",
"mypy==1.10.1",
*test_api_requirements,
pytest_dep,
"pytest-asyncio>=0.16.0",
23 changes: 22 additions & 1 deletion metadata-ingestion/src/datahub/cli/cli_utils.py
@@ -13,11 +13,17 @@
from requests.models import Response
from requests.sessions import Session

+ import datahub
from datahub.cli import config_utils
from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.request_helper import make_curl_command
from datahub.emitter.serialization_helper import post_json_transform
- from datahub.metadata.schema_classes import _Aspect
+ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
+     MetadataChangeEvent,
+     MetadataChangeProposal,
+ )
+ from datahub.metadata.schema_classes import SystemMetadataClass, _Aspect
from datahub.utilities.urns.urn import Urn, guess_entity_type

log = logging.getLogger(__name__)
@@ -689,3 +695,18 @@ def generate_access_token(
return token_name, response.json().get("data", {}).get("createAccessToken", {}).get(
"accessToken", None
)


+ def ensure_has_system_metadata(
+     event: Union[
+         MetadataChangeProposal, MetadataChangeProposalWrapper, MetadataChangeEvent
+     ]
+ ) -> None:
+     if event.systemMetadata is None:
+         event.systemMetadata = SystemMetadataClass()
+     metadata = event.systemMetadata
+     if metadata.properties is None:
+         metadata.properties = {}
+     props = metadata.properties
+     props["clientId"] = datahub.__package_name__
+     props["clientVersion"] = datahub.__version__
20 changes: 12 additions & 8 deletions metadata-ingestion/src/datahub/emitter/rest_emitter.py
@@ -10,7 +10,11 @@
from requests.adapters import HTTPAdapter, Retry
from requests.exceptions import HTTPError, RequestException

- from datahub.cli.cli_utils import fixup_gms_url, get_system_auth
+ from datahub.cli.cli_utils import (
+     ensure_has_system_metadata,
+     fixup_gms_url,
+     get_system_auth,
+ )
from datahub.configuration.common import ConfigurationError, OperationalError
from datahub.emitter.generic_emitter import Emitter
from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -228,12 +232,10 @@ def emit_mce(self, mce: MetadataChangeEvent) -> None:
snapshot_fqn = (
f"com.linkedin.metadata.snapshot.{mce.proposedSnapshot.RECORD_SCHEMA.name}"
)
-         system_metadata_obj = {}
-         if mce.systemMetadata is not None:
-             system_metadata_obj = {
-                 "lastObserved": mce.systemMetadata.lastObserved,
-                 "runId": mce.systemMetadata.runId,
-             }
+         ensure_has_system_metadata(mce)
+         # To make lint happy
+         assert mce.systemMetadata is not None
+         system_metadata_obj = mce.systemMetadata.to_obj()
snapshot = {
"entity": {"value": {snapshot_fqn: mce_obj}},
"systemMetadata": system_metadata_obj,
@@ -246,7 +248,7 @@ def emit_mcp(
self, mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper]
) -> None:
url = f"{self._gms_server}/aspects?action=ingestProposal"

+         ensure_has_system_metadata(mcp)
mcp_obj = pre_json_transform(mcp.to_obj())
payload = json.dumps({"proposal": mcp_obj})

@@ -256,6 +258,8 @@ def emit_mcps(
self, mcps: List[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]]
) -> None:
url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
+         for mcp in mcps:
+             ensure_has_system_metadata(mcp)

mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps]
payload = json.dumps({"proposals": mcp_objs})
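With these changes every emit path (`emit_mce`, `emit_mcp`, `emit_mcps`) stamps system metadata before the HTTP request goes out, so callers no longer need to populate it themselves. A rough sketch of the batch path, assuming a reachable GMS at a placeholder URL:

```
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import StatusClass

emitter = DatahubRestEmitter(gms_server="http://localhost:8080")  # placeholder

mcps = [
    MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,example.table,PROD)",
        aspect=StatusClass(removed=False),
    )
]

# ensure_has_system_metadata() runs on each proposal inside emit_mcps,
# so clientId/clientVersion are attached automatically.
emitter.emit_mcps(mcps)
```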
@@ -629,7 +629,7 @@ def get_fields_from_schema(
logger.error(
"Failed to get fields from schema, continuing...", exc_info=e
)
-             return []
+             return
else:
raise
json_type = cls._get_type_from_schema(jsonref_schema_dict)
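This fix, and the two below, replace `return []` with a bare `return` inside generator functions. Because these functions contain `yield`, a returned value is never seen by the caller as a result; it only becomes the `StopIteration` value, so the bare form says what actually happens (and is what stricter type checkers, like the mypy version pinned above, expect). A small standalone illustration:

```
from typing import Iterable

def numbers(fail: bool) -> Iterable[int]:
    if fail:
        return  # ends iteration; `return []` here would not yield a list
    yield 1
    yield 2

assert list(numbers(fail=True)) == []   # caller just sees an empty iterator
assert list(numbers(fail=False)) == [1, 2]
```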
@@ -644,7 +644,7 @@ def _query_project_list(self) -> Iterable[BigqueryProject]:
"Maybe resourcemanager.projects.get permission is missing for the service account. "
"You can assign predefined roles/bigquery.metadataViewer role to your service account.",
)
-             return []
+             return

for project in projects:
if self.config.project_id_pattern.allowed(project.id):
@@ -221,7 +221,7 @@ def _get_dpi_workunits(
f"Status should be either SUCCESSFUL, FAILURE_WITH_TASK or CANCELED and it was "
f"{job.status}"
)
-             return []
+             return
result = status_result_map[job.status]
start_timestamp_millis = job.start_time * 1000
for mcp in dpi.generate_mcp(
(The remaining changed files were still loading and are not shown.)
