From 2b42b29d2fbfb12cfb68a0578b63993bcd182c07 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 4 Dec 2024 04:07:09 -0500 Subject: [PATCH] fix(ingest/tableau): make `sites.get_by_id` call optional (#12024) --- .../ingestion/source/tableau/tableau.py | 34 ++++++++++++++----- .../tableau/test_tableau_ingest.py | 2 ++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index f3ad5ea706f7ca..197e73dca7141b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -68,6 +68,7 @@ CapabilityReport, MetadataWorkUnitProcessor, Source, + StructuredLogLevel, TestableSource, TestConnectionReport, ) @@ -696,6 +697,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: config=self.config, ctx=self.ctx, site=site, + site_id=site.id, report=self.report, server=self.server, platform=self.platform, @@ -703,11 +705,19 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting assets of site '{site.content_url}'.") yield from site_source.ingest_tableau_site() else: - site = self.server.sites.get_by_id(self.server.site_id) + site = None + with self.report.report_exc( + title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.", + message="This usually indicates missing permissions. Ensure that you have all necessary permissions.", + level=StructuredLogLevel.WARN, + ): + site = self.server.sites.get_by_id(self.server.site_id) + site_source = TableauSiteSource( config=self.config, ctx=self.ctx, site=site, + site_id=self.server.site_id, report=self.report, server=self.server, platform=self.platform, @@ -740,7 +750,8 @@ def __init__( self, config: TableauConfig, ctx: PipelineContext, - site: SiteItem, + site: Optional[SiteItem], + site_id: Optional[str], report: TableauSourceReport, server: Server, platform: str, @@ -749,9 +760,16 @@ def __init__( self.report = report self.server: Server = server self.ctx: PipelineContext = ctx - self.site: SiteItem = site self.platform = platform + self.site: Optional[SiteItem] = site + if site_id is not None: + self.site_id: str = site_id + else: + assert self.site is not None, "site or site_id is required" + assert self.site.id is not None, "site_id is required when site is provided" + self.site_id = self.site.id + self.database_tables: Dict[str, DatabaseTable] = {} self.tableau_stat_registry: Dict[str, UsageStat] = {} self.tableau_project_registry: Dict[str, TableauProject] = {} @@ -805,7 +823,7 @@ def dataset_browse_prefix(self) -> str: def _re_authenticate(self): tableau_auth: Union[ TableauAuth, PersonalAccessTokenAuth - ] = self.config.get_tableau_auth(self.site.content_url) + ] = self.config.get_tableau_auth(self.site_id) self.server.auth.sign_in(tableau_auth) @property @@ -3189,10 +3207,10 @@ def emit_project_in_topological_order( else: # This is a root Tableau project since the parent_project_id is None. # For a root project, either the site is the parent, or the platform is the default parent. - if self.config.add_site_container and self.site and self.site.id: + if self.config.add_site_container: # The site containers have already been generated by emit_site_container, so we # don't need to emit them again here. - parent_project_key = self.gen_site_key(self.site.id) + parent_project_key = self.gen_site_key(self.site_id) yield from gen_containers( container_key=project_key, @@ -3209,12 +3227,12 @@ def emit_project_in_topological_order( yield from emit_project_in_topological_order(project) def emit_site_container(self): - if not self.site or not self.site.id: + if not self.site: logger.warning("Can not ingest site container. No site information found.") return yield from gen_containers( - container_key=self.gen_site_key(self.site.id), + container_key=self.gen_site_key(self.site_id), name=self.site.name or "Default", sub_types=[c.SITE], ) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 6c45b8a47de412..38a53b323876d1 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1028,6 +1028,7 @@ def check_lineage_metadata( ctx=context, platform="tableau", site=SiteItem(name="Site 1", content_url="site1"), + site_id="site1", report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), ) @@ -1248,6 +1249,7 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra config=mock.MagicMock(), ctx=mock.MagicMock(), site=mock.MagicMock(), + site_id=None, server=mock_sdk.return_value, report=reporter, )