Skip to content

Commit

Permalink
Teradata Connector (open-metadata#16373)
Browse files Browse the repository at this point in the history
* [WIP] add teradata connector

* [WIP] add teradata ingestion

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* [WIP] add teradata connector

* Reformat code

* Remove unused databaseName property
  • Loading branch information
gpby authored May 28, 2024
1 parent 80bbe20 commit d909a31
Show file tree
Hide file tree
Showing 17 changed files with 572 additions and 1 deletion.
2 changes: 2 additions & 0 deletions ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"elasticsearch8": "elasticsearch8~=8.9.0",
"giturlparse": "giturlparse",
"validators": "validators~=0.22.0",
"teradata": "teradatasqlalchemy>=20.0.0.0",
}

COMMONS = {
Expand Down Expand Up @@ -264,6 +265,7 @@
"snowflake": {VERSIONS["snowflake"]},
"superset": {}, # uses requests
"tableau": {VERSIONS["tableau"], VERSIONS["validators"], VERSIONS["packaging"]},
"teradata": {VERSIONS["teradata"]},
"trino": {VERSIONS["trino"]},
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"},
"pii-processor": {
Expand Down
4 changes: 4 additions & 0 deletions ingestion/src/metadata/ingestion/lineage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
SQLiteType,
)
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataType,
)
from metadata.utils.singleton import Singleton


Expand Down Expand Up @@ -112,6 +115,7 @@ class Dialect(Enum):
str(SQLiteType.SQLite.value): Dialect.SQLITE,
str(MssqlType.Mssql.value): Dialect.TSQL,
str(AzureSQLType.AzureSQL.value): Dialect.TSQL,
str(TeradataType.Teradata.value): Dialect.TERADATA,
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,17 @@ class ColumnTypeParser:
except ImportError:
pass

try:
# pylint: disable=import-outside-toplevel
from teradatasqlalchemy import BYTE, VARBYTE

_COLUMN_TYPE_MAPPING[BYTE] = "BINARY"
_SOURCE_TYPE_TO_OM_TYPE["BYTE"] = "BINARY"
_COLUMN_TYPE_MAPPING[VARBYTE] = "VARBINARY"
_SOURCE_TYPE_TO_OM_TYPE["VARBYTE"] = "VARBINARY"
except ImportError:
pass

@staticmethod
def get_column_type(column_type: Any) -> str:
for func in [
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Source connection handler
"""
import enum
from typing import Optional
from urllib.parse import quote_plus

from sqlalchemy.engine import Engine

from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataConnection,
)
from metadata.ingestion.connections.builders import (
create_generic_db_connection,
get_connection_args_common,
get_connection_options_dict,
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.teradata.queries import TERADATA_GET_DATABASE


def get_connection_url(connection: TeradataConnection) -> str:
    """
    Create Teradata connection url

    The URL has the shape ``<scheme>://<hostPort>/?user=...&password=...``
    followed by any of the standard options (``account``, ``logdata``,
    ``logmech``, ``tmode``) that are set on the connection, and finally any
    additional connection options. Every value is URL-encoded; enum values
    are replaced by their ``.value`` first.

    Args:
        connection: Teradata service connection configuration.

    Returns:
        The connection URL with all parameters in a single query string.
    """

    def _encode(value) -> str:
        """URL-encode a parameter value, unwrapping enum members first."""
        raw = value.value if isinstance(value, enum.Enum) else value
        return quote_plus(str(raw))

    # Collect every `key=value` pair first and join once at the end, so we
    # never emit a dangling or doubled `&` separator.
    query_parts = [f"user={quote_plus(connection.username)}"]
    if connection.password:
        query_parts.append(
            f"password={quote_plus(connection.password.get_secret_value())}"
        )

    # add standard options
    for key in ("account", "logdata", "logmech", "tmode"):
        value = getattr(connection, key, None)
        if value:
            query_parts.append(f"{key}={_encode(value)}")

    # add additional options if specified
    options = get_connection_options_dict(connection)
    if options:
        query_parts.extend(
            f"{key}={_encode(value)}" for key, value in options.items() if value
        )

    query_string = "&".join(query_parts)
    return f"{connection.scheme.value}://{connection.hostPort}/?{query_string}"


def get_connection(connection: TeradataConnection) -> Engine:
    """
    Build the SQLAlchemy engine for a Teradata service connection.

    Delegates to the generic database connection builder, supplying the
    Teradata URL builder and the common connection-arguments getter.
    """
    engine = create_generic_db_connection(
        connection=connection,
        get_connection_url_fn=get_connection_url,
        get_connection_args_fn=get_connection_args_common,
    )
    return engine


def test_connection(
    metadata: OpenMetadata,
    engine: Engine,
    service_connection: TeradataConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
) -> None:
    """
    Run the common database connection test for Teradata.

    This can be executed either as part of a metadata workflow
    or during an Automation Workflow. The only Teradata-specific
    piece is the query used for the "GetDatabases" step.
    """
    test_connection_db_common(
        metadata=metadata,
        engine=engine,
        service_connection=service_connection,
        automation_workflow=automation_workflow,
        queries={"GetDatabases": TERADATA_GET_DATABASE},
    )
148 changes: 148 additions & 0 deletions ingestion/src/metadata/ingestion/source/database/teradata/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Teradata source implementation.
"""
import traceback
from typing import Iterable, Optional

from teradatasqlalchemy.dialect import TeradataDialect

from metadata.generated.schema.api.data.createStoredProcedure import (
CreateStoredProcedureRequest,
)
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedureCode
from metadata.generated.schema.entity.services.connections.database.teradataConnection import (
TeradataConnection,
)
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.generated.schema.type.basic import EntityName
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
from metadata.ingestion.source.database.teradata.models import (
STORED_PROC_LANGUAGE_MAP,
TeradataStoredProcedure,
)
from metadata.ingestion.source.database.teradata.queries import (
TERADATA_GET_STORED_PROCEDURES,
TERADATA_SHOW_STORED_PROCEDURE,
)
from metadata.ingestion.source.database.teradata.utils import get_table_comment
from metadata.utils import fqn
from metadata.utils.logger import ingestion_logger
from metadata.utils.sqlalchemy_utils import get_all_table_comments

# Module-level logger shared by everything in this source.
logger = ingestion_logger()

# Patch the Teradata SQLAlchemy dialect at import time: replace/attach the
# table-comment helpers with the implementations imported above
# (`get_table_comment` from the teradata utils module and
# `get_all_table_comments` from the shared sqlalchemy utils).
TeradataDialect.get_table_comment = get_table_comment
TeradataDialect.get_all_table_comments = get_all_table_comments


class TeradataSource(CommonDbSourceService):
    """
    Implements the necessary methods to extract
    Database metadata from Teradata Source
    """

    @classmethod
    def create(
        cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
    ):
        """
        Build the source from a raw workflow config dictionary.

        Raises:
            InvalidSourceException: if the configured service connection
                is not a TeradataConnection.
        """
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection = config.serviceConnection.__root__.config
        if not isinstance(connection, TeradataConnection):
            raise InvalidSourceException(
                f"Expected TeradataConnection, but got {connection}"
            )
        return cls(config, metadata)

    def get_stored_procedures(self) -> Iterable[TeradataStoredProcedure]:
        """
        List Teradata stored procedures

        Yields one TeradataStoredProcedure (with its definition filled in)
        per row returned for the current schema. Rows that cannot be parsed
        or described are reported through ``self.status.failed`` and skipped.
        Nothing is yielded when stored procedure ingestion is disabled.
        """
        if self.source_config.includeStoredProcedures:
            results = self.engine.execute(
                TERADATA_GET_STORED_PROCEDURES.format(
                    schema_name=self.context.get().database_schema,
                )
            ).all()
            for row in results:
                try:
                    stored_procedure = TeradataStoredProcedure.parse_obj(dict(row))
                    stored_procedure.definition = self.describe_procedure_definition(
                        stored_procedure
                    )
                    yield stored_procedure
                except Exception as exc:
                    # Capture the traceback once so the log and the status
                    # report carry the same information.
                    stack_trace = traceback.format_exc()
                    error_msg = f"Error parsing Stored Procedure payload: {exc}"
                    # Logger methods need a message; a bare `logger.error()`
                    # raises TypeError here and hides the real failure.
                    logger.debug(stack_trace)
                    logger.error(error_msg)
                    payload = dict(row)
                    # The model field is `procedure_name`; keep `name` as a
                    # fallback for backward compatibility.
                    failed_name = payload.get("procedure_name") or payload.get(
                        "name", "UNKNOWN"
                    )
                    self.status.failed(
                        error=StackTraceError(
                            name=failed_name,
                            error=error_msg,
                            stackTrace=stack_trace,
                        )
                    )

    def describe_procedure_definition(
        self, stored_procedure: TeradataStoredProcedure
    ) -> str:
        """
        We can only get the SP definition via SHOW PROCEDURE

        Returns the first column of the first result row as a string.
        If the query returns no rows, indexing the missing row raises; the
        caller in ``get_stored_procedures`` handles that as a failed record.
        """
        res = self.engine.execute(
            TERADATA_SHOW_STORED_PROCEDURE.format(
                schema_name=stored_procedure.database_schema,
                procedure_name=stored_procedure.procedure_name,
            )
        )
        return str(res.first()[0])

    def yield_stored_procedure(
        self, stored_procedure: TeradataStoredProcedure
    ) -> Iterable[Either[CreateStoredProcedureRequest]]:
        """
        Prepare the stored procedure payload

        Yields an ``Either`` holding the create request on success, or a
        ``StackTraceError`` on the left side if building the request fails.
        """

        try:
            stored_procedure_request = CreateStoredProcedureRequest(
                name=EntityName(__root__=stored_procedure.procedure_name),
                description=None,
                storedProcedureCode=StoredProcedureCode(
                    # Unknown procedure types map to None via `.get`.
                    language=STORED_PROC_LANGUAGE_MAP.get(
                        stored_procedure.procedure_type
                    ),
                    code=stored_procedure.definition,
                ),
                databaseSchema=fqn.build(
                    metadata=self.metadata,
                    entity_type=DatabaseSchema,
                    service_name=self.context.get().database_service,
                    database_name=self.context.get().database,
                    schema_name=stored_procedure.database_schema,
                ),
            )
            yield Either(right=stored_procedure_request)
            self.register_record_stored_proc_request(stored_procedure_request)

        except Exception as exc:
            yield Either(
                left=StackTraceError(
                    name=stored_procedure.procedure_name,
                    error=f"Error yielding Stored Procedure [{stored_procedure.procedure_name}] due to [{exc}]",
                    stackTrace=traceback.format_exc(),
                )
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
Teradata models
"""
from typing import Optional

from pydantic import BaseModel, Field

from metadata.generated.schema.entity.data.storedProcedure import Language

# Maps a stored procedure type string to the OpenMetadata `Language` enum
# member used when building the stored procedure code payload.
# NOTE(review): keys are presumably the procedure type values reported by
# the Teradata stored procedure listing query — confirm against that query.
STORED_PROC_LANGUAGE_MAP = {
"SQL": Language.SQL,
"EXTERNAL": Language.External,
}


class TeradataStoredProcedure(BaseModel):
    """Teradata stored procedure list query results"""

    # Name of the procedure; required.
    procedure_name: str = Field(...)
    # Schema the procedure belongs to, when available.
    database_schema: Optional[str] = Field(None)
    # Procedure type as a plain string so it matches both the annotation and
    # the string keys of STORED_PROC_LANGUAGE_MAP (an enum member would not).
    procedure_type: str = Field(Language.SQL.value)
    # Procedure source code; not set when the model is first created, so it
    # must be optional.
    definition: Optional[str] = Field(None)
Loading

0 comments on commit d909a31

Please sign in to comment.