Skip to content

Commit

Permalink
feat(ingest/qlik): Qlik cloud connector integration (datahub-project#…
Browse files Browse the repository at this point in the history
…9682)

Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
shubhamjagtap639 and hsheth2 authored Feb 26, 2024
1 parent 93acb82 commit a1f2216
Show file tree
Hide file tree
Showing 17 changed files with 5,682 additions and 0 deletions.
4 changes: 4 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import mlflowLogo from '../../../../images/mlflowlogo.png';
import dynamodbLogo from '../../../../images/dynamodblogo.png';
import fivetranLogo from '../../../../images/fivetranlogo.png';
import csvLogo from '../../../../images/csv-logo.png';
import qlikLogo from '../../../../images/qliklogo.png';

export const ATHENA = 'athena';
export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
Expand Down Expand Up @@ -113,6 +114,8 @@ export const FIVETRAN = 'fivetran';
export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`;
export const CSV = 'csv-enricher';
export const CSV_URN = `urn:li:dataPlatform:${CSV}`;
export const QLIK_SENSE = 'qlik-sense';
export const QLIK_SENSE_URN = `urn:li:dataPlatform:${QLIK_SENSE}`;

export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
Expand Down Expand Up @@ -149,6 +152,7 @@ export const PLATFORM_URN_TO_LOGO = {
[VERTICA_URN]: verticaLogo,
[FIVETRAN_URN]: fivetranLogo,
[CSV_URN]: csvLogo,
[QLIK_SENSE_URN]: qlikLogo,
};

export const SOURCE_TO_PLATFORM_URN = {
Expand Down
7 changes: 7 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -236,5 +236,12 @@
"displayName": "Other",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: <source-type>\n config:\n # Source-type specifics config\n <source-configs>"
},
{
"urn": "urn:li:dataPlatform:qlik-sense",
"name": "qlik-sense",
"displayName": "Qlik Sense",
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/qlik-sense/",
"recipe": "source:\n type: qlik-sense\n config:\n # Coordinates\n tenant_hostname: https://xyz12xz.us.qlikcloud.com\n # Coordinates\n api_key: QLIK_API_KEY\n\n # Optional - filter for certain space names instead of ingesting everything.\n # space_pattern:\n\n # allow:\n # - space_name\n ingest_owner: true"
}
]
Binary file added datahub-web-react/src/images/qliklogo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions metadata-ingestion/docs/sources/qlik-sense/qlik-sense_pre.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
## Integration Details

This source extracts the following:

- Accessible spaces, and the apps within those spaces, as Containers.
- Qlik Datasets as DataHub Datasets with schema metadata.
- Sheets as DataHub Dashboards, and the charts present inside sheets as DataHub Charts.

## Configuration Notes

1. Refer to the [doc](https://qlik.dev/authenticate/api-key/generate-your-first-api-key/) to generate an API key from the hub.
2. Get the tenant hostname from the About tab after logging in to your Qlik Sense account.

## Concept mapping

| Qlik Sense | Datahub | Notes |
|------------------------|---------------------------------------------------------------|----------------------------------|
| `Space` | [Container](../../metamodel/entities/container.md) | SubType `"Qlik Space"` |
| `App` | [Container](../../metamodel/entities/container.md) | SubType `"Qlik App"` |
| `Sheet` | [Dashboard](../../metamodel/entities/dashboard.md) | |
| `Chart` | [Chart](../../metamodel/entities/chart.md) | |
| `Dataset` | [Dataset](../../metamodel/entities/dataset.md) | SubType `"Qlik Dataset"` |
| `User` | [User (a.k.a CorpUser)](../../metamodel/entities/corpuser.md) | Optionally Extracted |
25 changes: 25 additions & 0 deletions metadata-ingestion/docs/sources/qlik-sense/qlik-sense_recipe.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
source:
type: qlik-sense
config:
# Coordinates
tenant_hostname: "xyz12xz.us.qlikcloud.com"
# Credentials
api_key: "QLIK_API_KEY"

# Optional - filter for certain space names instead of ingesting everything.
# Mention 'personal_space' if entities of the personal space need to be ingested
# space_pattern:
# allow:
# - space_name

ingest_owner: true

# Optional -- This mapping is optional and only required to configure platform-instance for Qlik app dataset upstream source tables
# A mapping of the Qlik app dataset upstream tables from data connection to platform instance. Use 'data_connection_name' as key.
# data_connection_to_platform_instance:
# data_connection_name:
# platform_instance: cloud_instance
# env: DEV

sink:
# sink configs
3 changes: 3 additions & 0 deletions metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@
# databricks is alias for unity-catalog and needs to be kept in sync
"databricks": databricks | sql_common | sqllineage_lib,
"fivetran": snowflake_common | bigquery_common,
"qlik-sense": sqlglot_lib | {"requests", "websocket-client"},
}

# This is mainly used to exclude plugins from the Docker image.
Expand Down Expand Up @@ -521,6 +522,7 @@
"mode",
"fivetran",
"kafka-connect",
"qlik-sense",
]
if plugin
for dependency in plugins[plugin]
Expand Down Expand Up @@ -625,6 +627,7 @@
"gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource",
"sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource",
"fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource",
"qlik-sense = datahub.ingestion.source.qlik_sense.qlik_sense:QlikSenseSource",
],
"datahub.ingestion.transformer.plugins": [
"simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class DatasetSubTypes(str, Enum):
SALESFORCE_CUSTOM_OBJECT = "Custom Object"
SALESFORCE_STANDARD_OBJECT = "Object"
POWERBI_DATASET_TABLE = "PowerBI Dataset Table"
QLIK_DATASET = "Qlik Dataset"
BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot"

# TODO: Create separate entity...
Expand All @@ -40,6 +41,8 @@ class BIContainerSubTypes(str, Enum):
TABLEAU_WORKBOOK = "Workbook"
POWERBI_WORKSPACE = "Workspace"
POWERBI_DATASET = "PowerBI Dataset"
QLIK_SPACE = "Qlik Space"
QLIK_APP = "Qlik App"


class BIAssetSubTypes(str, Enum):
Expand Down
Empty file.
130 changes: 130 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/qlik_sense/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import logging
from dataclasses import dataclass
from typing import Dict, Optional

import pydantic

from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.source_common import (
EnvConfigMixin,
PlatformInstanceConfigMixin,
)
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StaleEntityRemovalSourceReport,
)
from datahub.ingestion.source.state.stateful_ingestion_base import (
StatefulIngestionConfigBase,
)

logger = logging.getLogger(__name__)

# Timestamp format used by Qlik Cloud API responses: ISO-8601 with
# microseconds and a literal "Z" suffix (e.g. "2024-02-26T10:15:30.123456Z").
QLIK_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


class Constant:
    """
    Response field-name constants used by the Qlik plugin.

    Centralizes the JSON keys read from the Qlik Cloud REST APIs and the
    Qlik Engine websocket API so string literals are not scattered across
    the connector.
    """

    # Rest API response key constants
    DATA = "data"
    ID = "id"
    NAME = "name"
    TYPE = "type"
    ITEMID = "itemId"
    # Pagination keys: responses link to the next page via links.next.href
    NEXT = "next"
    LINKS = "links"
    HREF = "href"
    DATASETTYPE = "datasetType"
    CREATEDAT = "createdAt"
    UPDATEDAT = "updatedAt"
    SECUREQRI = "secureQri"
    QRI = "qri"
    SPACEID = "spaceId"
    SPACE = "space"
    CREATEDTIME = "createdTime"
    LASTMODIFIEDTIME = "lastModifiedTime"
    OPERATIONAL = "operational"
    SIZE = "size"
    ROWCOUNT = "rowCount"
    DATATYPE = "dataType"
    PRIMARYKEY = "primaryKey"
    NULLABLE = "nullable"
    SCHEMA = "schema"
    DATAFIELDS = "dataFields"
    RESOURCETYPE = "resourceType"
    USAGE = "usage"
    CREATEDDATE = "createdDate"
    MODIFIEDDATE = "modifiedDate"
    RESOURCEID = "resourceId"
    DATASETSCHEMA = "datasetSchema"
    GRAPH = "graph"
    NODES = "nodes"
    RESOURCES = "resources"
    LINEAGE = "lineage"
    TABLELABEL = "tableLabel"
    TABLEQRI = "tableQRI"
    OWNERID = "ownerId"
    # Websocket response key constants (Qlik Engine JSON-RPC responses)
    QID = "qId"
    RESULT = "result"
    QRETURN = "qReturn"
    QTYPE = "qType"
    QHANDLE = "qHandle"
    QLAYOUT = "qLayout"
    QMETA = "qMeta"
    QCHILDLIST = "qChildList"
    QITEMS = "qItems"
    QINFO = "qInfo"
    QLIST = "qList"
    CONNECTORPROPERTIES = "connectorProperties"
    TABLEQUALIFIERS = "tableQualifiers"
    CONNECTIONINFO = "connectionInfo"
    SOURCECONNECTORID = "sourceConnectorID"
    DATABASENAME = "databaseName"
    SCHEMANAME = "schemaName"
    TABLES = "tables"
    DATACONNECTORID = "dataconnectorid"
    DATACONNECTORNAME = "dataconnectorName"
    DATACONNECTORPLATFORM = "dataconnectorPlatform"
    # Item type
    APP = "app"
    DATASET = "dataset"
    # Personal entity constants
    # NOTE(review): placeholder id/name for a user's personal space —
    # presumably Qlik assigns no space id to personal spaces; confirm
    # against the API client's usage.
    PERSONAL_SPACE_ID = "personal-space-id"
    PERSONAL_SPACE_NAME = "personal_space"


@dataclass
class QlikSourceReport(StaleEntityRemovalSourceReport):
    """Ingestion report for the Qlik source; extends the stale-entity
    removal report with a count of Qlik spaces processed."""

    # Number of Qlik spaces discovered during this ingestion run.
    number_of_spaces: int = 0

    def report_number_of_spaces(self, number_of_spaces: int) -> None:
        """Record the total count of Qlik spaces discovered."""
        self.number_of_spaces = number_of_spaces


class PlatformDetail(PlatformInstanceConfigMixin, EnvConfigMixin):
    """Per-data-connection override carrying only `platform_instance` and
    `env`; used as the value type of
    `QlikSourceConfig.data_connection_to_platform_instance`."""

    pass


class QlikSourceConfig(
    StatefulIngestionConfigBase, PlatformInstanceConfigMixin, EnvConfigMixin
):
    """Configuration for the Qlik Sense (Qlik Cloud) ingestion source.

    Requires a tenant hostname and an API key; optionally filters spaces,
    controls owner ingestion, and maps data connections to platform
    instances for upstream-lineage resolution.
    """

    tenant_hostname: str = pydantic.Field(description="Qlik Tenant hostname")
    api_key: str = pydantic.Field(description="Qlik API Key")
    # Qlik space identifier
    # FIX: implicit string concatenation previously rendered as
    # "ingestion.Mention" (missing separator space) in generated docs.
    space_pattern: AllowDenyPattern = pydantic.Field(
        default=AllowDenyPattern.allow_all(),
        description="Regex patterns to filter Qlik spaces in ingestion. "
        "Mention 'personal_space' if entities of the personal space need to be ingested.",
    )
    ingest_owner: Optional[bool] = pydantic.Field(
        default=True,
        description="Ingest Owner from source. This will override Owner info entered from UI",
    )
    # Qlik app dataset upstream tables from data connection to platform instance mapping
    # FIX: implicit string concatenation previously rendered as "instance.Use"
    # (missing separator space) in generated docs.
    data_connection_to_platform_instance: Dict[str, PlatformDetail] = pydantic.Field(
        default={},
        description="A mapping of the Qlik app dataset upstream tables from data connection to platform instance. "
        "Use 'data_connection_name' as key.",
    )
Loading

0 comments on commit a1f2216

Please sign in to comment.