From 6b11480b27e4a344cb61f79bbb2b1689371deb1e Mon Sep 17 00:00:00 2001 From: KNagaVivek <79193329+KNagaVivek@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:34:51 +0530 Subject: [PATCH 1/3] Create watsonx-presto-config.md --- .../resource-configs/watsonx-presto-config.md | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 website/docs/reference/resource-configs/watsonx-presto-config.md diff --git a/website/docs/reference/resource-configs/watsonx-presto-config.md b/website/docs/reference/resource-configs/watsonx-presto-config.md new file mode 100644 index 00000000000..42111a2fb47 --- /dev/null +++ b/website/docs/reference/resource-configs/watsonx-presto-config.md @@ -0,0 +1,113 @@ +--- +title: "IBM watsonx.data Presto configurations" +id: "watsonx-presto-config" +--- + +## Instance requirements + +To use IBM watsonx.data Presto (Java) with dbt, ensure the instance has an attached catalog that allows creating, renaming, altering, and dropping objects such as tables and views. The user connecting to the instance with dbt must have equivalent permissions for the target catalog. + +## Session properties + +With an IBM watsonx.data SaaS/Software or Presto instance, you can [set session properties](https://prestodb.io/docs/current/sql/set-session.html) to modify the current configuration for your user session. + +To temporarily adjust session properties for a specific dbt model or a group of models, use a [dbt hook](/reference/resource-configs/pre-hook-post-hook). For example: + +```sql +{{ + config( + pre_hook="set session query_max_run_time='10m'" + ) +}} +``` + +## Connector properties + +IBM watsonx.data SaaS/Software and Presto support various connector properties to manage how your data is represented. These properties are particularly useful for file-based connectors like Hive. 
+ +For information on what is supported for each data source, refer to one of the following resources: +- [Presto Connectors](https://prestodb.io/docs/current/connector.html) +- [watsonx.data SaaS Catalog](https://cloud.ibm.com/docs/watsonxdata?topic=watsonxdata-reg_database) +- [watsonx.data Software Catalog](https://www.ibm.com/docs/en/watsonx/watsonxdata/1.1.x?topic=components-adding-database-catalog-pair) + + +### Hive catalogs + +When using the Hive connector, ensure the following settings are configured. These settings are crucial for enabling frequently executed operations like `DROP` and `RENAME` in dbt: + +```java +hive.metastore-cache-ttl=0s +hive.metastore-refresh-interval=5s +hive.allow-drop-table=true +hive.allow-rename-table=true + +``` + +## File format configuration + +For file-based connectors, such as Hive, you can customize table materialization and data formats. For example, to create a partitioned [Parquet](https://spark.apache.org/docs/latest/sql-data-sources-parquet.html) table: + +```sql +{{ + config( + materialized='table', + properties={ + "format": "'PARQUET'", + "partitioning": "ARRAY['bucket(id, 2)']", + } + ) +}} +``` + +## Seeds and prepared statements +The `dbt-watsonx-presto` adapter offers comprehensive support for all [Presto datatypes](https://prestodb.io/docs/current/language/types.html) and [watsonx.data Presto datatypes](https://www.ibm.com/support/pages/node/7157339) in seed files. However, to utilize this feature, you need to explicitly define the data types for each column in the `dbt_project.yml` file. + +To configure column data types, update your `dbt_project.yml` file as follows: + +```yaml +seeds: + <your_project_name>: + <seed_name>: + +column_types: + <col_1>: <datatype> + <col_2>: <datatype> +``` +This ensures that dbt correctly interprets and applies the specified data types when loading seed data into your watsonx.data Presto instances. 
+ + +## Materializations +### Table + +The `dbt-watsonx-presto` adapter helps you create and update tables through table materialization, making it easier to work with data in watsonx.data Presto. + +#### Recommendations +- **Check Permissions:** Ensure that the necessary permissions for table creation are enabled in the catalog or schema. +- **Check Connector Documentation:** Review the Presto [connector documentation](https://prestodb.io/docs/current/connector.html) or the watsonx.data Presto [SQL statement support](https://www.ibm.com/support/pages/node/7157339) to ensure your connector supports table creation and modification. + +#### Limitations with Some Connectors +Certain watsonx.data Presto connectors, particularly read-only ones or those with restricted permissions, do not allow creating or modifying tables. If you attempt to use table materialization with these connectors, you may encounter an error like: + +```sh +PrestoUserError(type=USER_ERROR, name=NOT_SUPPORTED, message="This connector does not support creating tables with data", query_id=20241206_071536_00026_am48r) +``` + +### View + +The `dbt-watsonx-presto` adapter supports creating views using the `materialized='view'` configuration in your dbt model. By default, when you set the materialization to view, it creates a view in watsonx.data Presto. + +```sql +{{ + config( + materialized='view', + ) +}} +``` + +For more details, refer to the watsonx.data [SQL statement support](https://www.ibm.com/support/pages/node/7157339) or Presto [connector documentation](https://prestodb.io/docs/current/connector.html) to verify whether your connector supports view creation. 
+ + +### Unsupported Features +The following features are not supported by the `dbt-watsonx-presto` adapter: +- Incremental Materialization +- Materialized Views +- Snapshots From f0a76761fdae341c7ad6ee0e796ed241f8d86432 Mon Sep 17 00:00:00 2001 From: KNagaVivek <79193329+KNagaVivek@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:47:46 +0530 Subject: [PATCH 2/3] Create watsonx-presto-setup.md --- .../watsonx-presto-setup.md | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 website/docs/docs/core/connect-data-platform/watsonx-presto-setup.md diff --git a/website/docs/docs/core/connect-data-platform/watsonx-presto-setup.md b/website/docs/docs/core/connect-data-platform/watsonx-presto-setup.md new file mode 100644 index 00000000000..6bf4ca61a2b --- /dev/null +++ b/website/docs/docs/core/connect-data-platform/watsonx-presto-setup.md @@ -0,0 +1,105 @@ +--- +title: "IBM watsonx.data Presto setup" +description: "Read this guide to learn about the IBM watsonx.data Presto setup in dbt." +id: "watsonx-presto-setup" +meta: + maintained_by: IBM + authors: Karnati Naga Vivek, Hariharan Ashokan, Biju Palliyath, Gopikrishnan Varadarajulu, Rohan Pednekar + github_repo: 'IBM/dbt-watsonx-presto' + pypi_package: 'dbt-watsonx-presto' + min_core_version: v1.8.0 + cloud_support: 'Not Supported' + min_supported_version: 'n/a' + slack_channel_name: + slack_channel_link: + platform_name: IBM watsonx.data + config_page: /reference/resource-configs/watsonx-presto-config +--- + +The dbt-watsonx-presto adapter allows you to use dbt to transform and manage data on IBM watsonx.data Presto (Java), leveraging its distributed SQL query engine capabilities. The configuration and connection setup described here are also applicable to open-source Presto. Before proceeding, ensure you have the following: +
+- An active IBM watsonx.data Presto (Java) engine with connection details (host, port, catalog, schema) in SaaS/Software.
+- Authentication credentials: a username and a password or API key.
+- For watsonx.data instances, SSL verification is required for secure connections. If the instance host uses HTTPS, there is no need to specify the SSL certificate parameter. However, if the instance host uses an unsecured HTTP connection, ensure you provide the path to the SSL certificate file.
+
+Refer to the Configuring dbt-watsonx-presto section for guidance on obtaining and organizing these details. + + + + +import SetUpPages from '/snippets/_setup-pages-intro.md'; + +<SetUpPages meta={frontMatter.meta} /> + + +## Connecting to IBM watsonx.data Presto + +To connect dbt with watsonx.data Presto (Java), you need to configure a profile in your `profiles.yml` file located in the `.dbt/` directory of your home folder. The following is an example configuration for connecting to IBM watsonx.data SaaS and Software instances: + +<File name='~/.dbt/profiles.yml'> + +```yaml +my_project: + outputs: + software: + type: presto + method: BasicAuth + user: [user] + password: [password] + host: [hostname] + database: [database name] + schema: [your dbt schema] + port: [port number] + threads: [1 or more] + ssl_verify: path/to/certificate + + saas: + type: presto + method: BasicAuth + user: [user] + password: [api_key] + host: [hostname] + database: [database name] + schema: [your dbt schema] + port: [port number] + threads: [1 or more] + + target: software + +``` + +</File> + +## Host parameters + +The following profile fields are required for configuring watsonx.data Presto (Java) connections. Currently, the adapter supports only the `BasicAuth` authentication method. For IBM watsonx.data SaaS or Software instances, you can get the hostname and port details by clicking View connect details inside the Presto (Java) engine details page. + +| Option | Required/Optional | Description | Example | +| --------- | ------- | ------- | ----------- | +| `method` | Required (default value is none) | Authentication method for Presto | `None` or `BasicAuth` | +| `user` | Required | Username or email for authentication. | `user` | +| `password`| Required (if `method` is `BasicAuth`) | Password or API key for authentication | `password` | +| `host` | Required | Hostname for connecting to Presto. | `127.0.0.1` | +| `database`| Required | The catalog name in your Presto instance. | `Analytics` | +| `schema` | Required | The schema name within your Presto instance catalog. 
| `my_schema` | +| `port` | Required | Port for connecting to Presto. | `443` | +| `ssl_verify` | Optional (default: **true**) | Specifies the path to the SSL certificate or a boolean value. The SSL certificate path is required if the watsonx.data instance is not secure (HTTP). | `path/to/certificate` or `true` | + + +### Schemas and databases +When selecting the catalog and the schema, make sure the user has read and write access to both. This selection does not limit your ability to query the catalog. Instead, the selected catalog and schema serve as the default location where tables and views are materialized. In addition, the Presto connector used in the catalog must support creating tables. This default can be changed later from within your dbt project. + +### SSL Verification +- If the Presto instance uses an unsecured HTTP connection, you must set `ssl_verify` to the path of the SSL certificate file. +- If the instance uses `HTTPS`, this parameter is not required and can be omitted. + +## Additional parameters + +The following profile fields are optional to set up. They let you configure your instance session and dbt for your connection. + + +| Profile field | Description | Example | +| ----------------------------- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------ | +| `threads` | How many threads dbt should use (default is `1`) | `8` | +| `http_headers` | HTTP headers to send alongside requests to Presto, specified as a YAML dictionary of (header, value) pairs. 
| `X-Presto-Routing-Group: my-instance` | +| `http_scheme` | The HTTP scheme to use for requests to Presto (default: `http`, or `https` if `BasicAuth`) | `https` or `http` | From cbc72cbd89ef36bc8595c834b24c3aa26f2525b3 Mon Sep 17 00:00:00 2001 From: KNagaVivek <79193329+KNagaVivek@users.noreply.github.com> Date: Thu, 9 Jan 2025 09:41:50 +0530 Subject: [PATCH 3/3] Update sidebars --- website/sidebars.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/website/sidebars.js b/website/sidebars.js index 3a8f560c297..00850689b31 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -252,6 +252,7 @@ const sidebarSettings = { "docs/core/connect-data-platform/tidb-setup", "docs/core/connect-data-platform/upsolver-setup", "docs/core/connect-data-platform/vertica-setup", + "docs/core/connect-data-platform/watsonx-presto-setup", "docs/core/connect-data-platform/yellowbrick-setup", ], }, @@ -897,6 +898,7 @@ const sidebarSettings = { "reference/resource-configs/teradata-configs", "reference/resource-configs/upsolver-configs", "reference/resource-configs/vertica-configs", + "reference/resource-configs/watsonx-presto-config", "reference/resource-configs/yellowbrick-configs", ], },