Support connecting to HiveServer2 with ZooKeeper Service Discovery enabled in GraalVM Native Image
apache · Nov 22, 2024 · 5b8bed2 · 5b8bed2
1 parent fa2c403
commit 5b8bed2
Show file tree

Hide file tree

Showing 10 changed files with 566 additions and 30 deletions.
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -23,6 +23,7 @@
 1. DistSQL: Check inline expression when create sharding table rule with inline sharding algorithm - [#33735](https://github.com/apache/shardingsphere/pull/33735)
 1. Infra: Support setting `hive_conf_list`, `hive_var_list` and `sess_var_list` for jdbcURL when connecting to HiveServer2 - [#33749](https://github.com/apache/shardingsphere/pull/33749)
 1. Infra: Support connecting to HiveServer2 through database connection pools other than HikariCP - [#33762](https://github.com/apache/shardingsphere/pull/33762)
+1. Proxy Native: Support connecting to HiveServer2 with ZooKeeper Service Discovery enabled in GraalVM Native Image - [#33768](https://github.com/apache/shardingsphere/pull/33768)
 
 ### Bug Fixes
 

diff --git a/...ntent/user-manual/shardingsphere-jdbc/optional-plugins/hiveserver2/_index.cn.md b/...ntent/user-manual/shardingsphere-jdbc/optional-plugins/hiveserver2/_index.cn.md
@@ -94,12 +94,6 @@ services:
       SERVICE_NAME: hiveserver2
     ports:
       - "10000:10000"
-    expose:
-      - 10002
-    volumes:
-      - warehouse:/opt/hive/data/warehouse
-volumes:
-  warehouse:
 ```
 
 ### 创建业务表
@@ -113,7 +107,8 @@ sudo snap install dbeaver-ce
 snap run dbeaver-ce
 ```
 
-在 DBeaver Community 内使用 `jdbc:hive2://localhost:10000/` 的 `jdbcUrl` 连接至 HiveServer2，`username` 和 `password` 留空。
+在 DBeaver Community 内，使用 `jdbc:hive2://localhost:10000/` 的 `jdbcUrl` 连接至 HiveServer2，`username` 和 `password` 留空。
+执行如下 SQL，
 
 ```sql
 -- noinspection SqlNoDataSourceInspectionForFile
@@ -297,3 +292,149 @@ HiveServer2 不支持 ShardingSphere 集成级别的本地事务，XA 事务或
 
 当用户使用 DBeaver Community 连接至 HiveServer2 时，需确保 DBeaver Community 版本大于或等于 `24.2.5`。
 参考 https://github.com/dbeaver/dbeaver/pull/35059 。
+
+### 连接至开启 ZooKeeper Service Discovery 的 HiveServer2 的限制
+
+当前的确支持在 ShardingSphere 配置文件中的 `jdbcUrl` 配置连接至开启 ZooKeeper Service Discovery 的 HiveServer2，但存在限制。
+
+引入讨论，假设存在如下 Docker Compose 文件来启动开启 ZooKeeper Service Discovery 的 HiveServer2。
+
+```yaml
+services:
+  zookeeper:
+    image: zookeeper:3.9.3-jre-17
+    ports:
+      - "2181:2181"
+  apache-hive-1:
+    image: apache/hive:4.0.1
+    depends_on:
+      - zookeeper
+    environment:
+      SERVICE_NAME: hiveserver2
+      SERVICE_OPTS: >-
+        -Dhive.server2.support.dynamic.service.discovery=true
+        -Dhive.zookeeper.quorum=zookeeper:2181
+        -Dhive.server2.thrift.bind.host=0.0.0.0
+        -Dhive.server2.thrift.port=10000
+    ports:
+      - "10000:10000"
+  apache-hive-2:
+    image: apache/hive:4.0.1
+    depends_on:
+      - zookeeper
+    environment:
+      SERVICE_NAME: hiveserver2
+      SERVICE_OPTS: >-
+        -Dhive.server2.support.dynamic.service.discovery=true
+        -Dhive.zookeeper.quorum=zookeeper:2181
+        -Dhive.server2.thrift.bind.host=0.0.0.0
+        -Dhive.server2.thrift.port=20000
+    ports:
+      - "20000:20000"
+```
+
+此时，对于 ZooKeeper Server 中的`/hiveserver2/serverUri=0.0.0.0:10000;version=4.0.1;sequence=0000000000` 节点，
+存在值为`hive.server2.instance.uri=0.0.0.0:10000;hive.server2.authentication=NONE;hive.server2.transport.mode=binary;hive.server2.thrift.sasl.qop=auth;hive.server2.thrift.bind.host=0.0.0.0;hive.server2.thrift.port=10000;hive.server2.use.SSL=false`。
+对于 ZooKeeper Server 中的`/hiveserver2/serverUri=0.0.0.0:20000;version=4.0.1;sequence=0000000001` 节点，
+存在值为`hive.server2.instance.uri=0.0.0.0:20000;hive.server2.authentication=NONE;hive.server2.transport.mode=binary;hive.server2.thrift.sasl.qop=auth;hive.server2.thrift.bind.host=0.0.0.0;hive.server2.thrift.port=20000;hive.server2.use.SSL=false`。
+
+在 DBeaver Community 内，
+使用 `jdbc:hive2://127.0.0.1:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2` 的 `jdbcUrl` 连接至 HiveServer2，
+`username` 和 `password` 留空。
+执行如下 SQL，
+
+```sql
+-- noinspection SqlNoDataSourceInspectionForFile
+CREATE DATABASE demo_ds_0;
+CREATE DATABASE demo_ds_1;
+CREATE DATABASE demo_ds_2;
+```
+
+分别使用 `jdbc:hive2://127.0.0.1:2181/demo_ds_0;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2` ，
+`jdbc:hive2://127.0.0.1:2181/demo_ds_1;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2` 和
+`jdbc:hive2://127.0.0.1:2181/demo_ds_2;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2`
+的 `jdbcUrl` 连接至 HiveServer2 来执行如下 SQL，
+
+```sql
+-- noinspection SqlNoDataSourceInspectionForFile
+set iceberg.mr.schema.auto.conversion=true;
+
+CREATE TABLE IF NOT EXISTS t_order
+(
+    order_id   BIGINT,
+    order_type INT,
+    user_id    INT    NOT NULL,
+    address_id BIGINT NOT NULL,
+    status     VARCHAR(50),
+    PRIMARY KEY (order_id) disable novalidate
+) STORED BY ICEBERG STORED AS ORC TBLPROPERTIES ('format-version' = '2');
+
+TRUNCATE TABLE t_order;
+```
+
+在业务项目引入`前提条件`涉及的依赖后，在业务项目的 classpath 上编写 ShardingSphere 数据源的配置文件`demo.yaml`，
+
+```yaml
+dataSources:
+    ds_0:
+        dataSourceClassName: com.zaxxer.hikari.HikariDataSource
+        driverClassName: org.apache.hive.jdbc.HiveDriver
+        jdbcUrl: jdbc:hive2://127.0.0.1:2181/demo_ds_0;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
+    ds_1:
+        dataSourceClassName: com.zaxxer.hikari.HikariDataSource
+        driverClassName: org.apache.hive.jdbc.HiveDriver
+        jdbcUrl: jdbc:hive2://127.0.0.1:2181/demo_ds_1;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
+    ds_2:
+        dataSourceClassName: com.zaxxer.hikari.HikariDataSource
+        driverClassName: org.apache.hive.jdbc.HiveDriver
+        jdbcUrl: jdbc:hive2://127.0.0.1:2181/demo_ds_2;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
+rules:
+- !SHARDING
+    tables:
+      t_order:
+        actualDataNodes: <LITERAL>ds_0.t_order, ds_1.t_order, ds_2.t_order
+        keyGenerateStrategy:
+          column: order_id
+          keyGeneratorName: snowflake
+    defaultDatabaseStrategy:
+      standard:
+        shardingColumn: user_id
+        shardingAlgorithmName: inline
+    shardingAlgorithms:
+      inline:
+        type: INLINE
+        props:
+          algorithm-expression: ds_${user_id % 2}
+    keyGenerators:
+      snowflake:
+        type: SNOWFLAKE
+```
+
+此时可正常创建 ShardingSphere 的数据源并在虚拟数据源上执行逻辑 SQL，
+
+```java
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+public class ExampleUtils {
+    void test() throws SQLException {
+        HikariConfig config = new HikariConfig();
+        config.setJdbcUrl("jdbc:shardingsphere:classpath:demo.yaml");
+        config.setDriverClassName("org.apache.shardingsphere.driver.ShardingSphereDriver");
+        try (HikariDataSource dataSource = new HikariDataSource(config);
+             Connection connection = dataSource.getConnection();
+             Statement statement = connection.createStatement()) {
+            statement.execute("INSERT INTO t_order (user_id, order_type, address_id, status) VALUES (1, 1, 1, 'INSERT_TEST')");
+            statement.executeQuery("SELECT * FROM t_order");
+            statement.execute("DELETE FROM t_order WHERE order_id=1");
+        }
+    }
+}
+```
+
+但一旦 ZooKeeper Server 中的`/hiveserver2`节点被更新，
+由于 ShardingSphere 的内部类会缓存包含旧 HiveServer2 实例的信息的 `java.sql.Connection`，
+用户需要重新创建 ShardingSphere JDBC DataSource，
+或重新创建通过 `org.apache.shardingsphere.driver.ShardingSphereDriver` 创建的 JDBC DataSource。
diff --git a/...ntent/user-manual/shardingsphere-jdbc/optional-plugins/hiveserver2/_index.en.md b/...ntent/user-manual/shardingsphere-jdbc/optional-plugins/hiveserver2/_index.en.md
@@ -96,12 +96,6 @@ services:
       SERVICE_NAME: hiveserver2
     ports:
       - "10000:10000"
-    expose:
-      - 10002
-    volumes:
-      - warehouse:/opt/hive/data/warehouse
-volumes:
-  warehouse:
 ```
 
 ### Create business tables
@@ -115,7 +109,9 @@ sudo snap install dbeaver-ce
 snap run dbeaver-ce
 ```
 
-In DBeaver Community, connect to HiveServer2 using the `jdbcUrl` of `jdbc:hive2://localhost:10000/`, leaving `username` and `password` blank.
+In DBeaver Community, use the `jdbcUrl` of `jdbc:hive2://localhost:10000/` to connect to HiveServer2, 
+and leave `username` and `password` blank.
+Execute the following SQL,
 
 ```sql
 -- noinspection SqlNoDataSourceInspectionForFile
@@ -306,3 +302,151 @@ For more discussion, please visit https://cwiki.apache.org/confluence/display/Hi
 When users use DBeaver Community to connect to HiveServer2, they need to ensure that the DBeaver Community version is greater than or equal to `24.2.5`.
 
 See https://github.com/dbeaver/dbeaver/pull/35059.
+
+### Limitations of connecting to HiveServer2 with ZooKeeper Service Discovery
+
+Currently, the `jdbcUrl` configuration in the ShardingSphere configuration file does support connecting to HiveServer2 with ZooKeeper Service Discovery, 
+but there are limitations.
+
+To illustrate, assume the following Docker Compose file is used to start HiveServer2 with ZooKeeper Service Discovery enabled.
+
+```yaml
+services:
+  zookeeper:
+    image: zookeeper:3.9.3-jre-17
+    ports:
+      - "2181:2181"
+  apache-hive-1:
+    image: apache/hive:4.0.1
+    depends_on:
+      - zookeeper
+    environment:
+      SERVICE_NAME: hiveserver2
+      SERVICE_OPTS: >-
+        -Dhive.server2.support.dynamic.service.discovery=true
+        -Dhive.zookeeper.quorum=zookeeper:2181
+        -Dhive.server2.thrift.bind.host=0.0.0.0
+        -Dhive.server2.thrift.port=10000
+    ports:
+      - "10000:10000"
+  apache-hive-2:
+    image: apache/hive:4.0.1
+    depends_on:
+      - zookeeper
+    environment:
+      SERVICE_NAME: hiveserver2
+      SERVICE_OPTS: >-
+        -Dhive.server2.support.dynamic.service.discovery=true
+        -Dhive.zookeeper.quorum=zookeeper:2181
+        -Dhive.server2.thrift.bind.host=0.0.0.0
+        -Dhive.server2.thrift.port=20000
+    ports:
+      - "20000:20000"
+```
+
+At this point, the node `/hiveserver2/serverUri=0.0.0.0:10000;version=4.0.1;sequence=0000000000` in ZooKeeper Server
+holds the value `hive.server2.instance.uri=0.0.0.0:10000;hive.server2.authentication=NONE;hive.server2.transport.mode=binary;hive.server2.thrift.sasl.qop=auth;hive.server2.thrift.bind.host=0.0.0.0;hive.server2.thrift.port=10000;hive.server2.use.SSL=false`.
+Likewise, the node `/hiveserver2/serverUri=0.0.0.0:20000;version=4.0.1;sequence=0000000001` in ZooKeeper Server
+holds the value `hive.server2.instance.uri=0.0.0.0:20000;hive.server2.authentication=NONE;hive.server2.transport.mode=binary;hive.server2.thrift.sasl.qop=auth;hive.server2.thrift.bind.host=0.0.0.0;hive.server2.thrift.port=20000;hive.server2.use.SSL=false`.
+
+In DBeaver Community,
+use the `jdbcUrl` of `jdbc:hive2://127.0.0.1:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2` to connect to HiveServer2,
+and leave `username` and `password` blank.
+Execute the following SQL,
+
+```sql
+-- noinspection SqlNoDataSourceInspectionForFile
+CREATE DATABASE demo_ds_0;
+CREATE DATABASE demo_ds_1;
+CREATE DATABASE demo_ds_2;
+```
+
+Connect to HiveServer2 using each of the `jdbcUrl` values `jdbc:hive2://127.0.0.1:2181/demo_ds_0;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2`,
+`jdbc:hive2://127.0.0.1:2181/demo_ds_1;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2`
+and `jdbc:hive2://127.0.0.1:2181/demo_ds_2;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2` in turn,
+and execute the following SQL on each connection,
+
+```sql
+-- noinspection SqlNoDataSourceInspectionForFile
+set iceberg.mr.schema.auto.conversion=true;
+
+CREATE TABLE IF NOT EXISTS t_order
+(
+    order_id   BIGINT,
+    order_type INT,
+    user_id    INT    NOT NULL,
+    address_id BIGINT NOT NULL,
+    status     VARCHAR(50),
+    PRIMARY KEY (order_id) disable novalidate
+) STORED BY ICEBERG STORED AS ORC TBLPROPERTIES ('format-version' = '2');
+
+TRUNCATE TABLE t_order;
+```
+
+After adding the dependencies listed in the `prerequisites` section to the business project,
+write the ShardingSphere data source configuration file `demo.yaml` on the classpath of the business project.
+
+```yaml
+dataSources:
+    ds_0:
+        dataSourceClassName: com.zaxxer.hikari.HikariDataSource
+        driverClassName: org.apache.hive.jdbc.HiveDriver
+        jdbcUrl: jdbc:hive2://127.0.0.1:2181/demo_ds_0;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
+    ds_1:
+        dataSourceClassName: com.zaxxer.hikari.HikariDataSource
+        driverClassName: org.apache.hive.jdbc.HiveDriver
+        jdbcUrl: jdbc:hive2://127.0.0.1:2181/demo_ds_1;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
+    ds_2:
+        dataSourceClassName: com.zaxxer.hikari.HikariDataSource
+        driverClassName: org.apache.hive.jdbc.HiveDriver
+        jdbcUrl: jdbc:hive2://127.0.0.1:2181/demo_ds_2;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
+rules:
+- !SHARDING
+    tables:
+      t_order:
+        actualDataNodes: <LITERAL>ds_0.t_order, ds_1.t_order, ds_2.t_order
+        keyGenerateStrategy:
+          column: order_id
+          keyGeneratorName: snowflake
+    defaultDatabaseStrategy:
+      standard:
+        shardingColumn: user_id
+        shardingAlgorithmName: inline
+    shardingAlgorithms:
+      inline:
+        type: INLINE
+        props:
+          algorithm-expression: ds_${user_id % 2}
+    keyGenerators:
+      snowflake:
+        type: SNOWFLAKE
+```
+
+At this point, you can create the ShardingSphere data source normally and execute logical SQL on the virtual data source.
+
+```java
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+public class ExampleUtils {
+    void test() throws SQLException {
+        HikariConfig config = new HikariConfig();
+        config.setJdbcUrl("jdbc:shardingsphere:classpath:demo.yaml");
+        config.setDriverClassName("org.apache.shardingsphere.driver.ShardingSphereDriver");
+        try (HikariDataSource dataSource = new HikariDataSource(config);
+             Connection connection = dataSource.getConnection();
+             Statement statement = connection.createStatement()) {
+            statement.execute("INSERT INTO t_order (user_id, order_type, address_id, status) VALUES (1, 1, 1, 'INSERT_TEST')");
+            statement.executeQuery("SELECT * FROM t_order");
+            statement.execute("DELETE FROM t_order WHERE order_id=1");
+        }
+    }
+}
+```
+
+However, once the `/hiveserver2` node in ZooKeeper Server is updated,
+an internal class of ShardingSphere still caches the `java.sql.Connection` that holds the information of the old HiveServer2 instance.
+Users therefore need to recreate the ShardingSphere JDBC DataSource,
+or recreate the JDBC DataSource created through `org.apache.shardingsphere.driver.ShardingSphereDriver`.
diff --git a/...nt/user-manual/shardingsphere-jdbc/optional-plugins/testcontainers/_index.cn.md b/...nt/user-manual/shardingsphere-jdbc/optional-plugins/testcontainers/_index.cn.md
@@ -24,6 +24,11 @@ ShardingSphere 默认情况下不提供对 `org.testcontainers.jdbc.ContainerDat
         <artifactId>shardingsphere-infra-database-testcontainers</artifactId>
         <version>${shardingsphere.version}</version>
     </dependency>
+    <dependency>
+        <groupId>org.postgresql</groupId>
+        <artifactId>postgresql</artifactId>
+        <version>42.7.2</version>
+    </dependency>
     <dependency>
         <groupId>org.testcontainers</groupId>
         <artifactId>postgresql</artifactId>

diff --git a/...nt/user-manual/shardingsphere-jdbc/optional-plugins/testcontainers/_index.en.md b/...nt/user-manual/shardingsphere-jdbc/optional-plugins/testcontainers/_index.en.md
@@ -24,6 +24,11 @@ the possible Maven dependencies are as follows,
         <artifactId>shardingsphere-infra-database-testcontainers</artifactId>
         <version>${shardingsphere.version}</version>
     </dependency>
+    <dependency>
+        <groupId>org.postgresql</groupId>
+        <artifactId>postgresql</artifactId>
+        <version>42.7.2</version>
+    </dependency>
     <dependency>
         <groupId>org.testcontainers</groupId>
         <artifactId>postgresql</artifactId>