From 3276e588dea62ddf9007b649157367b88f82f11e Mon Sep 17 00:00:00 2001
From: Dylan
Date: Fri, 8 Mar 2024 18:22:13 +0800
Subject: [PATCH] test(batch): support hive catalog for iceberg source (#15550)

---
Reviewer notes (free-form text between the three-dash line and the first
diff is not part of the commit):

  Purpose: adds a "hive" environment for the iceberg-source integration
  test. Three files are created under
  integration_tests/iceberg-source/docker/hive/ (config.ini,
  docker-compose.yml, spark-script/spark-connect-server.sh) and "hive" is
  put at the front of case_names in python/main.py.

  Wiring visible in this patch: config.ini's catalog.uri and the Spark
  catalog option spark.sql.catalog.demo.uri both point at
  thrift://metastore:9083, the port published by the `metastore` compose
  service; the S3 endpoint http://minio-0:9301 and the hummockadmin
  credentials are repeated in config.ini, the compose environment blocks
  and the Spark --conf flags.

  NOTE(review): the line structure and indentation of this patch were
  restored from a whitespace-collapsed copy. Hunk line counts match the
  headers, but the `index` blob ids may not match if the original
  whitespace differed - confirm against the upstream commit.
  NOTE(review): config.ini sets s3.region = ap-southeast-1 while the
  spark and risingwave-standalone services export AWS_REGION=us-east-1 -
  confirm the mismatch is intentional.
  NOTE(review): spark-connect-server.sh passes $JARS unquoted, and the
  value built by `tr '\n' ':'` ends with a trailing ':' - consider
  quoting it in a follow-up.
  NOTE(review): the `metastore` image (naushadh/hive-metastore) has no
  tag; the other images are pinned - consider pinning it.
  NOTE(review): all three new files end without a trailing newline.

 .../iceberg-source/docker/hive/config.ini     |  18 +++
 .../docker/hive/docker-compose.yml            | 116 ++++++++++++++++++
 .../hive/spark-script/spark-connect-server.sh |  23 ++++
 .../iceberg-source/python/main.py             |   2 +-
 4 files changed, 158 insertions(+), 1 deletion(-)
 create mode 100644 integration_tests/iceberg-source/docker/hive/config.ini
 create mode 100644 integration_tests/iceberg-source/docker/hive/docker-compose.yml
 create mode 100755 integration_tests/iceberg-source/docker/hive/spark-script/spark-connect-server.sh

diff --git a/integration_tests/iceberg-source/docker/hive/config.ini b/integration_tests/iceberg-source/docker/hive/config.ini
new file mode 100644
index 000000000000..df07c7525825
--- /dev/null
+++ b/integration_tests/iceberg-source/docker/hive/config.ini
@@ -0,0 +1,18 @@
+[risingwave]
+db=dev
+user=root
+host=127.0.0.1
+port=4566
+
+[source]
+connector = iceberg
+catalog.type = hive
+catalog.uri = thrift://metastore:9083
+warehouse.path = s3://icebergdata/demo
+s3.endpoint=http://minio-0:9301
+s3.access.key = hummockadmin
+s3.secret.key = hummockadmin
+s3.region = ap-southeast-1
+catalog.name = demo
+database.name=s1
+table.name=t1
\ No newline at end of file
diff --git a/integration_tests/iceberg-source/docker/hive/docker-compose.yml b/integration_tests/iceberg-source/docker/hive/docker-compose.yml
new file mode 100644
index 000000000000..3314083c1077
--- /dev/null
+++ b/integration_tests/iceberg-source/docker/hive/docker-compose.yml
@@ -0,0 +1,116 @@
+version: '3.8'
+
+services:
+  postgres:
+    image: postgres:16.1
+    environment:
+      POSTGRES_USER: admin
+      POSTGRES_PASSWORD: 123456
+      POSTGRES_DB: metastore_db
+    expose:
+      - 5432
+    ports:
+      - "5432:5432"
+    networks:
+      iceberg_net:
+  spark:
+    depends_on:
+      - minio-0
+      - metastore
+    image: ghcr.io/icelake-io/icelake-spark:0.1
+    environment:
+      - AWS_ACCESS_KEY_ID=hummockadmin
+      - AWS_SECRET_ACCESS_KEY=hummockadmin
+      - AWS_REGION=us-east-1
+      - SPARK_HOME=/opt/spark
+      - PYSPARK_PYTHON=/usr/bin/python3.9
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin:/opt/spark/sbin
+    user: root
+    networks:
+      iceberg_net:
+    links:
+      - minio-0:icebergdata.minio-0
+    expose:
+      - 15002
+    healthcheck:
+      test: netstat -ltn | grep -c 15002
+      interval: 1s
+      retries: 1200
+    volumes:
+      - ./spark-script:/spark-script
+    entrypoint: [ "/spark-script/spark-connect-server.sh" ]
+
+  risingwave-standalone:
+    extends:
+      file: ../../../../docker/docker-compose.yml
+      service: risingwave-standalone
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/4566; exit $$?;'
+      interval: 1s
+      timeout: 30s
+    environment:
+      - AWS_REGION=us-east-1
+    links:
+      - minio-0:icebergdata.minio-0
+    networks:
+      iceberg_net:
+
+  minio-0:
+    extends:
+      file: ../../../../docker/docker-compose.yml
+      service: minio-0
+    entrypoint: "
+    /bin/sh -c '
+
+    set -e
+
+    mkdir -p \"/data/icebergdata/demo\"
+    mkdir -p \"/data/hummock001\"
+
+    /usr/bin/docker-entrypoint.sh \"$$0\" \"$$@\"
+
+    '"
+    networks:
+      iceberg_net:
+
+  etcd-0:
+    extends:
+      file: ../../../../docker/docker-compose.yml
+      service: etcd-0
+    networks:
+      iceberg_net:
+
+  metastore:
+    image: naushadh/hive-metastore
+    depends_on:
+      - postgres
+    environment:
+      - DATABASE_HOST=postgres
+      - DATABASE_DB=metastore_db
+      - DATABASE_USER=admin
+      - DATABASE_PASSWORD=123456
+      - AWS_ACCESS_KEY_ID=hummockadmin
+      - AWS_SECRET_ACCESS_KEY=hummockadmin
+      - S3_ENDPOINT_URL=http://minio-0:9301
+      - S3_BUCKET=icebergdata
+      - S3_PREFIX=demo
+    ports:
+      - "9083:9083"
+    expose:
+      - 9083
+    networks:
+      iceberg_net:
+
+volumes:
+  risingwave-standalone:
+    external: false
+  etcd-0:
+    external: false
+  minio-0:
+    external: false
+
+networks:
+  iceberg_net:
+    name: iceberg
\ No newline at end of file
diff --git a/integration_tests/iceberg-source/docker/hive/spark-script/spark-connect-server.sh b/integration_tests/iceberg-source/docker/hive/spark-script/spark-connect-server.sh
new file mode 100755
index 000000000000..210a0663bea6
--- /dev/null
+++ b/integration_tests/iceberg-source/docker/hive/spark-script/spark-connect-server.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -ex
+
+JARS=$(find /opt/spark/deps -type f -name "*.jar" | tr '\n' ':')
+
+/opt/spark/sbin/start-connect-server.sh \
+  --master local[3] \
+  --driver-class-path $JARS \
+  --conf spark.driver.bindAddress=0.0.0.0 \
+  --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \
+  --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
+  --conf spark.sql.catalog.demo.catalog-impl=org.apache.iceberg.hive.HiveCatalog \
+  --conf spark.sql.catalog.demo.uri=thrift://metastore:9083 \
+  --conf spark.sql.catalog.demo.clients=10 \
+  --conf spark.sql.catalog.demo.warehouse=s3a://icebergdata/demo \
+  --conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://minio-0:9301 \
+  --conf spark.sql.catalog.demo.hadoop.fs.s3a.path.style.access=true \
+  --conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \
+  --conf spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \
+  --conf spark.sql.defaultCatalog=demo
+
+tail -f /opt/spark/logs/spark*.out
\ No newline at end of file
diff --git a/integration_tests/iceberg-source/python/main.py b/integration_tests/iceberg-source/python/main.py
index f4cd77653908..bca0c828df1d 100644
--- a/integration_tests/iceberg-source/python/main.py
+++ b/integration_tests/iceberg-source/python/main.py
@@ -113,7 +113,7 @@ def run_case(case):
 
 
 if __name__ == "__main__":
-    case_names = ["rest", "storage"]
+    case_names = ["hive", "rest", "storage"]
     for case_name in case_names:
         print(f"Running test case: {case_name}")
         run_case(case_name)