Skip to content

Commit

Permalink
test(batch): support hive catalog for iceberg source (#15550)
Browse files Browse the repository at this point in the history
  • Loading branch information
chenzl25 authored Mar 8, 2024
1 parent 32163b3 commit 3276e58
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 1 deletion.
18 changes: 18 additions & 0 deletions integration_tests/iceberg-source/docker/hive/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Connection settings for the RisingWave frontend used by the test driver.
[risingwave]
db=dev
user=root
host=127.0.0.1
port=4566

# Iceberg source definition: reads table s1.t1 through a Hive-metastore-backed
# catalog whose warehouse lives in MinIO (S3-compatible) at s3://icebergdata/demo.
[source]
connector = iceberg
catalog.type = hive
# Hive metastore thrift endpoint (the `metastore` service in docker-compose.yml).
catalog.uri = thrift://metastore:9083
warehouse.path = s3://icebergdata/demo
# MinIO endpoint and static credentials used by the docker setup.
s3.endpoint=http://minio-0:9301
s3.access.key = hummockadmin
s3.secret.key = hummockadmin
s3.region = ap-southeast-1
catalog.name = demo
database.name=s1
table.name=t1
116 changes: 116 additions & 0 deletions integration_tests/iceberg-source/docker/hive/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
version: '3.8'

services:
  # PostgreSQL backing store for the Hive metastore.
  postgres:
    image: postgres:16.1
    environment:
      POSTGRES_USER: admin
      # Quoted: an unquoted 123456 is parsed by YAML as an integer, not a string.
      POSTGRES_PASSWORD: "123456"
      POSTGRES_DB: metastore_db
    expose:
      - 5432
    ports:
      - "5432:5432"
    networks:
      iceberg_net:

  # Spark Connect server used by the test driver to create and populate the
  # Iceberg table via the Hive catalog.
  spark:
    depends_on:
      - minio-0
      - metastore
    image: ghcr.io/icelake-io/icelake-spark:0.1
    environment:
      - AWS_ACCESS_KEY_ID=hummockadmin
      - AWS_SECRET_ACCESS_KEY=hummockadmin
      - AWS_REGION=us-east-1
      - SPARK_HOME=/opt/spark
      - PYSPARK_PYTHON=/usr/bin/python3.9
      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin:/opt/spark/sbin
    user: root
    networks:
      iceberg_net:
    links:
      - minio-0:icebergdata.minio-0
    expose:
      - 15002
    healthcheck:
      # Ready once the Spark Connect port (15002) is bound.
      test: netstat -ltn | grep -c 15002
      interval: 1s
      retries: 1200
    volumes:
      - ./spark-script:/spark-script
    entrypoint: [ "/spark-script/spark-connect-server.sh" ]

  risingwave-standalone:
    extends:
      file: ../../../../docker/docker-compose.yml
      service: risingwave-standalone
    healthcheck:
      test:
        - CMD-SHELL
        - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/4566; exit $$?;'
      interval: 1s
      timeout: 30s
    environment:
      - AWS_REGION=us-east-1
    links:
      - minio-0:icebergdata.minio-0
    networks:
      iceberg_net:

  # S3-compatible object store holding both the Iceberg warehouse data and
  # RisingWave's hummock state; buckets are pre-created before startup.
  minio-0:
    extends:
      file: ../../../../docker/docker-compose.yml
      service: minio-0
    entrypoint: "
      /bin/sh -c '
      set -e
      mkdir -p \"/data/icebergdata/demo\"
      mkdir -p \"/data/hummock001\"
      /usr/bin/docker-entrypoint.sh \"$$0\" \"$$@\"
      '"
    networks:
      iceberg_net:

  etcd-0:
    extends:
      file: ../../../../docker/docker-compose.yml
      service: etcd-0
    networks:
      iceberg_net:

  # Hive metastore exposing the thrift endpoint consumed by both Spark and
  # the RisingWave iceberg source (catalog.uri in config.ini).
  metastore:
    image: naushadh/hive-metastore
    depends_on:
      - postgres
    environment:
      - DATABASE_HOST=postgres
      - DATABASE_DB=metastore_db
      - DATABASE_USER=admin
      - DATABASE_PASSWORD=123456
      - AWS_ACCESS_KEY_ID=hummockadmin
      - AWS_SECRET_ACCESS_KEY=hummockadmin
      - S3_ENDPOINT_URL=http://minio-0:9301
      - S3_BUCKET=icebergdata
      - S3_PREFIX=demo
    ports:
      - "9083:9083"
    expose:
      - 9083
    networks:
      iceberg_net:

volumes:
  risingwave-standalone:
    external: false
  etcd-0:
    external: false
  minio-0:
    external: false

networks:
  iceberg_net:
    name: iceberg
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Launch a Spark Connect server whose `demo` catalog is an Iceberg HiveCatalog
# backed by the Hive metastore (thrift://metastore:9083), with table data
# stored in MinIO via the s3a filesystem.

set -ex

# Build a ':'-separated driver classpath from every jar under /opt/spark/deps.
# Note: `tr` leaves a trailing ':', which the JVM classpath parser ignores.
JARS=$(find /opt/spark/deps -type f -name "*.jar" | tr '\n' ':')

# Quote "$JARS" to prevent word splitting / glob expansion of the classpath.
/opt/spark/sbin/start-connect-server.sh \
  --master local[3] \
  --driver-class-path "$JARS" \
  --conf spark.driver.bindAddress=0.0.0.0 \
  --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \
  --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
  --conf spark.sql.catalog.demo.catalog-impl=org.apache.iceberg.hive.HiveCatalog \
  --conf spark.sql.catalog.demo.uri=thrift://metastore:9083 \
  --conf spark.sql.catalog.demo.clients=10 \
  --conf spark.sql.catalog.demo.warehouse=s3a://icebergdata/demo \
  --conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://minio-0:9301 \
  --conf spark.sql.catalog.demo.hadoop.fs.s3a.path.style.access=true \
  --conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \
  --conf spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \
  --conf spark.sql.defaultCatalog=demo

# Keep the container in the foreground by streaming the server log.
tail -f /opt/spark/logs/spark*.out
2 changes: 1 addition & 1 deletion integration_tests/iceberg-source/python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def run_case(case):


if __name__ == "__main__":
    # Run the iceberg-source integration test once per catalog backend.
    # The scraped diff duplicated this assignment (old ["rest", "storage"]
    # immediately shadowed by the new list); only the final value is kept.
    case_names = ["hive", "rest", "storage"]
    for case_name in case_names:
        print(f"Running test case: {case_name}")
        run_case(case_name)

0 comments on commit 3276e58

Please sign in to comment.