Docker integ test with async API #1003

Merged: 11 commits, Jan 16, 2025

8 changes: 6 additions & 2 deletions docker/integ-test/.env
@@ -5,9 +5,13 @@ MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
SPARK_CONNECT_PORT=15002
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
PPL_JAR=./ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=./flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
SQL_APP_JAR=./spark-sql-application/target/scala-2.12/sql-job-assembly-0.7.0-SNAPSHOT.jar
OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
OPENSEARCH_PA_PORT=9600
OPENSEARCH_DASHBOARDS_PORT=5601
S3_ACCESS_KEY=Vt7jnvi5BICr1rkfsheT
Collaborator: Is this for MinIO? Could it be any arbitrary string?

Contributor (author): Yes, this is for the MinIO container, and the credential is only valid for that container.

S3_SECRET_KEY=5NK3StGvoGCLUWvbaGN0LBUf9N6sjE94PEzLdqwO
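
For context on how the MinIO credentials above end up being used: a minimal sketch, assuming the Spark jobs reach MinIO through the S3A connector at the service's API port (the actual wiring lives in spark-defaults.conf and the spark-submit image, which are not shown in this hunk). The master URL, endpoint, bucket, and job path below are illustrative only.

# Hypothetical spark-submit that reads from the MinIO-backed "integ-test" bucket;
# the endpoint and job path are illustrative, not taken from this PR.
spark-submit \
  --master spark://spark:7077 \
  --conf spark.hadoop.fs.s3a.endpoint=http://minio-S3:9000 \
  --conf spark.hadoop.fs.s3a.access.key="${S3_ACCESS_KEY}" \
  --conf spark.hadoop.fs.s3a.secret.key="${S3_SECRET_KEY}" \
  --conf spark.hadoop.fs.s3a.path.style.access=true \
  /opt/jobs/example-job.py
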
73 changes: 73 additions & 0 deletions docker/integ-test/configuration-updater/apply-configuration.sh
@@ -0,0 +1,73 @@
#!/bin/sh

# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

# Login to Minio
curl -q \
-c /tmp/minio-cookies.txt \
-H 'Content-Type: application/json' \
-d '{"accessKey": "minioadmin", "secretKey": "minioadmin"}' \
http://minio-S3:9001/api/v1/login
# Delete the test bucket
curl -b /tmp/minio-cookies.txt \
-X DELETE \
http://minio-S3:9001/api/v1/buckets/test
# Create the integ-test bucket
curl -q \
-b /tmp/minio-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
http://minio-S3:9001/api/v1/buckets
# Create the access key
curl -q \
-b /tmp/minio-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
http://minio-S3:9001/api/v1/service-account-credentials

# Login to OpenSearch Dashboards
echo ">>> Login to OpenSearch dashboards"
curl -q \
-c /tmp/opensearch-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-H 'Osd-Version: 2.18.0' \
-H 'Osd-Xsrf: fetch' \
-d "{\"username\": \"admin\", \"password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}" \
'http://opensearch-dashboards:5601/auth/login?dataSourceId='
if [ "$?" -eq "0" ]; then
echo " >>> Login successful"
else
echo " >>> Login failed"
fi
# Create the S3/Glue datasource
echo ">>> Creating datasource"
curl -q \
-b /tmp/opensearch-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-H 'Osd-Version: 2.18.0' \
-H 'Osd-Xsrf: fetch' \
-d "{\"name\": \"mys3\", \"allowedRoles\": [], \"connector\": \"s3glue\", \"properties\": {\"glue.auth.type\": \"iam_role\", \"glue.auth.role_arn\": \"arn:aws:iam::123456789012:role/S3Access\", \"glue.indexstore.opensearch.uri\": \"http://opensearch:9200\", \"glue.indexstore.opensearch.auth\": \"basicauth\", \"glue.indexstore.opensearch.auth.username\": \"admin\", \"glue.indexstore.opensearch.auth.password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}}" \
http://opensearch-dashboards:5601/api/directquery/dataconnections
if [ "$?" -eq "0" ]; then
echo " >>> S3 datasource created"
else
echo " >>> Failed to create S3 datasource"
fi

echo ">>> Setting cluster settings"
curl -v \
-u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
-X PUT \
-H 'Content-Type: application/json' \
-d '{"persistent": {"plugins.query.executionengine.spark.config": "{\"applicationId\":\"integ-test\",\"executionRoleARN\":\"arn:aws:iam::xxxxx:role/emr-job-execution-role\",\"region\":\"us-west-2\", \"sparkSubmitParameters\": \"--conf spark.dynamicAllocation.enabled=false\"}"}}' \
http://opensearch:9200/_cluster/settings
if [ "$?" -eq "0" ]; then
echo " >>> Successfully set cluster settings"
else
echo " >>> Failed to set cluster settings"
fi
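
A couple of follow-up checks can confirm the updater did its job. This is a sketch that assumes the SQL plugin's datasource listing endpoint and the hostnames used elsewhere in this compose setup.

# Hypothetical verification, run from a container on the opensearch-net network.
# The datasource list should include the "mys3" connection created above.
curl -s -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
  http://opensearch:9200/_plugins/_query/_datasources
# The async-query execution engine settings should appear under persistent settings.
curl -s -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
  "http://opensearch:9200/_cluster/settings?filter_path=persistent.plugins.query*"
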
139 changes: 109 additions & 30 deletions docker/integ-test/docker-compose.yml
@@ -1,13 +1,35 @@
services:
metastore:
build: ./metastore
container_name: metastore
ports:
- "${THRIFT_PORT:-9083}:9083"
volumes:
- type: bind
source: ./metastore/hive-site.xml
target: /opt/apache-hive-2.3.9-bin/conf/hive-site.xml
- type: bind
source: ./metastore/hive-log4j2.properties
target: /opt/apache-hive-2.3.9-bin/conf/hive-log4j2.properties
- type: volume
source: metastore-data
target: /data
networks:
- opensearch-net

spark:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
build:
context: ./spark
dockerfile: Dockerfile
args:
SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
container_name: spark
entrypoint: /opt/bitnami/scripts/spark/spark-master-entrypoint.sh
ports:
- "${MASTER_UI_PORT:-8080}:8080"
- "${MASTER_PORT:-7077}:7077"
- "${UI_PORT:-4040}:4040"
- "${SPARK_CONNECT_PORT}:15002"
entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh
environment:
- SPARK_MODE=master
- SPARK_RPC_AUTHENTICATION_ENABLED=no
@@ -17,19 +39,10 @@ services:
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-master-entrypoint.sh
target: /opt/bitnami/scripts/spark/master-entrypoint.sh
- type: bind
source: ./spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/"]
@@ -40,9 +53,22 @@
start_interval: 5s
networks:
- opensearch-net
depends_on:
metastore:
condition: service_started
opensearch:
condition: service_healthy
opensearch-dashboards:
condition: service_healthy
configuration-updater:
condition: service_completed_successfully

spark-worker:
image: bitnami/spark:${SPARK_VERSION:-3.5.3}
build:
context: ./spark
dockerfile: Dockerfile
args:
SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
container_name: spark-worker
environment:
- SPARK_MODE=worker
@@ -56,32 +82,43 @@
- SPARK_PUBLIC_DNS=localhost
volumes:
- type: bind
source: ./spark-defaults.conf
target: /opt/bitnami/spark/conf/spark-defaults.conf
- type: bind
source: ./log4j2.properties
target: /opt/bitnami/spark/conf/log4j2.properties
- type: bind
source: $PPL_JAR
source: ../../$PPL_JAR
target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
- type: bind
source: $FLINT_JAR
source: ../../$FLINT_JAR
target: /opt/bitnami/spark/jars/flint-spark-integration.jar
networks:
- opensearch-net
depends_on:
- spark
metastore:
condition: service_started
spark:
condition: service_healthy

spark-submit:
build:
context: ../../
dockerfile: docker/integ-test/spark-submit/Dockerfile
args:
FLINT_JAR: ${FLINT_JAR}
PPL_JAR: ${PPL_JAR}
SQL_APP_JAR: ${SQL_APP_JAR}
depends_on:
metastore:
condition: service_completed_successfully

opensearch:
image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest}
build: ./opensearch
container_name: opensearch
environment:
- cluster.name=opensearch-cluster
- node.name=opensearch
- discovery.seed_hosts=opensearch
- cluster.initial_cluster_manager_nodes=opensearch
- discovery.type=single-node
- bootstrap.memory_lock=true
- plugins.security.system_indices.enabled=false
- plugins.security.system_indices.permission.enabled=false
- plugins.security.ssl.http.enabled=false
- plugins.query.datasources.encryption.masterkey=9a515c99d4313f140a6607053502f4d6
- OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m}
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
ulimits:
@@ -92,12 +129,18 @@
soft: 65536
hard: 65536
volumes:
- opensearch-data:/usr/share/opensearch/data
- type: volume
source: opensearch-data
target: /usr/share/opensearch/data
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
ports:
- ${OPENSEARCH_PORT:-9200}:9200
- 9600:9600
- ${OPENSEARCH_PA_PORT:-9600}:9600
expose:
- "${OPENSEARCH_PORT:-9200}"
- "9300"
healthcheck:
test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"]
interval: 1m
@@ -107,6 +150,9 @@
start_interval: 5s
networks:
- opensearch-net
depends_on:
minio:
condition: service_healthy

opensearch-dashboards:
image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION}
@@ -119,8 +165,16 @@
OPENSEARCH_HOSTS: '["http://opensearch:9200"]'
networks:
- opensearch-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5601/"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
depends_on:
- opensearch
opensearch:
condition: service_healthy

minio:
image: minio/minio
@@ -132,12 +186,37 @@
- "9001:9001"
volumes:
- minio-data:/data
healthcheck:
test: ["CMD", "curl", "-q", "-f", "http://localhost:9000/minio/health/live"]
interval: 1m
timeout: 5s
retries: 3
start_period: 30s
start_interval: 5s
networks:
- opensearch-net

configuration-updater:
image: alpine/curl:latest
entrypoint: /bin/sh
command: /apply-configuration.sh
environment:
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- OPENSEARCH_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
volumes:
- type: bind
source: configuration-updater/apply-configuration.sh
target: /apply-configuration.sh
depends_on:
opensearch-dashboards:
condition: service_healthy
networks:
- opensearch-net

volumes:
metastore-data:
opensearch-data:
minio-data:

networks:
opensearch-net:
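
For reference, a minimal sketch of bringing the stack up, assuming the assembly JARs referenced by PPL_JAR, FLINT_JAR, and SQL_APP_JAR in .env have already been built (for example with sbt assembly) relative to the repository root:

# Build the assemblies the compose file mounts and copies (paths come from .env).
sbt assembly
# Start the stack; --build picks up the local Dockerfiles for spark, opensearch,
# metastore, and spark-submit, and the one-shot configuration-updater runs once
# opensearch-dashboards reports healthy.
cd docker/integ-test
docker compose up --build -d
docker compose ps
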
29 changes: 29 additions & 0 deletions docker/integ-test/metastore/Dockerfile
@@ -0,0 +1,29 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0

FROM openjdk:21-jdk-bookworm

WORKDIR /opt

ENV HADOOP_HOME=/opt/hadoop-3.3.4
ENV HIVE_HOME=/opt/apache-hive-2.3.9-bin

#RUN apt-get update
RUN curl -L https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz | tar zxf -
RUN curl -L https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz | tar zxf -
RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-api-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/client/hadoop-client-runtime-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-3.3.4.jar $HIVE_HOME/lib/
RUN cp $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-1.12.262.jar $HIVE_HOME/lib/

RUN groupadd -f -r hive --gid=1000
RUN useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive
RUN chown hive:hive -R ${HIVE_HOME}

RUN mkdir /data
RUN chown hive:hive /data

WORKDIR $HIVE_HOME
EXPOSE 9083
ENTRYPOINT ["/opt/apache-hive-2.3.9-bin/bin/hive", "--service", "metastore"]
USER hive
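
Once the metastore container is running, something like the following can be used to poke at it from the host; a sketch that assumes the THRIFT_PORT mapping from .env and that nc is available locally.

# Hypothetical smoke checks for the Hive metastore service.
nc -z localhost "${THRIFT_PORT:-9083}" && echo "metastore thrift port is open"
docker logs --tail 50 metastore
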