From 4c35a6ce55ba276ad289d5222b551ae1e187c699 Mon Sep 17 00:00:00 2001
From: Norman Jordan
Date: Tue, 14 Jan 2025 13:04:07 -0800
Subject: [PATCH] Added Spark connect to the Spark master container

Signed-off-by: Norman Jordan
---
 docker/integ-test/docker-compose.yml             |  1 +
 .../DockerEMRServerlessClient.java               |  4 +++-
 docker/integ-test/spark/Dockerfile               |  1 +
 .../integ-test/spark/spark-master-entrypoint.sh  | 17 +++++++++++++++++
 4 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100755 docker/integ-test/spark/spark-master-entrypoint.sh

diff --git a/docker/integ-test/docker-compose.yml b/docker/integ-test/docker-compose.yml
index 4cf2bbc56..8ff92c94a 100644
--- a/docker/integ-test/docker-compose.yml
+++ b/docker/integ-test/docker-compose.yml
@@ -24,6 +24,7 @@ services:
       args:
         SPARK_VERSION: ${SPARK_VERSION:-3.5.3}
     container_name: spark
+    entrypoint: /opt/bitnami/scripts/spark/spark-master-entrypoint.sh
     ports:
       - "${MASTER_UI_PORT:-8080}:8080"
       - "${MASTER_PORT:-7077}:7077"
diff --git a/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java b/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java
index a54cdf9c3..88cc448a9 100644
--- a/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java
+++ b/docker/integ-test/opensearch/emr-src/org/opensearch/spark/emrserverless/DockerEMRServerlessClient.java
@@ -131,8 +131,10 @@ public StartJobRunResult startJobRun(final StartJobRunRequest startJobRunRequest
     runContainerCmd.add("/opt/bitnami/spark/bin/spark-submit");
     runContainerCmd.add("--deploy-mode");
     runContainerCmd.add("client");
+    runContainerCmd.add("--exclude-packages");
+    runContainerCmd.add("org.opensearch:opensearch-spark-standalone_2.12,org.opensearch:opensearch-spark-sql-application_2.12,org.opensearch:opensearch-spark-ppl_2.12");
     runContainerCmd.add("--master");
-    runContainerCmd.add("spark://spark:7077");
+    runContainerCmd.add("local[2]");

     runContainerCmd.addAll(Arrays.asList(sparkSubmitParameters.split(" ")));
     runContainerCmd.addAll(entryPointArguments);
diff --git a/docker/integ-test/spark/Dockerfile b/docker/integ-test/spark/Dockerfile
index 479af884a..f180320a6 100644
--- a/docker/integ-test/spark/Dockerfile
+++ b/docker/integ-test/spark/Dockerfile
@@ -11,3 +11,4 @@ RUN apt install -y curl
 USER 1001
 COPY ./spark-defaults.conf /opt/bitnami/spark/conf/spark-defaults.conf
 COPY ./log4j2.properties /opt/bitnami/spark/conf/log4j2.properties
+COPY ./spark-master-entrypoint.sh /opt/bitnami/scripts/spark/spark-master-entrypoint.sh
diff --git a/docker/integ-test/spark/spark-master-entrypoint.sh b/docker/integ-test/spark/spark-master-entrypoint.sh
new file mode 100755
index 000000000..a21c20643
--- /dev/null
+++ b/docker/integ-test/spark/spark-master-entrypoint.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+function start_spark_connect() {
+    sc_version=$(ls -1 /opt/bitnami/spark/jars/spark-core_*.jar | sed -e 's/^.*\/spark-core_//' -e 's/\.jar$//' -e 's/-/:/')
+
+    attempt=1
+    while [ -e "/tmp/spark_master_running" -a "$attempt" -le 10 ]; do
+        sleep 1
+        /opt/bitnami/spark/sbin/start-connect-server.sh --master spark://spark:7077 --packages org.apache.spark:spark-connect_${sc_version}
+        attempt=$(($attempt+1))
+    done
+}
+
+touch /tmp/spark_master_running
+start_spark_connect &
+/opt/bitnami/scripts/spark/entrypoint.sh /opt/bitnami/scripts/spark/run.sh
+rm /tmp/spark_master_running
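
Note (not part of the patch): the new entrypoint wraps the stock Bitnami scripts. It creates a marker file, starts a background helper that derives the Scala/Spark version pair from the spark-core jar name and retries start-connect-server.sh against spark://spark:7077 (up to 10 attempts, one second apart, while the marker exists), runs the regular master entrypoint in the foreground, and removes the marker when the master exits. Below is a minimal verification sketch; it assumes the container name "spark" from this compose file, Spark Connect's default gRPC port 15002 (the compose file does not publish it on the host), and the default Bitnami Spark log directory.

# Check that the Spark Connect port is accepting connections inside the container
# (uses bash's /dev/tcp, so no extra tools are required in the image).
docker exec spark bash -c 'exec 3<>/dev/tcp/localhost/15002 && echo "Spark Connect is listening on 15002"'

# Inspect the Connect server log written by start-connect-server.sh
# (assumes the default log location, /opt/bitnami/spark/logs).
docker exec spark bash -c 'tail -n 20 /opt/bitnami/spark/logs/*SparkConnectServer*.out'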