From a98288718458011ecc192fccc28f8756e8374127 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Fri, 31 May 2024 11:50:10 -0500 Subject: [PATCH] set dynamic allocation for parallel jobs --- docker-compose.yaml | 1 + scripts/notebook_entrypoint.sh | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index ff55fcc..e82408f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -103,5 +103,6 @@ services: - MINIO_ACCESS_KEY=minio - MINIO_SECRET_KEY=minio123 - SPARK_MODE=notebook + - MAX_EXECUTORS=2 volumes: - ./cdr/cdm/jupyter:/cdm_shared_workspace \ No newline at end of file diff --git a/scripts/notebook_entrypoint.sh b/scripts/notebook_entrypoint.sh index 277aeba..920ccb2 100644 --- a/scripts/notebook_entrypoint.sh +++ b/scripts/notebook_entrypoint.sh @@ -2,23 +2,36 @@ echo "starting jupyter notebook" -if [ -n "$SPARK_DRIVER_HOST" ]; then - echo "Setting spark.driver.host to $SPARK_DRIVER_HOST" - source /opt/bitnami/scripts/spark-env.sh - if [ -z "$SPARK_CONF_FILE" ]; then - echo "Error: unable to find SPARK_CONF_FILE path" - exit 1 - fi - echo "spark.driver.host $SPARK_DRIVER_HOST" >> $SPARK_CONF_FILE +source /opt/bitnami/scripts/spark-env.sh +if [ -z "$SPARK_CONF_FILE" ]; then + echo "Error: unable to find SPARK_CONF_FILE path" + exit 1 fi +# Set Spark configurations +{ + # Set dynamic allocation configurations to allow parallel job executions + if [ -z "$MAX_EXECUTORS" ]; then + # If MAX_EXECUTORS is not set, default to 5. Adjust as needed. + MAX_EXECUTORS=5 + fi + echo "spark.dynamicAllocation.enabled true" + echo "spark.dynamicAllocation.minExecutors 1" + echo "spark.dynamicAllocation.maxExecutors $MAX_EXECUTORS" + + # Set spark.driver.host if SPARK_DRIVER_HOST is set + if [ -n "$SPARK_DRIVER_HOST" ]; then + echo "spark.driver.host $SPARK_DRIVER_HOST" + fi +} >> "$SPARK_CONF_FILE" + WORKSPACE_DIR="/cdm_shared_workspace" mkdir -p "$WORKSPACE_DIR" cd "$WORKSPACE_DIR" # Start Jupyter Lab jupyter lab --ip=0.0.0.0 \ - --port=$NOTEBOOK_PORT \ + --port="$NOTEBOOK_PORT" \ --no-browser \ --allow-root \ --notebook-dir="$WORKSPACE_DIR" \