From a279e6f0b2f5dfdb5a0c81bd225ce25c8d3780e1 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Thu, 29 Aug 2024 10:35:10 -0500 Subject: [PATCH 1/2] using ghcr.io/kbase img for standalone spark deployment --- docker-compose.yaml | 20 +++++++++----------- scripts/entrypoint.sh | 9 +-------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index a1e7532..46ae133 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,6 +7,8 @@ services: yarn-resourcemanager: image: ghcr.io/kbase/cdm-prototype-yarn:pr-8 container_name: yarn-resourcemanager + # Images from the ghcr.io/kbase registry are exclusively available for Linux/AMD64 platforms. + platform: linux/amd64 ports: - 8088:8088 # web ui environment: @@ -18,6 +20,7 @@ services: yarn-nodemanager: image: ghcr.io/kbase/cdm-prototype-yarn:pr-8 container_name: yarn-nodemanager + platform: linux/amd64 ports: - 8042:8042 # web ui environment: @@ -28,10 +31,9 @@ services: - MINIO_SECRET_KEY=yarnpass spark-master: - build: - context: . - dockerfile: Dockerfile + image: ghcr.io/kbase/cdm-spark-standalone:pr-1 container_name: spark-master + platform: linux/amd64 ports: - "8090:8090" environment: @@ -46,10 +48,9 @@ services: - ./cdr/cdm/jupyter:/cdm_shared_workspace spark-worker-1: - build: - context: . - dockerfile: Dockerfile + image: ghcr.io/kbase/cdm-spark-standalone:pr-1 container_name: spark-worker-1 + platform: linux/amd64 depends_on: - spark-master ports: @@ -68,10 +69,9 @@ services: - ./cdr/cdm/jupyter:/cdm_shared_workspace spark-worker-2: - build: - context: . - dockerfile: Dockerfile + image: ghcr.io/kbase/cdm-spark-standalone:pr-1 container_name: spark-worker-2 + platform: linux/amd64 depends_on: - spark-master ports: @@ -139,7 +139,6 @@ services: - MINIO_ACCESS_KEY=minio-readwrite - MINIO_SECRET_KEY=minio123 - S3_YARN_BUCKET=yarn - - SPARK_MODE=notebook - MAX_EXECUTORS=4 - POSTGRES_USER=hive - POSTGRES_PASSWORD=hivepassword @@ -168,7 +167,6 @@ services: - MINIO_ACCESS_KEY=minio-readonly - MINIO_SECRET_KEY=minio123 - S3_YARN_BUCKET=yarn - - SPARK_MODE=notebook - MAX_EXECUTORS=4 # TODO: create postgres user w/ only write access to the hive tables - POSTGRES_USER=hive diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 0a8eb0f..277bcf1 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -2,11 +2,4 @@ . /opt/scripts/setup.sh -if [ "$SPARK_MODE" = "notebook" ]; then - exec /opt/scripts/notebook_entrypoint.sh "$@" -else - # In bitnami/spark Dockerfile, the entrypoint is set to /opt/bitnami/scripts/spark/entrypoint.sh and followed - # by CMD ["/opt/bitnami/scripts/spark/run.sh"] meaning that the entrypoint is expected the run.sh script as an argument. - # reference: https://github.com/bitnami/containers/blob/main/bitnami/spark/3.5/debian-12/Dockerfile#L69 - exec /opt/bitnami/scripts/spark/entrypoint.sh "$@" /opt/bitnami/scripts/spark/run.sh -fi \ No newline at end of file +exec /opt/scripts/notebook_entrypoint.sh "$@" \ No newline at end of file From d3482c2e96094cb54ecd68b7307c0e124069bb08 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Thu, 29 Aug 2024 11:56:36 -0500 Subject: [PATCH 2/2] using image from jupyterhub --- docker-compose.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 46ae133..8ec8ee1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -31,7 +31,8 @@ services: - MINIO_SECRET_KEY=yarnpass spark-master: - image: ghcr.io/kbase/cdm-spark-standalone:pr-1 + # The latest image from cdm-jupyterhub that includes spark standalone mode + image: ghcr.io/kbase/cdm-jupyterhub:pr-74 container_name: spark-master platform: linux/amd64 ports: @@ -48,7 +49,8 @@ services: - ./cdr/cdm/jupyter:/cdm_shared_workspace spark-worker-1: - image: ghcr.io/kbase/cdm-spark-standalone:pr-1 + # The latest image from cdm-jupyterhub that includes spark standalone mode + image: ghcr.io/kbase/cdm-jupyterhub:pr-74 container_name: spark-worker-1 platform: linux/amd64 depends_on: @@ -69,7 +71,8 @@ services: - ./cdr/cdm/jupyter:/cdm_shared_workspace spark-worker-2: - image: ghcr.io/kbase/cdm-spark-standalone:pr-1 + # The latest image from cdm-jupyterhub that includes spark standalone mode + image: ghcr.io/kbase/cdm-jupyterhub:pr-74 container_name: spark-worker-2 platform: linux/amd64 depends_on: