Commit 1e98bb4 (1 parent: 4b18ba4)
Showing 4 changed files with 147 additions and 2 deletions.
Dockerfile
@@ -1,4 +1,22 @@
FROM bitnami/spark:3.5.1

RUN export ORI_USER=$(id -u)
# Switch to root to install packages
USER root

-ENTRYPOINT ["sleep 10s"]
# Install necessary packages
RUN apt-get update && apt-get install -y \
    # Fixing error: psutil could not be installed from sources because gcc is not installed
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Jupyterlab and other python dependencies
RUN pip install jupyterlab==4.2.0 pyspark==3.5.1

COPY scripts/entrypoint.sh /opt/
RUN chmod a+x /opt/entrypoint.sh

# Switch back to the original user
USER ${ORI_USER}

ENTRYPOINT ["/opt/entrypoint.sh"]
README.md
@@ -1,4 +1,54 @@
# CDM Jupyterhub dockerfiles (Prototype)

This prototype establishes a Docker container configuration for JupyterHub, designed to furnish a multi-user
environment tailored for executing Spark jobs via Jupyter notebooks.

## Using `docker-compose.yaml`

To deploy the JupyterHub container and Spark nodes locally, execute the following command:

```bash
docker-compose up --build
```

## Test Submitting a Spark Job Locally

### Submitting a Spark Job via spark-test-node
```bash
docker exec -it spark-test-node \
    sh -c '
    /opt/bitnami/spark/bin/spark-submit \
    --master spark://spark-master:7077 \
    examples/src/main/python/pi.py 10 \
    2>/dev/null
    '
```

### Submitting a Spark Job via Jupyter Notebook
After launching the [Jupyter Notebook](http://localhost:4041/), establish a Spark context or session with the Spark
master set to the environment variable `SPARK_MASTER_URL` and proceed to submit your job. Once the job is submitted,
you can monitor the job status and logs in the [Spark UI](http://localhost:8080/).

Sample code to calculate Pi using `SparkContext`:
```python
from pyspark import SparkConf, SparkContext
import random
import os

spark_master_url = os.environ['SPARK_MASTER_URL']

conf = SparkConf().setMaster(spark_master_url).setAppName("Pi")
sc = SparkContext(conf=conf)

num_samples = 100000000
def inside(p):
    x, y = random.random(), random.random()
    return x*x + y*y < 1
count = sc.parallelize(range(0, num_samples)).filter(inside).count()
pi = 4 * count / num_samples
print(pi)
sc.stop()
```
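
Beyond the notebook itself, the same connection can be sanity-checked from a shell; a sketch, assuming the compose service is named `notebook` as in the docker-compose.yaml below and that the `pyspark` launcher installed by pip is on the container's PATH:

```bash
# Open an interactive PySpark shell inside the notebook container, pointed at
# the same master the notebooks use; quit with Ctrl-D when done.
docker-compose exec notebook \
    sh -c 'pyspark --master "$SPARK_MASTER_URL"'
```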
docker-compose.yaml
@@ -0,0 +1,61 @@
version: '3'

# This docker-compose is for developer convenience, not for running in production.

services:

  spark-master:
    image: bitnami/spark:3.5.1
    container_name: spark-master
    ports:
      - "8080:8080"
      - "7077:7077"
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_WEBUI_PORT=8080
      - SPARK_MASTER_HOST=0.0.0.0

  spark-worker-1:
    image: bitnami/spark:3.5.1
    container_name: spark-worker-1
    ports:
      - "8081:8081"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=2
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_PORT=8091
      - SPARK_WORKER_WEBUI_PORT=8081

  spark-worker-2:
    image: bitnami/spark:3.5.1
    container_name: spark-worker-2
    ports:
      - "8082:8082"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=2
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_PORT=8092
      - SPARK_WORKER_WEBUI_PORT=8082

  spark-test-node:
    image: bitnami/spark:3.5.1
    container_name: spark-test-node
    environment:
      - SPARK_MASTER_URL=spark://spark-master:7077

  notebook:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "4040:4040"
      - "4041:4041"
    depends_on:
      - spark-master
    environment:
      - NOTEBOOK_PORT=4041
      - SPARK_MASTER_URL=spark://spark-master:7077
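
Once the stack is up, a quick health check might look like the following; a sketch using the service names and published ports defined above:

```bash
# List the compose services and their state.
docker-compose ps

# The master web UI on port 8080 should report the two workers as alive.
curl -s http://localhost:8080 | grep -i "alive workers"
```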
scripts/entrypoint.sh
@@ -0,0 +1,16 @@
#!/bin/bash

echo "starting jupyter notebook"

WORKSPACE_DIR="/cdm_shared_workspace"
mkdir -p "$WORKSPACE_DIR"
cd "$WORKSPACE_DIR"

# Start Jupyter Lab
jupyter lab --ip=0.0.0.0 \
    --port=$NOTEBOOK_PORT \
    --no-browser \
    --allow-root \
    --notebook-dir="$WORKSPACE_DIR" \
    --ServerApp.token='' \
    --ServerApp.password=''
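
Note that the script expects `NOTEBOOK_PORT` to be set (the compose file sets it to 4041) and disables token and password authentication, which is only reasonable for local development. A sketch of running it by hand inside the notebook container, e.g. while debugging:

```bash
# Run the entrypoint manually with an explicit port; if NOTEBOOK_PORT were unset,
# --port= would be passed an empty value.
NOTEBOOK_PORT=4041 /opt/entrypoint.sh
```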