Merge pull request #5 from kbaseincubator/dev-cluster-devops-1884

Dev cluster devops 1884

jsfillman authored May 7, 2024
2 parents a366536 + 2b82f3c commit d370904
Showing 10 changed files with 223 additions and 31 deletions.
12 changes: 11 additions & 1 deletion .github/codeql.yml
@@ -1,8 +1,18 @@
name: "Code scanning - action"

on:
push:
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
push:
# run workflow when merging to main/master or develop
branches:
- main
- master
- develop
schedule:
- cron: '0 19 * * 0'

27 changes: 15 additions & 12 deletions .github/dependabot.yml
@@ -1,14 +1,17 @@
 version: 2
 updates:
-- package-ecosystem: docker
-  directory: "/"
-  schedule:
-    interval: monthly
-    time: '11:00'
-  open-pull-requests-limit: 10
-- package-ecosystem: pip
-  directory: "/"
-  schedule:
-    interval: monthly
-    time: '11:00'
-  open-pull-requests-limit: 10
+
+# Docker
+- package-ecosystem: docker
+  directory: "/"
+  schedule:
+    interval: weekly
+    time: '11:00'
+  open-pull-requests-limit: 25
+
+# GitHub Actions
+- package-ecosystem: "github-actions"
+  directory: ".github/workflows"
+  schedule:
+    interval: "monthly"
+  open-pull-requests-limit: 25
4 changes: 2 additions & 2 deletions .github/workflows/manual-build.yml
@@ -6,10 +6,10 @@ on:
       platforms:
         description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.'
         required: false
-        default: 'linux/amd64,linux/arm64/v8'
+        default: 'linux/amd64'
 jobs:
   build-push:
-    uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
+    uses: kbaseincubator/.github/.github/workflows/reusable_build-push.yml@main
     with:
       name: '${{ github.event.repository.name }}-develop'
       tags: br-${{ github.ref_name }}
48 changes: 48 additions & 0 deletions .github/workflows/pr_build.yml
@@ -0,0 +1,48 @@
---
name: Pull Request Build, Tag, & Push
on:
  pull_request:
    branches:
      - develop
      - main
      - master
    types:
      - opened
      - reopened
      - synchronize
      - closed
jobs:
  build-develop-open:
    if: github.base_ref == 'develop' && github.event.pull_request.merged == false
    uses: kbaseincubator/.github/.github/workflows/reusable_build.yml@main
    with:
      platforms: "linux/amd64"
    secrets: inherit
  build-develop-merge:
    if: github.base_ref == 'develop' && github.event.pull_request.merged == true
    uses: kbaseincubator/.github/.github/workflows/reusable_build-push.yml@main
    with:
      name: '${{ github.event.repository.name }}-develop'
      tags: pr-${{ github.event.number }},latest
      platforms: "linux/amd64"
    secrets: inherit
  build-main-open:
    if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false
    uses: kbaseincubator/.github/.github/workflows/reusable_build-push.yml@main
    with:
      name: '${{ github.event.repository.name }}'
      tags: pr-${{ github.event.number }}
      platforms: "linux/amd64"
    secrets: inherit
  build-main-merge:
    if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == true
    uses: kbaseincubator/.github/.github/workflows/reusable_build-push.yml@main
    with:
      name: '${{ github.event.repository.name }}'
      tags: pr-${{ github.event.number }},latest-rc
      platforms: "linux/amd64"
    secrets: inherit
  trivy-scans:
    if: (github.base_ref == 'develop' || github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false
    uses: kbaseincubator/.github/.github/workflows/reusable_trivy-scans.yml@main
    secrets: inherit
8 changes: 4 additions & 4 deletions .github/workflows/release-main.yml
@@ -8,19 +8,19 @@ on:
     types: [ published ]
 jobs:
   check-source-branch:
-    uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@main
+    uses: kbaseincubator/.github/.github/workflows/reusable_validate-branch.yml@main
     with:
       build_branch: '${{ github.event.release.target_commitish }}'
   validate-release-tag:
     needs: check-source-branch
-    uses: kbase/.github/.github/workflows/reusable_validate-release-tag.yml@main
+    uses: kbaseincubator/.github/.github/workflows/reusable_validate-release-tag.yml@main
    with:
      release_tag: '${{ github.event.release.tag_name }}'
  build-push:
    needs: validate-release-tag
-    uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
+    uses: kbaseincubator/.github/.github/workflows/reusable_build-push.yml@main
     with:
       name: '${{ github.event.repository.name }}'
       tags: '${{ github.event.release.tag_name }},latest'
-      platforms: 'linux/amd64,linux/arm64/v8'
+      platforms: "linux/amd64"
     secrets: inherit
62 changes: 50 additions & 12 deletions Dockerfile
@@ -1,12 +1,50 @@
-# First stage: download and extract the busybox binary
-FROM alpine as builder
-RUN apk add --no-cache wget
-RUN wget -O busybox https://busybox.net/downloads/binaries/1.31.0-defconfig-multiarch-musl/busybox-x86_64 && \
-    chmod +x busybox
-
-# Second stage: create the minimal image
-FROM scratch
-COPY --from=builder /busybox /busybox
-
-# Run the sleep command indefinitely
-CMD ["/busybox", "sleep", "infinity"]
+# Would much prefer to use an official image, but it looks like they're just for submitting jobs
+# rather than running a cluster?
+# https://github.com/apache/spark-docker/blob/master/3.5.1/scala2.12-java17-ubuntu/entrypoint.sh
+# Can't seem to find any usage documentation, so I've just been reading the code to figure out
+# how they work
+
+FROM ubuntu:22.04 as build
+
+RUN apt update -y
+RUN apt install -y wget
+
+ENV SPARK_VER=spark-3.5.1-bin-hadoop3-scala2.13
+
+WORKDIR /opt
+# TODO should at least check the sha, or copy the pgp check from the official dockerfile
+RUN wget -q https://dlcdn.apache.org/spark/spark-3.5.1/$SPARK_VER.tgz && \
+    tar -xf $SPARK_VER.tgz
+
+
+FROM eclipse-temurin:17.0.11_9-jre-jammy
+
+ENV SPARK_VER=spark-3.5.1-bin-hadoop3-scala2.13
+ENV PYTHON_VER=python3.11
+
+# install from deadsnakes so it's not an rc version
+RUN apt update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt install -y $PYTHON_VER python3-pip && \
+    apt install -y r-base r-base-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN $PYTHON_VER --version
+
+RUN mkdir /opt/py && ln -s /usr/bin/$PYTHON_VER /opt/py/python3
+
+RUN echo '#!/usr/bin/bash' > /usr/bin/pip && \
+    echo "$PYTHON_VER -m pip \$@" >> /usr/bin/pip
+
+ENV R_HOME /usr/lib/R
+
+RUN mkdir /opt/spark
+COPY --from=build /opt/$SPARK_VER/ /opt/spark/
+# this doesn't seem to actually work
+RUN echo "spark.pyspark.python /usr/bin/$PYTHON_VER" > /opt/spark/conf/spark-defaults.conf
+
+COPY entrypoint.sh /opt/
+RUN chmod a+x /opt/entrypoint.sh
+
+ENTRYPOINT ["/opt/entrypoint.sh"]
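
[Editorial note] The TODO about checking the sha could be addressed with something like the sketch below. The .sha512 URL and its compatibility with sha512sum -c are assumptions based on how Apache typically publishes release checksums, not something verified in this PR.

    # Hypothetical build-stage hardening: fetch Apache's published SHA-512
    # alongside the tarball and verify it before extracting. The .sha512
    # file name/format is an assumption about Apache's release layout.
    RUN wget -q https://dlcdn.apache.org/spark/spark-3.5.1/$SPARK_VER.tgz && \
        wget -q https://dlcdn.apache.org/spark/spark-3.5.1/$SPARK_VER.tgz.sha512 && \
        sha512sum -c $SPARK_VER.tgz.sha512 && \
        tar -xf $SPARK_VER.tgz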
7 changes: 7 additions & 0 deletions LICENSE.md
@@ -0,0 +1,7 @@
Copyright (c) 2024-present The KBase Project and its Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11 changes: 11 additions & 0 deletions README.md
@@ -0,0 +1,11 @@
# CDM prototype Spark dockerfiles

This is an extremely naive Dockerfile allowing deployment of a Spark master and workers in
Rancher.

## Notes

* When we switch to Rancher 2 we should probably switch from the standalone scheduler to the
  k8s scheduler. Haven't looked into this at all.
* The dockerfile uses mostly default values, which is almost certainly bad.
* Do we need to install and configure Hadoop? Jobs run without it... should we use Minio instead?
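
[Editorial note] The "jobs run without it" claim can be smoke-tested with the SparkPi example bundled in the Spark distribution. A sketch, assuming the docker-compose stack below is running with port 7077 published on localhost, and the scala2.13 / Spark 3.5.1 jar layout from the Dockerfile:

    # Sketch: submit the bundled SparkPi example to the standalone master.
    # Master URL and jar name are assumptions tied to this repo's setup.
    /opt/spark/bin/spark-submit \
        --master spark://localhost:7077 \
        --class org.apache.spark.examples.SparkPi \
        /opt/spark/examples/jars/spark-examples_2.13-3.5.1.jar 100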
47 changes: 47 additions & 0 deletions docker-compose.yaml
@@ -0,0 +1,47 @@
version: '3'

# This docker-compose is for developer convenience, not for running in production.

services:

  spark-master:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "7077:7077"
      - "8080:8080"
    environment:
      - MODE=master
      - PORT=7077
      - WEBPORT=8080

  spark-worker1:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8081:8081"
    depends_on:
      - spark-master
    environment:
      - MODE=worker
      - WEBPORT=8081
      - CORES=2
      - MEM=1G
      - SPARK_MASTER_URL=spark://spark-master:7077

  spark-worker2:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8082:8082"
    depends_on:
      - spark-master
    environment:
      - MODE=worker
      - WEBPORT=8082
      - CORES=2
      - MEM=1G
      - SPARK_MASTER_URL=spark://spark-master:7077
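
[Editorial note] A typical developer loop with this compose file, using standard Docker Compose commands (nothing repo-specific is assumed beyond the service names above):

    # Build the image and start the master and both workers.
    docker compose up --build -d

    # Master UI: http://localhost:8080; worker UIs: :8081 and :8082.
    docker compose logs -f spark-master

    # Tear down when done.
    docker compose down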
28 changes: 28 additions & 0 deletions entrypoint.sh
@@ -0,0 +1,28 @@
#!/bin/bash

# Haven't been able to get the workers to use a different version of python any other way
# Only want to do this in this shell so it doesn't screw up everything by changing the system
# python
export PATH=/opt/py:$PATH

# run spark in the foreground so the container doesn't immediately exit
export SPARK_NO_DAEMONIZE=true
# these two env vars don't seem to work
export PYSPARK_PYTHON=$PYTHON_VER
export PYSPARK_DRIVER_PYTHON=$PYTHON_VER
export SPARK_MASTER_HOST=0.0.0.0
export SPARK_MASTER_PORT=$PORT
export SPARK_MASTER_WEBUI_PORT=$WEBPORT
export SPARK_WORKER_WEBUI_PORT=$WEBPORT

if [ "$MODE" == "master" ] ; then
    echo "starting spark master"
    /opt/spark/sbin/start-master.sh
elif [ "$MODE" == "worker" ] ; then
    echo "starting spark worker"
    /opt/spark/sbin/start-worker.sh --cores "$CORES" --memory "$MEM" "$SPARK_MASTER_URL"
elif [ "$MODE" == "bash" ] ; then
    bash
else
    echo "Unrecognized MODE env var: [$MODE]"
fi
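
[Editorial note] The same image can also be run outside compose; a sketch using standard docker commands, where the cdm-spark tag and spark-net network are hypothetical names, and the MODE/PORT/WEBPORT/CORES/MEM/SPARK_MASTER_URL variables mirror what this script reads:

    # Sketch: manual master + worker, mirroring the entrypoint's env vars.
    docker build -t cdm-spark .      # hypothetical image tag
    docker network create spark-net  # hypothetical network name
    docker run -d --name spark-master --network spark-net \
        -p 7077:7077 -p 8080:8080 \
        -e MODE=master -e PORT=7077 -e WEBPORT=8080 cdm-spark
    docker run -d --name spark-worker1 --network spark-net -p 8081:8081 \
        -e MODE=worker -e WEBPORT=8081 -e CORES=2 -e MEM=1G \
        -e SPARK_MASTER_URL=spark://spark-master:7077 cdm-spark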
