Skip to content

Commit

Permalink
Merge branch 'main' into dlm/structured-logging
Browse files Browse the repository at this point in the history
  • Loading branch information
dleviminzi committed Dec 11, 2024
2 parents 62e35a4 + efa7e93 commit b009209
Show file tree
Hide file tree
Showing 68 changed files with 2,743 additions and 1,175 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/release-worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
id: login-ecr
with:
registry-type: public
mask-password: 'true'
mask-password: "true"

- name: Set version
id: set-version
Expand All @@ -49,3 +49,6 @@ jobs:
${{ steps.login-ecr.outputs.registry }}/n4e0e1y0/beta9-worker:latest
target: final
platforms: linux/amd64
build-args: |
CEDANA_TOKEN=${{ secrets.CEDANA_TOKEN }}
CEDANA_BASE_URL=${{ secrets.CEDANA_BASE_URL }}
5 changes: 5 additions & 0 deletions .stignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ venv
test
manifests
tmp


deploy
bin/worker
bin/gateway
Empty file modified bin/setup.sh
100644 → 100755
Empty file.
6 changes: 3 additions & 3 deletions docker/Dockerfile.runner
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# syntax=docker/dockerfile:1.6
FROM ubuntu:20.04 as base
FROM ubuntu:22.04 as base

ENV DEBIAN_FRONTEND=noninteractive

# Bootstrap the base image: force apt onto IPv4, install the build tooling
# (including bzip2), and register the deadsnakes PPA that the later
# python3.12 install step pulls from.
# `set -eux` makes the heredoc stop at the first failing command; heredoc lines
# are not implicitly chained, so without it only the exit status of the final
# `apt-get update` would decide whether this layer succeeds.
RUN <<EOT
set -eux
echo 'Acquire::ForceIPv4 "true";' | tee /etc/apt/apt.conf.d/1000-force-ipv4-transport
apt-get update
apt-get install -y software-properties-common curl git gcc python3-dev bzip2
# -y keeps the PPA registration non-interactive.
add-apt-repository -y ppa:deadsnakes/ppa
apt-get update
EOT
Expand All @@ -27,7 +27,7 @@ RUN <<EOT
set -eux

# Install python and dependencies
apt-get install -y python3.12 python3.12-dev python3.12-distutils
apt-get install -y python3.12 python3.12-dev

# Get the latest pip version
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
Expand Down
45 changes: 38 additions & 7 deletions docker/Dockerfile.worker
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ RUN go build -o /usr/local/bin/worker ./cmd/worker/main.go

# final image
# ========================
FROM nvidia/cuda:12.3.1-base-ubuntu20.04 AS release
FROM nvidia/cuda:12.3.1-base-ubuntu22.04 AS release
FROM release AS dev

FROM ${BASE_STAGE} AS final
Expand All @@ -81,20 +81,51 @@ WORKDIR /workspace

# Register the NVIDIA container runtime apt sources for Ubuntu 22.04, refresh
# the package index, and install psmisc.
#
# Each repository list is written directly with `curl -f ... -o <file>`:
#   * the previous `curl | sudo tee <file> \` line had no `&&`, so the next
#     curl command was passed to tee as extra arguments and the pipeline's
#     exit status hid the mistake;
#   * `sudo` is unnecessary here -- apt-get already runs unprivileged-prefix-free
#     in this same instruction;
#   * `-f` turns an HTTP error into a build failure instead of saving an error
#     page as an apt source list.
# CRIU is installed from a pinned .deb further down, so no CRIU apt repository
# is configured here.
# NOTE(review): confirm both ubuntu22.04 list URLs are still served upstream.
RUN apt-get update && \
    apt-get install -y curl gpg && \
    curl -fsSL https://nvidia.github.io/nvidia-container-runtime/gpgkey | apt-key add - && \
    curl -fsSL https://nvidia.github.io/nvidia-docker/ubuntu22.04/nvidia-docker.list -o /etc/apt/sources.list.d/nvidia-docker.list && \
    curl -fsSL https://nvidia.github.io/nvidia-container-runtime/ubuntu22.04/nvidia-container-runtime.list -o /etc/apt/sources.list.d/nvidia-container-runtime.list && \
    apt-get update && \
    apt-get install -y psmisc

# JuiceFS client binary. `-f` makes curl fail on an HTTP error instead of
# saving the error body as /usr/local/bin/juicefs and marking it executable.
RUN curl -fL https://beam-runner-python-deps.s3.amazonaws.com/juicefs -o /usr/local/bin/juicefs && chmod +x /usr/local/bin/juicefs

# Tailscale via its upstream install script.
RUN curl -fsSL https://tailscale.com/install.sh | sh

# NVIDIA container toolkit. The package index is refreshed in the same layer so
# the install never depends on apt lists cached by an earlier layer.
RUN apt-get update && apt-get install -y --no-install-recommends nvidia-container-toolkit-base nvidia-container-toolkit

# FUSE libraries and bash completion.
RUN apt-get update && apt-get install -y fuse3 libfuse2 libfuse3-dev libfuse-dev bash-completion

# XXX: Remove once cedana starts shipping with a compatible binary
# Installs CRIU 4.0 from a pinned .deb, on x86_64 only, after installing the
# runtime libraries listed below. On other architectures this step is a no-op.
RUN <<EOT
set -eux
if [ "$(uname -m)" = "x86_64" ]; then
    # Refresh the index in this layer so the install does not rely on lists
    # left behind by an earlier layer.
    apt-get update
    apt-get install -y python3-protobuf libnet1 libnftables1 libnl-3-200 libprotobuf-c1 iptables
    # -f: abort on an HTTP error rather than handing an error page to dpkg.
    curl -fL -o criu.deb https://download.opensuse.org/repositories/devel:/tools:/criu/xUbuntu_22.04/amd64/criu_4.0-3_amd64.deb
    dpkg -i criu.deb
    rm criu.deb
fi
EOT

# Cedana release to install; override with --build-arg CEDANA_VERSION=<version>.
ARG CEDANA_VERSION=0.9.234
# Installs the cedana .deb for the selected version from its GitHub release,
# on x86_64 only, after installing libgpgme11. No-op on other architectures.
RUN <<EOT
set -eux
if [ "$(uname -m)" = "x86_64" ]; then
    # Refresh the index in this layer so the install does not rely on lists
    # left behind by an earlier layer.
    apt-get update
    apt-get install -y libgpgme11
    # -f: abort on an HTTP error (e.g. a version with no published asset)
    # rather than handing an error page to dpkg.
    curl -fL -o cedana_amd64.deb "https://github.com/cedana/cedana/releases/download/v${CEDANA_VERSION}/cedana_${CEDANA_VERSION}_amd64.deb"
    dpkg -i cedana_amd64.deb
    rm cedana_amd64.deb
fi
EOT

# Optional Cedana GPU artifacts (controller binary and shared library).
#
# Inputs:
#   CEDANA_BASE_URL  base URL of the Cedana download service (build arg).
#   CEDANA_TOKEN     bearer token (build arg). Build args are recorded in the
#                    image history, so prefer the secret below where possible.
#   cedana_token     optional BuildKit secret (`--secret id=cedana_token,...`);
#                    when present and non-empty it takes precedence over
#                    CEDANA_TOKEN and is never written to a layer.
# When no token is supplied by either route, the step does nothing.
# `curl -f` makes an HTTP error (bad token, wrong URL) fail the build instead
# of installing the error body as an executable or shared object.
ARG CEDANA_TOKEN
ARG CEDANA_BASE_URL

RUN --mount=type=secret,id=cedana_token \
    token="${CEDANA_TOKEN:-}"; \
    if [ -s /run/secrets/cedana_token ]; then \
        token="$(cat /run/secrets/cedana_token)"; \
    fi; \
    if [ -n "${token}" ]; then \
        curl -fL -H "Authorization: Bearer ${token}" "${CEDANA_BASE_URL}/k8s/gpu/gpucontroller" -o /usr/local/bin/cedana-gpu-controller && \
        chmod +x /usr/local/bin/cedana-gpu-controller && \
        curl -fL -H "Authorization: Bearer ${token}" "${CEDANA_BASE_URL}/k8s/gpu/libcedana" -o /usr/local/lib/libcedana-gpu.so && \
        chmod +x /usr/local/lib/libcedana-gpu.so; \
    fi


ARG TARGETARCH

ENV MOUNT_S3_URL_ARM64="https://s3.amazonaws.com/mountpoint-s3-release/1.8.0/arm64/mount-s3-1.8.0-arm64.tar.gz"
Expand All @@ -114,7 +145,7 @@ RUN if [ "$(uname -m)" = "x86_64" ]; then \
apt-get install -y /tmp/cuno_1.2.6_amd64_glibc.deb && rm /tmp/cuno_1.2.6_amd64_glibc.deb; \
fi

RUN apt-get remove -y curl gpg && \
RUN apt-get remove -y curl && \
apt-get clean && apt-get autoremove -y && apt-get autopurge -y && \
rm -rf /var/lib/apt/lists/* /var/log/*

Expand Down
Loading

0 comments on commit b009209

Please sign in to comment.