Skip to content

Commit

Permalink
Merge pull request #1034 from StanfordVL/docker-cuda-install-first
Browse files Browse the repository at this point in the history
Update Docker to uninstall cuda toolkit & start pushing actions runner image on CI too
  • Loading branch information
cgokmen authored Nov 22, 2024
2 parents 075267e + e4d6891 commit bae1015
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 16 deletions.
46 changes: 39 additions & 7 deletions .github/workflows/build-push-containers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
sudo rm -rf \
/usr/share/dotnet /usr/local/lib/android /opt/ghc \
/usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
/usr/lib/jvm || true
/usr/lib/jvm /opt/hostedtoolcache/CodeQL || true
echo "some directories deleted"
sudo apt install aptitude -y >/dev/null 2>&1
sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \
Expand Down Expand Up @@ -97,6 +97,18 @@ jobs:
tags: |
type=ref,event=branch
type=semver,pattern={{version}}
-
# Computes the tags and labels for the GitHub Actions runner image
# (stanfordvl/omnigibson-gha). The outputs are consumed elsewhere in this
# workflow as steps.meta-actions.outputs.tags / .labels.
name: Metadata for actions Image
id: meta-actions
uses: docker/metadata-action@v5
# The actions image should only be built if the push is to og-develop
if: github.ref == 'refs/heads/og-develop'
with:
images: |
stanfordvl/omnigibson-gha
tags: |
# We only push to the latest tag for the actions image
type=raw,value=latest
-
name: Build and push prod image
id: build-prod
Expand All @@ -107,8 +119,8 @@ jobs:
tags: ${{ steps.meta-prod.outputs.tags }}
labels: ${{ steps.meta-prod.outputs.labels }}
file: docker/prod.Dockerfile
cache-from: type=registry,ref=stanfordvl/omnigibson:og-develop
cache-to: type=inline
cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache
cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max

-
name: Build and push dev image
Expand All @@ -121,8 +133,8 @@ jobs:
tags: ${{ steps.meta-dev.outputs.tags }}
labels: ${{ steps.meta-dev.outputs.labels }}
file: docker/prod.Dockerfile
cache-from: type=registry,ref=stanfordvl/omnigibson:og-develop # OK to share cache here.
cache-to: type=inline
cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache # OK to share cache here.
cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max

- name: Update vscode image Dockerfile with prod image tag
run: |
Expand All @@ -137,5 +149,25 @@ jobs:
tags: ${{ steps.meta-vscode.outputs.tags }}
labels: ${{ steps.meta-vscode.outputs.labels }}
file: docker/vscode.Dockerfile
cache-from: type=registry,ref=stanfordvl/omnigibson:og-develop # OK to share cache here.
cache-to: type=inline
cache-from: type=registry,ref=stanfordvl/omnigibson:build-cache # OK to share cache here.
cache-to: type=registry,ref=stanfordvl/omnigibson:build-cache,mode=max

- name: Update actions image Dockerfile with dev image tag
# The actions image should only be built if the push is to og-develop
if: github.ref == 'refs/heads/og-develop'
# Replaces every "omnigibson-dev:og-develop" reference in
# docker/gh-actions/Dockerfile with "omnigibson-dev@<digest>", where the
# digest is the output of the build-dev step, so the file points at that
# exact image rather than a mutable tag. The rewritten file is then printed
# to the job log.
run: |
sed -i "s/omnigibson-dev:og-develop/omnigibson-dev@${{ steps.build-dev.outputs.digest }}/g" docker/gh-actions/Dockerfile && cat docker/gh-actions/Dockerfile
-
  # Builds the GitHub Actions runner image from docker/gh-actions and pushes
  # it with the tags/labels computed by the meta-actions step.
  name: Build and push actions image
  id: build-actions
  uses: docker/build-push-action@v5
  # The actions image should only be built if the push is to og-develop
  if: github.ref == 'refs/heads/og-develop'
  with:
    context: docker/gh-actions
    push: true
    tags: ${{ steps.meta-actions.outputs.tags }}
    labels: ${{ steps.meta-actions.outputs.labels }}
    file: docker/gh-actions/Dockerfile
    # Use a cache ref dedicated to this image. A registry cache ref holds a
    # single cache manifest, so exporting to a ref shared with other image
    # builds replaces their cache (and they replace this one), causing cache
    # misses on the next run.
    cache-from: type=registry,ref=stanfordvl/omnigibson-gha:build-cache
    cache-to: type=registry,ref=stanfordvl/omnigibson-gha:build-cache,mode=max
21 changes: 12 additions & 9 deletions docker/prod.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,23 @@ RUN micromamba run -n omnigibson micromamba install \
pytorch torchvision pytorch-cuda=11.8 \
-c pytorch -c nvidia -c conda-forge

# Install cuda for compiling curobo
RUN wget -O /cuda.run https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \
sh /cuda.run --silent --toolkit && rm /cuda.run
ENV PATH=/usr/local/cuda-11.8/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH

# Install curobo. This can normally be installed when OmniGibson is pip
# installed, but we need to install it beforehand here so that it doesn't
# have to happen every time a CI action is run (otherwise it's just
# very slow)
# very slow).
# This also allows us to uninstall the cuda toolkit after curobo is built
# to save space (meaning curobo will not be able to be rebuilt at runtime).
# Here we also compile this such that it is compatible with GPU architectures
# Turing, Ampere, and Ada, which correspond to 20, 30, and 40 series GPUs.
RUN TORCH_CUDA_ARCH_LIST='7.5;8.0;8.6+PTX' \
micromamba run -n omnigibson pip install git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo --no-build-isolation
# We also discard the installation's stdout (stderr is kept) to avoid hitting the log limit.
# Install the CUDA 11.8 toolkit from NVIDIA's apt repository, build curobo
# against it, and remove the toolkit again, all in one RUN instruction so
# the toolkit does not persist in an image layer.
# `apt-get clean` (rather than `autoclean`) is used at the end because
# autoclean only deletes cached package files that can no longer be
# downloaded, which could leave the toolkit's .deb archives in the image.
RUN wget --no-verbose -O /cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i /cuda-keyring.deb && rm /cuda-keyring.deb && apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y cuda-toolkit-11-8 && \
    TORCH_CUDA_ARCH_LIST='7.5;8.0;8.6+PTX' PATH=/usr/local/cuda-11.8/bin:$PATH LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH \
    micromamba run -n omnigibson pip install \
    git+https://github.com/StanfordVL/curobo@06d8c79b660db60c2881e9319e60899cbde5c5b5#egg=nvidia_curobo \
    --no-build-isolation > /dev/null && \
    apt-get remove -y cuda-toolkit-11-8 && apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

# Make sure isaac gets properly sourced every time omnigibson gets called
ARG CONDA_ACT_FILE="/micromamba/envs/omnigibson/etc/conda/activate.d/env_vars.sh"
Expand Down

0 comments on commit bae1015

Please sign in to comment.