From f86dbe6d455a672663da814a15f61d43e2ed3ed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Thu, 5 Sep 2024 10:12:21 +0200 Subject: [PATCH 1/2] Update dockerfile and add docker build action (#3283) * Use github docker registry * Fix DDP train for GPU in exclusive mode * Improve docker image - compile gsplat, decrease image size * Drop unrelated change * Add build docker image action * Rename build docker image action * nit * Remove commented line from Dockerfile * Fix dockerfile when explicit source is specified * Fix failing dynamo build for torch.compile * Lock dockerfile and tcnn versions * Add `torch.cuda.is_available()` condition * Drop set_cuda_device * Fix build docker image github action * Docker build save disk space * Set MAX_JOBS to limit resource usage for docker build * Try bumping `MAX_JOBS` 2 => 4 * Install fixed gsplat version from nerfstudio's pyproject.toml * Update docs * Finish docker build action * Fix ignores push when PR --------- Co-authored-by: Gina Wu Co-authored-by: Brent Yi --- .github/workflows/build_docker_image.yml | 62 +++++ Dockerfile | 296 ++++++++++------------- docs/quickstart/installation.md | 19 +- 3 files changed, 188 insertions(+), 189 deletions(-) create mode 100644 .github/workflows/build_docker_image.yml diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml new file mode 100644 index 0000000000..97cb772820 --- /dev/null +++ b/.github/workflows/build_docker_image.yml @@ -0,0 +1,62 @@ +name: Build Docker Image +on: + workflow_dispatch: + workflow_call: + pull_request: + push: + branches: + - main + - master + tags: + - 'v*' +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} +jobs: + build-and-publish-docker-image: + runs-on: ubuntu-latest + name: build-and-publish-docker-image + permissions: + packages: write + contents: read + attestations: write + id-token: write + steps: + - uses: actions/checkout@v4 + - name: Login to GitHub 
Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + - name: Free root space + uses: almahmoud/free-root-space@main + with: + remove-gcc: false + remove-cplusplus: false + - name: Build and push Docker image + id: push + uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 + with: + context: . + file: ./Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v1 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: ${{ github.event_name != 'pull_request' }} + diff --git a/Dockerfile b/Dockerfile index f1b29ea7aa..4255755fa1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,185 +1,133 @@ -ARG CUDA_VERSION=11.8.0 -ARG OS_VERSION=22.04 -# Define base image. -FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION} -ARG CUDA_VERSION -ARG OS_VERSION +# syntax=docker/dockerfile:1 +ARG UBUNTU_VERSION=22.04 +ARG NVIDIA_CUDA_VERSION=11.8.0 +# CUDA architectures, required by Colmap and tiny-cuda-nn. Use >= 8.0 for faster TCNN. +ARG CUDA_ARCHITECTURES="90;89;86;80;75;70;61" +ARG NERFSTUDIO_VERSION="" + +# Pull source either provided or from git. +FROM scratch as source_copy +ONBUILD COPY . 
/tmp/nerfstudio +FROM alpine/git as source_no_copy +ARG NERFSTUDIO_VERSION +ONBUILD RUN git clone --branch ${NERFSTUDIO_VERSION} --recursive https://github.com/nerfstudio-project/nerfstudio.git /tmp/nerfstudio +ARG NERFSTUDIO_VERSION +FROM source_${NERFSTUDIO_VERSION:+no_}copy as source + +FROM nvidia/cuda:${NVIDIA_CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as builder +ARG CUDA_ARCHITECTURES +ARG NVIDIA_CUDA_VERSION +ARG UBUNTU_VERSION -# Define username, user uid and gid -ARG USERNAME=user -ARG USER_UID=1000 -ARG USER_GID=$USER_UID - -# metainformation -LABEL org.opencontainers.image.version = "0.1.18" -LABEL org.opencontainers.image.source = "https://github.com/nerfstudio-project/nerfstudio" -LABEL org.opencontainers.image.licenses = "Apache License 2.0" -LABEL org.opencontainers.image.base.name="docker.io/library/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION}" - -# Variables used at build time. -## CUDA architectures, required by Colmap and tiny-cuda-nn. -## NOTE: All commonly used GPU architectures are included and supported here. To speedup the image build process remove all architectures but the one of your explicit GPU. Find details here: https://developer.nvidia.com/cuda-gpus (8.6 translates to 86 in the line below) or in the docs. -ARG CUDA_ARCHITECTURES=90;89;86;80;75;70;61;52;37 - -# Set environment variables. -## Set non-interactive to prevent asking for user inputs blocking image creation. ENV DEBIAN_FRONTEND=noninteractive -## Set timezone as it is required by some packages. -ENV TZ=Europe/Berlin -## CUDA Home, required to find CUDA in some packages. -ENV CUDA_HOME="/usr/local/cuda" - -# Install required apt packages and clear cache afterwards. 
+ENV QT_XCB_GL_INTEGRATION=xcb_egl RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - curl \ - ffmpeg \ - git \ - libatlas-base-dev \ - libboost-filesystem-dev \ - libboost-graph-dev \ - libboost-program-options-dev \ - libboost-system-dev \ - libboost-test-dev \ - libhdf5-dev \ - libcgal-dev \ - libeigen3-dev \ - libflann-dev \ - libfreeimage-dev \ - libgflags-dev \ - libglew-dev \ - libgoogle-glog-dev \ - libmetis-dev \ - libprotobuf-dev \ - libqt5opengl5-dev \ - libsqlite3-dev \ - libsuitesparse-dev \ - nano \ - protobuf-compiler \ - python-is-python3 \ - python3.10-dev \ - python3-pip \ - qtbase5-dev \ - sudo \ - vim-tiny \ - wget && \ - rm -rf /var/lib/apt/lists/* - - -# Install GLOG (required by ceres). -RUN git clone --branch v0.6.0 https://github.com/google/glog.git --single-branch && \ - cd glog && \ - mkdir build && \ - cd build && \ - cmake .. && \ - make -j `nproc` && \ - make install && \ - cd ../.. && \ - rm -rf glog -# Add glog path to LD_LIBRARY_PATH. -ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib" - -# Install Ceres-solver (required by colmap). -RUN git clone --branch 2.1.0 https://ceres-solver.googlesource.com/ceres-solver.git --single-branch && \ - cd ceres-solver && \ - git checkout $(git describe --tags) && \ - mkdir build && \ - cd build && \ - cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF && \ - make -j `nproc` && \ - make install && \ - cd ../.. && \ - rm -rf ceres-solver - -# Install colmap. 
-RUN git clone --branch 3.8 https://github.com/colmap/colmap.git --single-branch && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + git \ + cmake \ + ninja-build \ + build-essential \ + libboost-program-options-dev \ + libboost-filesystem-dev \ + libboost-graph-dev \ + libboost-system-dev \ + libeigen3-dev \ + libflann-dev \ + libfreeimage-dev \ + libmetis-dev \ + libgoogle-glog-dev \ + libgtest-dev \ + libsqlite3-dev \ + libglew-dev \ + qtbase5-dev \ + libqt5opengl5-dev \ + libcgal-dev \ + libceres-dev \ + python3.10-dev \ + python3-pip + +# Build and install COLMAP. +RUN git clone https://github.com/colmap/colmap.git && \ cd colmap && \ + git checkout "3.9.1" && \ mkdir build && \ cd build && \ - cmake .. -DCUDA_ENABLED=ON \ - -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES} && \ - make -j `nproc` && \ - make install && \ - cd ../.. && \ - rm -rf colmap - -# Create non root user, add it to custom group and setup environment. -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME -d /home/${USERNAME} --shell /usr/bin/bash -# OPTIONAL -# If sudo privilages are not required comment below line -# Create simple password for user and add it to sudo group -# Update group so that it is not required to type password for commands: apt update/upgrade/install/remove -RUN echo "${USERNAME}:password" | chpasswd \ - && usermod -aG sudo ${USERNAME} \ - && echo "%sudo ALL=NOPASSWD:/usr/bin/apt-get update, /usr/bin/apt-get upgrade, /usr/bin/apt-get install, /usr/bin/apt-get remove" >> /etc/sudoers + mkdir -p /build && \ + cmake .. -GNinja "-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}" \ + -DCMAKE_INSTALL_PREFIX=/build/colmap && \ + ninja install -j1 && \ + cd ~ + +# Upgrade pip and install dependencies. 
+# pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118 && \ +RUN pip install --no-cache-dir --upgrade pip 'setuptools<70.0.0' && \ + pip install --no-cache-dir torch==2.1.2+cu118 torchvision==0.16.2+cu118 'numpy<2.0.0' --extra-index-url https://download.pytorch.org/whl/cu118 && \ + git clone --branch master --recursive https://github.com/cvg/Hierarchical-Localization.git /opt/hloc && \ + cd /opt/hloc && git checkout v1.4 && python3.10 -m pip install --no-cache-dir . && cd ~ && \ + TCNN_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" pip install --no-cache-dir "git+https://github.com/NVlabs/tiny-cuda-nn.git@b3473c81396fe927293bdfd5a6be32df8769927c#subdirectory=bindings/torch" && \ + pip install --no-cache-dir pycolmap==0.6.1 pyceres==2.1 omegaconf==2.3.0 + +# Install gsplat and nerfstudio. +# NOTE: both are installed jointly in order to prevent docker cache with latest +# gsplat version (we do not explicitly specify the commit hash). +# +# We set MAX_JOBS to reduce resource usage for GH actions: +# - https://github.com/nerfstudio-project/gsplat/blob/db444b904976d6e01e79b736dd89a1070b0ee1d0/setup.py#L13-L23 +COPY --from=source /tmp/nerfstudio/ /tmp/nerfstudio +RUN export TORCH_CUDA_ARCH_LIST="$(echo "$CUDA_ARCHITECTURES" | tr ';' '\n' | awk '$0 > 70 {print substr($0,1,1)"."substr($0,2)}' | tr '\n' ' ' | sed 's/ $//')" && \ + export MAX_JOBS=4 && \ + GSPLAT_VERSION="$(sed -n 's/.*gsplat==\s*\([^," '"'"']*\).*/\1/p' /tmp/nerfstudio/pyproject.toml)" && \ + pip install --no-cache-dir git+https://github.com/nerfstudio-project/gsplat.git@v${GSPLAT_VERSION} && \ + pip install --no-cache-dir /tmp/nerfstudio 'numpy<2.0.0' && \ + rm -rf /tmp/nerfstudio + +# Fix permissions +RUN chmod -R go=u /usr/local/lib/python3.10 && \ + chmod -R go=u /build + +# +# Docker runtime stage. 
+# +FROM nvidia/cuda:${NVIDIA_CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} as runtime +ARG CUDA_ARCHITECTURES +ARG NVIDIA_CUDA_VERSION +ARG UBUNTU_VERSION -# Create workspace folder and change ownership to new user -RUN mkdir /workspace && chown ${USER_UID}:${USER_GID} /workspace - -# Switch to new user and workdir. -USER ${USER_UID} -WORKDIR /home/${USERNAME} - -# Add local user binary folder to PATH variable. -ENV PATH="${PATH}:/home/${USERNAME}/.local/bin" - -# Upgrade pip and install packages. -RUN python3.10 -m pip install --no-cache-dir --upgrade pip setuptools==69.5.1 pathtools promise pybind11 omegaconf - -# Install pytorch and submodules -# echo "${CUDA_VERSION}" | sed 's/.$//' | tr -d '.' -- CUDA_VERSION -> delete last digit -> delete all '.' -RUN CUDA_VER=$(echo "${CUDA_VERSION}" | sed 's/.$//' | tr -d '.') && python3.10 -m pip install --no-cache-dir \ - torch==2.1.2+cu${CUDA_VER} \ - torchvision==0.16.2+cu${CUDA_VER} \ - --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VER} - -# Install tiny-cuda-nn (we need to set the target architectures as environment variable first). -ENV TCNN_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES} -RUN python3.10 -m pip install --no-cache-dir git+https://github.com/NVlabs/tiny-cuda-nn.git#subdirectory=bindings/torch - -# Install pycolmap, required by hloc. -RUN git clone --branch v0.4.0 --recursive https://github.com/colmap/pycolmap.git && \ - cd pycolmap && \ - python3.10 -m pip install --no-cache-dir . && \ - cd .. - -# Install hloc 1.4 as alternative feature detector and matcher option for nerfstudio. -RUN git clone --branch master --recursive https://github.com/cvg/Hierarchical-Localization.git && \ - cd Hierarchical-Localization && \ - git checkout v1.4 && \ - python3.10 -m pip install --no-cache-dir -e . && \ - cd .. - -# Install pyceres from source -RUN git clone --branch v1.0 --recursive https://github.com/cvg/pyceres.git && \ - cd pyceres && \ - python3.10 -m pip install --no-cache-dir -e . && \ - cd .. 
- -# Install pixel perfect sfm. -RUN git clone --recursive https://github.com/cvg/pixel-perfect-sfm.git && \ - cd pixel-perfect-sfm && \ - git reset --hard 40f7c1339328b2a0c7cf71f76623fb848e0c0357 && \ - git clean -df && \ - python3.10 -m pip install --no-cache-dir -e . && \ - cd .. - -# Copy nerfstudio folder and give ownership to user. -COPY --chown=${USER_UID}:${USER_GID} . /home/${USERNAME}/nerfstudio - -# Install nerfstudio dependencies. -RUN cd nerfstudio && \ - python3.10 -m pip install --no-cache-dir -e . && \ - cd .. +LABEL org.opencontainers.image.source = "https://github.com/nerfstudio-project/nerfstudio" +LABEL org.opencontainers.image.licenses = "Apache License 2.0" +LABEL org.opencontainers.image.base.name="docker.io/library/nvidia/cuda:${NVIDIA_CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}" +LABEL org.opencontainers.image.documentation = "https://docs.nerf.studio/" -# Switch to workspace folder and install nerfstudio cli auto completion -WORKDIR /workspace -RUN ns-install-cli --mode install +# Minimal dependencies to run COLMAP binary compiled in the builder stage. +# Note: this reduces the size of the final image considerably, since all the +# build dependencies are not needed. +RUN apt-get update && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + libboost-filesystem1.74.0 \ + libboost-program-options1.74.0 \ + libc6 \ + libceres2 \ + libfreeimage3 \ + libgcc-s1 \ + libgl1 \ + libglew2.2 \ + libgoogle-glog0v5 \ + libqt5core5a \ + libqt5gui5 \ + libqt5widgets5 \ + python3.10 \ + python3.10-dev \ + build-essential \ + python-is-python3 \ + ffmpeg + +# Copy packages from builder stage. +COPY --from=builder /build/colmap/ /usr/local/ +COPY --from=builder /usr/local/lib/python3.10/dist-packages/ /usr/local/lib/python3.10/dist-packages/ +COPY --from=builder /usr/local/bin/ns* /usr/local/bin/ + +# Install nerfstudio cli auto completion +RUN /bin/bash -c 'ns-install-cli --mode install' # Bash as default entrypoint. 
CMD /bin/bash -l -# Force changing password on first container run -# Change line above: CMD /bin/bash -l -> CMD /bin/bash -l -c passwd && /usr/bin/bash -l diff --git a/docs/quickstart/installation.md b/docs/quickstart/installation.md index 99a81d2a6c..de79064213 100644 --- a/docs/quickstart/installation.md +++ b/docs/quickstart/installation.md @@ -222,10 +222,10 @@ Instead of installing and compiling prerequisites, setting up the environment an ### Prerequisites Docker ([get docker](https://docs.docker.com/get-docker/)) and nvidia GPU drivers ([get nvidia drivers](https://www.nvidia.de/Download/index.aspx?lang=de)), capable of working with CUDA 11.8, must be installed. -The docker image can then either be pulled from [here](https://hub.docker.com/r/dromni/nerfstudio/tags) (replace with the actual version, e.g. 0.1.18) +The docker image can then either be pulled from [here](https://github.com/nerfstudio-project/nerfstudio/pkgs/container/nerfstudio) (`latest` can be replaced with a fixed version, e.g., `1.1.3`) ```bash -docker pull dromni/nerfstudio: +docker pull ghcr.io/nerfstudio-project/nerfstudio:latest ``` or be built from the repository using @@ -240,22 +240,11 @@ For example, here's how to build with support for GeForce 30xx series GPUs: ```bash docker build \ - --build-arg CUDA_VERSION=11.8.0 \ --build-arg CUDA_ARCHITECTURES=86 \ - --build-arg OS_VERSION=22.04 \ --tag nerfstudio-86 \ --file Dockerfile . ``` -The user inside the container is called 'user' and is mapped to the local user with ID 1000 (usually the first non-root user on Linux systems). -If you suspect that your user might have a different id, override `USER_ID` during the build as follows: - -```bash -docker build \ - --build-arg USER_ID=$(id -u) \ - --file Dockerfile . -``` - ### Using an interactive container The docker container can be launched with an interactive terminal where nerfstudio commands can be entered as usual. 
Some parameters are required and some are strongly recommended for usage as following: @@ -269,7 +258,7 @@ docker run --gpus all \ # Give the conta --rm \ # Remove container after it is closed (recommended). -it \ # Start container in interactive mode. --shm-size=12gb \ # Increase memory assigned to container to avoid memory limitations, default is 64 MB (recommended). - dromni/nerfstudio: # Docker image name if you pulled from docker hub. + ghcr.io/nerfstudio-project/nerfstudio: # Docker image name if you pulled from GitHub. <--- OR ---> nerfstudio # Docker image tag if you built the image from the Dockerfile by yourself using the command from above. ``` @@ -280,7 +269,7 @@ Besides, the container can also directly be used by adding the nerfstudio comman ```bash docker run --gpus all -u $(id -u) -v /folder/of/your/data:/workspace/ -v /home//.cache/:/home/user/.cache/ -p 7007:7007 --rm -it --shm-size=12gb # Parameters. - dromni/nerfstudio: \ # Docker image name + ghcr.io/nerfstudio-project/nerfstudio: \ # Docker image name if you pulled from GitHub. ns-process-data video --data /workspace/video.mp4 # Smaple command of nerfstudio. ``` From d67b281f7d89e8f2387cdfc50f6979d0a7d35d0e Mon Sep 17 00:00:00 2001 From: Hardik Dava <39372750+hardikdava@users.noreply.github.com> Date: Mon, 9 Sep 2024 22:20:26 +0200 Subject: [PATCH 2/2] RGBA renderer for splatfacto (#3307) * Update base_model.py * Update render.py * Update render.py * fixing error * Update render.py * Update render.py --- nerfstudio/models/base_model.py | 6 +----- nerfstudio/scripts/render.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/nerfstudio/models/base_model.py b/nerfstudio/models/base_model.py index e1c9507ff0..febd4ab105 100644 --- a/nerfstudio/models/base_model.py +++ b/nerfstudio/models/base_model.py @@ -214,11 +214,7 @@ def get_rgba_image(self, outputs: Dict[str, torch.Tensor], output_name: str = "r RGBA image. 
""" accumulation_name = output_name.replace("rgb", "accumulation") - if ( - not hasattr(self, "renderer_rgb") - or not hasattr(self.renderer_rgb, "background_color") - or accumulation_name not in outputs - ): + if accumulation_name not in outputs: raise NotImplementedError(f"get_rgba_image is not implemented for model {self.__class__.__name__}") rgb = outputs[output_name] if self.renderer_rgb.background_color == "random": # type: ignore diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 70208a45d2..4eb4a71840 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -197,6 +197,9 @@ def _render_trajectory_video( outputs = pipeline.model.get_outputs_for_camera( cameras[camera_idx : camera_idx + 1], obb_box=obb_box ) + if rendered_output_names is not None and "rgba" in rendered_output_names: + rgba = pipeline.model.get_rgba_image(outputs=outputs, output_name="rgb") + outputs["rgba"] = rgba render_image = [] for rendered_output_name in rendered_output_names: @@ -221,6 +224,8 @@ def _render_trajectory_video( .cpu() .numpy() ) + elif rendered_output_name == "rgba": + output_image = output_image.detach().cpu().numpy() else: output_image = ( colormaps.apply_colormap( @@ -790,6 +795,9 @@ def update_config(config: TrainerConfig) -> TrainerConfig: for camera_idx, (camera, batch) in enumerate(progress.track(dataloader, total=len(dataset))): with torch.no_grad(): outputs = pipeline.model.get_outputs_for_camera(camera) + if self.rendered_output_names is not None and "rgba" in self.rendered_output_names: + rgba = pipeline.model.get_rgba_image(outputs=outputs, output_name="rgb") + outputs["rgba"] = rgba gt_batch = batch.copy() gt_batch["rgb"] = gt_batch.pop("image") @@ -841,11 +849,12 @@ def update_config(config: TrainerConfig) -> TrainerConfig: output_image = gt_batch[output_name] else: output_image = outputs[output_name] - del output_name # Map to color spaces / numpy if is_raw: output_image = output_image.cpu().numpy() + elif 
output_name == "rgba": + output_image = output_image.detach().cpu().numpy() elif is_depth: output_image = ( colormaps.apply_depth_colormap(