diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..9cb67cabd
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+models/
+Dockerfile
+README.md
+LICENSE
+assets/
+*.egg-info
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..cb5718768
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+models/ldm/text2img-large/
+outputs/
+src/
+__pycache__/
+*.egg-info
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..f28012f08
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,38 @@
+FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04
+MAINTAINER Peter Willemsen
+RUN echo "Installing dependencies..." && \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y curl wget sudo git build-essential cmake pkg-config liblzma-dev libbz2-dev zlib1g-dev libssl-dev zsh clang && \
+    apt-get dist-upgrade -y && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src/python
+RUN wget https://www.python.org/ftp/python/3.8.5/Python-3.8.5.tgz -O python-src.tar.gz && \
+    tar xzvf python-src.tar.gz --strip-components=1 && \
+    rm python-src.tar.gz && \
+    ./configure --enable-optimizations --prefix=/opt/python-3.8.5 && \
+    make && \
+    make install && \
+    rm -rf /src/python
+WORKDIR /
+ENV PATH="/opt/python-3.8.5/bin:${PATH}"
+
+RUN python3 -m pip install pip==20.3
+RUN pip3 install torch==1.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+RUN pip3 install numpy==1.19.2 torchvision==0.11.2 albumentations==0.4.3 opencv-python==4.1.2.30 pudb==2019.2 imageio==2.9.0 imageio-ffmpeg==0.4.2 pytorch-lightning==1.6.1 omegaconf==2.1.1 "test-tube>=0.7.5" "streamlit>=0.73.1" einops==0.3.0 torch-fidelity==0.3.0 transformers==4.3.1 -e "git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers" -e "git+https://github.com/openai/CLIP.git@main#egg=clip"
+
+RUN mkdir -p /opt/ldm_package
+ADD ./setup.py /opt/ldm_package
+ADD ./ldm /opt/ldm_package/ldm
+ADD ./configs /opt/ldm_package/configs
+RUN pip3 install -e /opt/ldm_package
+
+WORKDIR /opt/ldm
+
+# Add dev user
+RUN useradd -ms /bin/zsh ldm-dev && \
+    usermod -aG sudo ldm-dev && \
+    echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+USER ldm-dev
+
+ENTRYPOINT ["python3"]
diff --git a/README.md b/README.md
index cd6558eb8..fe9057d09 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,20 @@ As a rule of thumb, higher values of `scale` produce better samples at the cost
 Furthermore, increasing `ddim_steps` generally also gives higher quality samples, but returns are diminishing for values > 250. Fast sampling (i.e. low values of `ddim_steps`) while retaining good quality can be achieved by using `--ddim_eta 0.0`.
 
+## Installing with Docker
+
+- Note: depending on your CUDA/GPU version, you might have to change the base image in the first line of the Dockerfile or the torch version being installed.
+- Build the image: `docker build . --tag latent-diffusion`
+- For text-to-image, download the pre-trained weights (5.7GB):
+  ```
+  mkdir -p models/ldm/text2img-large/
+  wget -O models/ldm/text2img-large/model.ckpt https://ommer-lab.com/files/latent-diffusion/nitro/txt2img-f8-large/model.ckpt
+  ```
+- Sample with the following command (make sure to run it from the root of this repo):
+  ```
+  docker run --name=tmp-diffusion --rm --gpus all -it -v "$(pwd):/opt/ldm" latent-diffusion /opt/ldm/scripts/txt2img.py --prompt "A large blue whale on a freight ship, vector art" --ddim_eta 0.0 --n_samples 4 --n_iter 4 --scale 5.0 --ddim_steps 50
+  ```
+
 #### Beyond 256²
 
 For certain inputs, simply running the model in a convolutional fashion on larger features than it was trained on