Skip to content

Commit

Permalink
bump tests
Browse files Browse the repository at this point in the history
  • Loading branch information
j316chuck committed Jan 13, 2024
1 parent 04b43b7 commit ee5ddbe
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 11 deletions.
36 changes: 29 additions & 7 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,16 @@ ENV PYTORCH_VERSION=${PYTORCH_VERSION}
# Persist the nightly settings in the image environment: the package index URL
# and the nightly version suffix that later pip install steps read.
ENV PYTORCH_NIGHTLY_URL=${PYTORCH_NIGHTLY_URL}
ENV PYTORCH_NIGHTLY_VERSION=${PYTORCH_NIGHTLY_VERSION}

# Replace any previously installed torch stack with explicitly pinned nightly
# wheels: pytorch-triton 2.2.0+e28a256d71, torch 2.3.0.dev20240110+cu121 and
# torchvision 0.18.0.dev20240110+cu121.
# pytorch-triton is installed first and each wheel gets its own pip invocation,
# preserving the original install order.
# --no-cache-dir keeps pip's download cache out of the image layer, consistent
# with the other pip install steps in this file.
# NOTE(review): the wheel URLs hard-code the cp310 / linux_x86_64 tags while the
# command uses pip${PYTHON_VERSION}; presumably this only works for
# PYTHON_VERSION=3.10 on x86_64 — confirm before building other variants.
RUN pip${PYTHON_VERSION} uninstall -y torch pytorch-triton torchvision && \
    pip${PYTHON_VERSION} install --no-cache-dir \
        https://download.pytorch.org/whl/nightly/pytorch_triton-2.2.0%2Be28a256d71-cp310-cp310-linux_x86_64.whl && \
    pip${PYTHON_VERSION} install --no-cache-dir \
        https://download.pytorch.org/whl/nightly/cu121/torch-2.3.0.dev20240110%2Bcu121-cp310-cp310-linux_x86_64.whl && \
    pip${PYTHON_VERSION} install --no-cache-dir \
        https://download.pytorch.org/whl/nightly/cu121/torchvision-0.18.0.dev20240110%2Bcu121-cp310-cp310-linux_x86_64.whl

# Install torch and torchvision.
#  - PYTORCH_NIGHTLY_URL set: install the pre-release build
#    <version>.<PYTORCH_NIGHTLY_VERSION> from that index.
#  - PYTORCH_NIGHTLY_URL empty: install the stable build from the torch_stable
#    listing, tagged "cu<major><minor>" derived from CUDA_VERSION, or "cpu"
#    when CUDA_VERSION is empty.
RUN if [ -n "$PYTORCH_NIGHTLY_URL" ] ; then \
        pip${PYTHON_VERSION} install --no-cache-dir --pre --index-url ${PYTORCH_NIGHTLY_URL} \
            torch==${PYTORCH_VERSION}.${PYTORCH_NIGHTLY_VERSION} \
            torchvision==${TORCHVISION_VERSION}.${PYTORCH_NIGHTLY_VERSION} ; \
    else \
        CUDA_VERSION_TAG=$(python${PYTHON_VERSION} -c "print('cu' + ''.join('${CUDA_VERSION}'.split('.')[:2]) if '${CUDA_VERSION}' else 'cpu')") && \
        pip${PYTHON_VERSION} install --no-cache-dir --find-links https://download.pytorch.org/whl/torch_stable.html \
            torch==${PYTORCH_VERSION}+${CUDA_VERSION_TAG} \
            torchvision==${TORCHVISION_VERSION}+${CUDA_VERSION_TAG} ; \
    fi
#####################################
# Install EFA and AWS-OFI-NCCL plugin
#####################################
Expand Down Expand Up @@ -290,6 +293,25 @@ RUN if [[ -n "$CUDA_VERSION" ]] && [[ -z "${PYTORCH_NIGHTLY_URL}" ]]; then \
RUN if [ -n "$CUDA_VERSION" ] ; then \
pip${PYTHON_VERSION} install --upgrade --no-cache-dir ninja==1.11.1 && \
pip${PYTHON_VERSION} install --upgrade --no-cache-dir --force-reinstall packaging==22.0 && \
git clone [email protected]:mosaicml/llm-foundry-private.git && \
cd llm-foundry-private && \
git checkout evan/no-fwd-hook && \
pip install .[gpu,openai] && \
cd .. && \
git clone https://github.com/dakinggg/composer.git && \
cd composer && \
git checkout 2-3-patch && \
pip install -e .[all] && \
cd .. && \
pip install --no-dependencies "git+ssh://[email protected]/mosaicml/megablocks-private.git@main#egg=megablocks[all]" && \
pip install --no-dependencies git+https://github.com/tgale96/[email protected] && \
pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable && \
pip uninstall -y torch && \
pip uninstall -y pytorch-triton && \
pip uninstall -y torchvision && \
pip install https://download.pytorch.org/whl/nightly/pytorch_triton-2.2.0%2Be28a256d71-cp310-cp310-linux_x86_64.whl && \
pip install https://download.pytorch.org/whl/nightly/cu121/torch-2.3.0.dev20240110%2Bcu121-cp310-cp310-linux_x86_64.whl && \
pip install https://download.pytorch.org/whl/nightly/cu121/torchvision-0.18.0.dev20240110%2Bcu121-cp310-cp310-linux_x86_64.whl && \
git clone --branch v2.4.2 https://github.com/Dao-AILab/flash-attention.git && \
cd flash-attention && \
MAX_JOBS=1 python${PYTHON_VERSION} setup.py install && \
Expand Down
8 changes: 4 additions & 4 deletions docker/build_matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
CUDA_VERSION: 12.1.0
IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121
IMAGE_NAME: torch-nightly-2-2-0-20231213-cu121
MOFED_VERSION: 5.5-1.0.3.2
NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
Expand All @@ -20,9 +20,9 @@
brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121
PYTORCH_NIGHTLY_VERSION: dev20240110+cu121
PYTORCH_VERSION: 2.3.0
PYTORCH_NIGHTLY_VERSION: dev20231213+cu121
PYTORCH_VERSION: 2.2.0
TAGS:
- mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.10-ubuntu20.04
- mosaicml/pytorch:2.2.0_cu121-nightly20231213-python3.10-ubuntu20.04
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.18.0

0 comments on commit ee5ddbe

Please sign in to comment.