From ffec54b491bd7c1bd3de236707a6e9f5aadcbb51 Mon Sep 17 00:00:00 2001 From: Daniel King <43149077+dakinggg@users.noreply.github.com> Date: Thu, 13 Jun 2024 09:59:23 -0700 Subject: [PATCH 1/2] Add torch 2.3.1 docker images (#1275) --- .github/workflows/docker.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 6ca10fcd47..89aa917809 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -23,6 +23,12 @@ jobs: - name: "2.3.0_cu121_flash2_aws" base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws dep_groups: "[gpu-flash2]" + - name: "2.3.1_cu121" + base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04 + dep_groups: "[gpu]" + - name: "2.3.1_cu121_aws" + base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04-aws + dep_groups: "[gpu]" steps: - name: Maximize Build Space on Worker uses: easimon/maximize-build-space@v4 From c30856f96949a298d307219c4f13e7bd6aeddbab Mon Sep 17 00:00:00 2001 From: Brian <23239305+b-chu@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:24:01 -0400 Subject: [PATCH 2/2] Make expandable segments on by default (#1278) --- llmfoundry/utils/config_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index 5c1ec9114a..f91ae79404 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -115,7 +115,7 @@ class TrainConfig: # Cuda allocation configuration max_split_size_mb: Optional[int] = None - expandable_segments: bool = False + expandable_segments: bool = True cuda_load_lazy: bool = False # Distributed training parameters