diff --git a/README.md b/README.md index eac298af575..7fbe9bca28a 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ Our comprehensive user guides are available at: ## Reference implementations -The [AI-Hypercomputer/tpu-recipies](https://github.com/AI-Hypercomputer/tpu-recipes) +The [AI-Hypercomputer/tpu-recipes](https://github.com/AI-Hypercomputer/tpu-recipes) repo. contains examples for training and serving many LLM and diffusion models. ## Available docker images and wheels @@ -195,6 +195,25 @@ pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/to The torch wheel version `2.6.0.dev20240925+cpu` can be found at https://download.pytorch.org/whl/nightly/torch/. +#### Use nightly build with C++11 ABI after 10/28/2024 + +By default, `torch` is built with the pre-C++11 version of the ABI (see https://github.com/pytorch/pytorch/issues/51039). +`torch_xla` follows that and ships pre-C++11 builds by default. However, the lazy +tensor tracing performance can be improved by building the code with the C++11 ABI. +As a result, we provide C++11 ABI builds for interested users to try, especially +if you find your model's performance is bottlenecked by Python lazy tensor tracing. + +You can also add `.cxx11` after `yyyymmdd` to get the C++11 ABI variant of a +specific nightly wheel. Here is an example to install nightly builds from +10/28/2024: + +``` +pip3 install torch==2.6.0.dev20241028+cpu.cxx11.abi --index-url https://download.pytorch.org/whl/nightly +pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.6.0.dev20241028.cxx11-cp310-cp310-linux_x86_64.whl +``` + +The torch wheel version `2.6.0.dev20241028+cpu.cxx11.abi` can be found at https://download.pytorch.org/whl/nightly/torch/. +
older versions @@ -240,6 +259,7 @@ The torch wheel version `2.6.0.dev20240925+cpu` can be found at https://download | 2.2 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.2.0_3.10_tpuvm` | | 2.1 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.1.0_3.10_tpuvm` | | nightly python | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm` | +| nightly python (C++11 ABI) | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_cxx11` | To use the above dockers, please pass `--privileged --net host --shm-size=16G` along. Here is an example: ```bash diff --git a/infra/ansible/roles/build_srcs/tasks/main.yaml b/infra/ansible/roles/build_srcs/tasks/main.yaml index bfc5cdf4d58..f3117cabecc 100644 --- a/infra/ansible/roles/build_srcs/tasks/main.yaml +++ b/infra/ansible/roles/build_srcs/tasks/main.yaml @@ -65,16 +65,40 @@ state: directory mode: '0755' +- name: Delete any existing /dist directory + ansible.builtin.file: + path: /dist + state: absent + mode: '0755' + - name: Create /dist directory for exported wheels ansible.builtin.file: path: /dist state: directory mode: '0755' -- name: Rename and append +YYYYMMDD suffix to nightly wheels +- name: Copy wheels to staging ansible.builtin.shell: | pushd /tmp/staging-wheels cp {{ item.dir }}/*.whl . + popd + args: + executable: /bin/bash + chdir: "{{ item.dir }}" + loop: + - { dir: "{{ (src_root, 'pytorch/dist') | path_join }}", prefix: "torch" } + - { dir: "{{ (src_root, 'pytorch/xla/dist') | path_join }}", prefix: "torch_xla" } + +- name: Rename and append +YYYYMMDD suffix to nightly wheels + ansible.builtin.shell: | + # For both torch and torch_xla, we would like to have one wheel without + # date, and another copy of the same wheel with a date in the file name. 
+ # This script normalizes wheel names to: + # + # torch_xla-2.5.0.dev-cp310-cp310-linux_x86_64.whl + # torch_xla-2.5.0.dev20240819-cp310-cp310-linux_x86_64.whl (extra copy) + # + # and so on. # rename file name like torch_xla-2.5.0+gitac7fd44-cp310-cp310-linux_x86_64.whl into # torch_xla-2.5.0.dev-cp310-cp310-linux_x86_64.whl @@ -85,20 +109,28 @@ # group 4 (can be empty): group 3 without +: `gitac7fd441`; # group 5: anything from next - to the end: `cp310-cp310-linux_x86_64.whl`. rename -v "s/^(.*?)\-(.*?)(\+([^ -]+))?\-(.+)/\1-\2.dev-\5/" *.whl - - mv /tmp/staging-wheels/* /dist/ - popd - - # rename to append the date in YYYYMMDD format - # E.g., rename file name like torch_xla-2.5.0+gitac7fd44-cp310-cp310-linux_x86_64.whl into - # torch_xla-2.5.0.dev20240819-cp310-cp310-linux_x86_64.whl - rename -v "s/^(.*?)\-(.*?)(\+([^ -]+))?\-(.+)/\1-\2.dev$(date -u +%Y%m%d)-\5/" *.whl + + # At this point, we have *.dev*.whl files. Now we want to produce the dated + # YYYYMMDD versions as a second copy without losing the .dev one. + current_date="$(date -u +%Y%m%d)" + wheels=( *.whl ) + + for f in *.whl; do + if [[ "$f" == *".dev-"* ]]; then + # Replace the first occurrence of `.dev-` with `.devYYYYMMDD-`. + # For example: + # torch-2.6.0.dev-cp310-cp310-linux_x86_64.whl + # becomes + # torch-2.6.0.dev20241206-cp310-cp310-linux_x86_64.whl + newf="${f/.dev-/.dev${current_date}-}" + + # Copy the file to the new filename.
+ cp "$f" "$newf" + fi + done args: executable: /bin/bash - chdir: "{{ item.dir }}" - loop: - - { dir: "{{ (src_root, 'pytorch/dist') | path_join }}", prefix: "torch" } - - { dir: "{{ (src_root, 'pytorch/xla/dist') | path_join }}", prefix: "torch_xla" } + chdir: "/tmp/staging-wheels" when: nightly_release - name: Add cxx11 suffix to wheels built with C++11 ABI @@ -121,19 +153,13 @@ rename -v "s/^(.+?)(-cp\d+)/\1.cxx11\2/" *.whl args: executable: /bin/bash - chdir: "{{ item.dir }}" - loop: - - { dir: "{{ (src_root, 'pytorch/dist') | path_join }}", prefix: "torch" } - - { dir: "{{ (src_root, 'pytorch/xla/dist') | path_join }}", prefix: "torch_xla" } + chdir: "/tmp/staging-wheels" when: cxx11_abi | int > 0 - name: Copy wheels to /dist - ansible.builtin.shell: "cp {{ item }}/*.whl /dist" + ansible.builtin.shell: "mv /tmp/staging-wheels/* /dist/" args: executable: /bin/bash - loop: - - "{{ (src_root, 'pytorch/dist') | path_join }}" - - "{{ (src_root, 'pytorch/xla/dist') | path_join }}" - name: Delete temp directory ansible.builtin.file: