From 75a3e41b40b16dc3e69b879510b028cef6bad934 Mon Sep 17 00:00:00 2001 From: torchxlabot2 Date: Thu, 14 Mar 2024 23:55:02 +0000 Subject: [PATCH] Update doc from commit 370089a984b346ba0c3bfb54ca9a7cc6eed26b3b --- master/_modules/index.html | 2 +- master/_modules/torch_xla/core/functions.html | 2 +- master/_modules/torch_xla/core/xla_model.html | 2 +- .../distributed/parallel_loader.html | 2 +- .../distributed/xla_multiprocessing.html | 2 +- .../torch_xla/utils/serialization.html | 2 +- master/_modules/torch_xla/utils/utils.html | 2 +- master/genindex.html | 2 +- master/index.html | 45 ++++++++++++++++-- master/notes/source_of_recompilation.html | 2 +- master/objects.inv | Bin 718 -> 718 bytes master/py-modindex.html | 2 +- master/search.html | 2 +- master/searchindex.js | 2 +- 14 files changed, 52 insertions(+), 17 deletions(-) diff --git a/master/_modules/index.html b/master/_modules/index.html index c7bbead1d82..5c596c23770 100644 --- a/master/_modules/index.html +++ b/master/_modules/index.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/_modules/torch_xla/core/functions.html b/master/_modules/torch_xla/core/functions.html index 3793cec633e..1648dabb466 100644 --- a/master/_modules/torch_xla/core/functions.html +++ b/master/_modules/torch_xla/core/functions.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/_modules/torch_xla/core/xla_model.html b/master/_modules/torch_xla/core/xla_model.html index 41f6983f473..9f6a118b918 100644 --- a/master/_modules/torch_xla/core/xla_model.html +++ b/master/_modules/torch_xla/core/xla_model.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/_modules/torch_xla/distributed/parallel_loader.html b/master/_modules/torch_xla/distributed/parallel_loader.html index 1e29374079b..3c8d07f047a 100644 --- a/master/_modules/torch_xla/distributed/parallel_loader.html +++ b/master/_modules/torch_xla/distributed/parallel_loader.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/_modules/torch_xla/distributed/xla_multiprocessing.html b/master/_modules/torch_xla/distributed/xla_multiprocessing.html index 9c9c4aa84dd..5c4ef217e92 100644 --- a/master/_modules/torch_xla/distributed/xla_multiprocessing.html +++ b/master/_modules/torch_xla/distributed/xla_multiprocessing.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/_modules/torch_xla/utils/serialization.html b/master/_modules/torch_xla/utils/serialization.html index 69c6e66e1ce..cc978c14e80 100644 --- a/master/_modules/torch_xla/utils/serialization.html +++ b/master/_modules/torch_xla/utils/serialization.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/_modules/torch_xla/utils/utils.html b/master/_modules/torch_xla/utils/utils.html index 0199e368177..b7125523c80 100644 --- a/master/_modules/torch_xla/utils/utils.html +++ b/master/_modules/torch_xla/utils/utils.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/genindex.html b/master/genindex.html index ec04733ba96..0bdfd8c12e7 100644 --- a/master/genindex.html +++ b/master/genindex.html @@ -226,7 +226,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
diff --git a/master/index.html b/master/index.html index 05510c30a9c..5001b9609d4 100644 --- a/master/index.html +++ b/master/index.html @@ -225,7 +225,7 @@
- master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
@@ -426,9 +426,10 @@
  • Here, mesh is a 2x2 mesh with axes ‘x’ and ‘y’
  • A tensor’s sharding can be visualized using the visualize_tensor_sharding method
  • +
  • Currently, model should be loaded to xla device via distribute_module.
  • Fully Sharded Data Parallel via SPMD
  • @@ -3710,6 +3711,39 @@

    A tensor’s sharding can be visualized using the alt_text

    You could use these examples on TPU/GPU/CPU single-host and modify it to run on multi-host. And you could modify it to sharding-style tiled, partial_replication and replicated.

    +

    We are introducing a new PyTorch/XLA SPMD feature, called auto-sharding, RFC. This is an experimental feature in r2.3 and nightly, that supports XLA:TPU and a single TPUVM host.

    +

    PyTorch/XLA auto-sharding can be enabled by one of the following:

    +
      +
    • Setting envvar XLA_SPMD_AUTO=1

    • +
    • Calling the SPMD API in the beginning of your code: +.. code-block:: python

      +
      +

      import torch_xla.runtime as xr +xr.use_spmd(auto=True)

      +
      +
    • +
    • Calling torch.distributed._tensor.distribute_module with auto-policy and xla: +```python +import torch_xla.runtime as xr +from torch.distributed._tensor import DeviceMesh, distribute_module +from torch_xla.distributed.spmd import auto_policy

    • +
    +

    device_count = xr.global_runtime_device_count() +device_mesh = DeviceMesh("xla", list(range(device_count)))

    + +
    +

    Currently, model should be loaded to xla device via distribute_module.

    +

    model = MyModule() # nn.module +sharded_model = distribute_module(model, device_mesh, auto_policy) +```

    +

    Optionally, one can set the following options/env-vars to control the behavior of +the XLA-based auto-sharding pass:

    +
      +
    • XLA_AUTO_USE_GROUP_SHARDING: group resharding of the parameters. Set by default.

    • +
    • XLA_AUTO_SPMD_MESH: logical mesh shape to be used for auto-sharding. For example, +XLA_AUTO_SPMD_MESH=2,2 corresponds to a 2-by-2 mesh with 4 global devices. If unset, +a default device mesh shape of num_devices,1 will be used.

    • +

    Fully Sharded Data Parallel via SPMD

    @@ -3759,8 +3793,8 @@

    Sharding output -

    Gradient checkpointing

    +
    +

    Gradient checkpointing

    Currently, gradient checkpointing needs to be applied to the module before the FSDP wrapper. Otherwise, recursively looping into child modules will end up in an infinite loop. We will fix this issue in a future release.

    Example usage:

    from torch_xla.distributed.fsdp import checkpoint_module
    @@ -3980,9 +4014,10 @@ 

    HuggingFace Llama 2 ExampleHere, mesh is a 2x2 mesh with axes ‘x’ and ‘y’
  • A tensor’s sharding can be visualized using the visualize_tensor_sharding method
  • +
  • Currently, model should be loaded to xla device via distribute_module.
  • Fully Sharded Data Parallel via SPMD
  • diff --git a/master/notes/source_of_recompilation.html b/master/notes/source_of_recompilation.html index e4c1b622c64..a1389351dd4 100644 --- a/master/notes/source_of_recompilation.html +++ b/master/notes/source_of_recompilation.html @@ -225,7 +225,7 @@
    - master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
    diff --git a/master/objects.inv b/master/objects.inv index 4f0f7994315a8a1657b9fed6a8985363b14eaefe..5a37dc1af05e59f458826b62385bec70339c3999 100644 GIT binary patch delta 18 ZcmX@ddX9BM0K2idfq{i(;>PfDCIC5A1*-r6 delta 18 ZcmX@ddX9BM0DD@hahj2F^2YFTCICL!20H)% diff --git a/master/py-modindex.html b/master/py-modindex.html index abd8b1a3db5..71d6eb277ff 100644 --- a/master/py-modindex.html +++ b/master/py-modindex.html @@ -228,7 +228,7 @@
    - master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
    diff --git a/master/search.html b/master/search.html index f9e2082e4ab..579bdd39482 100644 --- a/master/search.html +++ b/master/search.html @@ -225,7 +225,7 @@
    - master (2.3.0+gitfe3f23c ) + master (2.3.0+git370089a )
    diff --git a/master/searchindex.js b/master/searchindex.js index a203b055d58..f9a2bcb6be5 100644 --- a/master/searchindex.js +++ b/master/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["index","notes/source_of_recompilation"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.intersphinx":1,"sphinx.ext.todo":2,"sphinx.ext.viewcode":1,sphinx:56},filenames:["index.rst","notes/source_of_recompilation.md"],objects:{"torch_xla.core":{functions:[0,0,0,"-"],xla_model:[0,0,0,"-"]},"torch_xla.core.functions":{all_gather:[0,1,1,""],all_reduce:[0,1,1,""],nms:[0,1,1,""]},"torch_xla.core.xla_model":{add_step_closure:[0,1,1,""],all_gather:[0,1,1,""],all_reduce:[0,1,1,""],all_to_all:[0,1,1,""],do_on_ordinals:[0,1,1,""],get_local_ordinal:[0,1,1,""],get_memory_info:[0,1,1,""],get_ordinal:[0,1,1,""],get_rng_state:[0,1,1,""],get_stablehlo:[0,1,1,""],get_stablehlo_bytecode:[0,1,1,""],get_xla_supported_devices:[0,1,1,""],is_master_ordinal:[0,1,1,""],mesh_reduce:[0,1,1,""],optimizer_step:[0,1,1,""],rendezvous:[0,1,1,""],save:[0,1,1,""],set_rng_state:[0,1,1,""],wait_device_ops:[0,1,1,""],xla_device:[0,1,1,""],xla_device_hw:[0,1,1,""],xrt_world_size:[0,1,1,""]},"torch_xla.distributed":{parallel_loader:[0,0,0,"-"],xla_multiprocessing:[0,0,0,"-"]},"torch_xla.distributed.parallel_loader":{ParallelLoader:[0,2,1,""]},"torch_xla.distributed.parallel_loader.ParallelLoader":{per_device_loader:[0,3,1,""]},"torch_xla.distributed.xla_multiprocessing":{MpModelWrapper:[0,2,1,""],MpSerialExecutor:[0,2,1,""],spawn:[0,1,1,""]},"torch_xla.distributed.xla_multiprocessing.MpModelWrapper":{to:[0,3,1,""]},"torch_xla.distributed.xla_multiprocessing.MpSerialExecutor":{run:[0,3,1,""]},"torch_xla.utils":{serialization:[0,0,0,"-"],utils:[0,0,0,"-"]},"torch_xla.utils.serialization":{
load:[0,1,1,""],save:[0,1,1,""]},"torch_xla.utils.utils":{DataWrapper:[0,2,1,""],SampleGenerator:[0,2,1,""]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","method","Python method"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:method"},terms:{"001ms110":0,"001ms228":0,"001ms32":0,"001ms339":0,"001ms434":0,"001ms61":0,"001ms79":0,"001u":0,"002ms921":0,"062u":0,"063u":0,"06m09s401ms746":0,"0mib":0,"0th":0,"100m":0,"16384mib":0,"16g":0,"173u":0,"183u":0,"20ab2c7a2d06":0,"21s102ms853":0,"236u":0,"283u":0,"300w":0,"305u":0,"36c":0,"38w":0,"4e8e5511555073ce8b6d1a436bf808c9333dcac6":0,"537d4b0264b029688281412214d252e9":0,"773u":0,"778ms572":0,"778u":0,"8_cuda_12":0,"90th":0,"973u":0,"abstract":0,"boolean":0,"break":[0,1],"byte":0,"case":0,"class":0,"default":0,"export":0,"final":0,"float":[0,1],"function":0,"import":0,"int":[0,1],"long":[0,1],"new":1,"public":0,"return":[0,1],"short":[0,1],"static":1,"super":0,"switch":[0,1],"throw":0,"true":[0,1],"try":[0,1],"var":0,"while":[0,1],Adding:0,And:[0,1],Bus:0,But:[0,1],Doing:0,For:[0,1],GCS:0,IDs:0,One:0,That:1,The:1,Then:0,There:[0,1],These:0,Useful:0,Using:0,VMs:0,With:1,YES:1,Yes:1,__call__:0,__init__:0,__main__:0,__name__:0,__torch_dispatch__:0,__version__:0,_after:0,_all_gath:0,_ctc_loss:0,_ctc_loss_backward:0,_get_xla_tensor_dimension_s:1,_get_xla_tensors_hlo:0,_get_xla_tensors_text:[0,1],_input_sharding_:0,_intern:0,_local_scalar_dens:0,_mp_fn:0,_rank:0,_the:0,_thread_fn:0,_worker:0,_xla_ir:0,_xla_save_tensor:0,_xlac:[0,1],_your_copied_token__:0,a_xla:0,abil:[0,1],abl:[0,1],about:[0,1],abov:[0,1],abs:0,acceler:0,accelerator_typ:0,accept:0,access:[0,1],accessor:1,accommod:0,account:0,accumul:0,accur:0,accuraci:0,achiev:0,acquir:0,across:0,act:0,activ:0,actual:[0,1],adam:0,add:[0,1],add_:0,add_step_closur:0,added:0,adding:[0,1],addit:0,addition:0,address:0,adjust:0,adopt:1,advanc:0,advantag:0,advis:0,after:[0,1],agai
n:0,against:0,agent:0,agre:0,ahead:1,aim:[0,1],alexnet:0,algorithm:0,alia:0,all:[0,1],all_gath:0,all_gather_object:0,all_reduc:0,all_reduce_gradi:0,all_step:0,all_to_al:0,allow:0,alltoal:0,along:0,alpha:0,alreadi:[0,1],also:[0,1],alt_text:0,alter:0,altern:[0,1],although:[0,1],alwai:[0,1],among:0,amort:0,amount:[0,1],analyz:0,ani:[0,1],annoi:1,annot:0,anoth:[0,1],anti:1,aot:0,api:1,appear:0,append:0,appli:0,applic:0,appreci:0,approach:[0,1],appropri:0,apt:0,architectur:0,area:0,arg:0,argument:0,arithmet:0,around:[0,1],arrai:0,arrang:0,arxiv:0,ask:[0,1],assert:0,asset:0,assign:0,assum:[0,1],assumpt:1,assur:1,astronaut:0,async:0,asynchron:0,aten:[0,1],attach:0,attempt:0,attr:0,auth:0,authent:0,auto_wrap_polici:0,auto_wrapper_cal:0,autocheckpoint:0,autograd:0,automag:1,automat:1,autowrap:0,avail:[0,1],averag:0,avoid:0,awk:0,axi:0,axis_nam:0,b8b484515a97f74e013dcf38125c44d53a41f011:0,b_xla:0,back:0,backend:[0,1],backward:0,bad:0,bandwidth:1,bar:0,barrier:0,base:[0,1],bash:0,bashrc:0,basi:0,basic:1,batch:0,batch_siz:0,batchdim:0,batches_per_execut:0,bdhirsh:1,becaus:0,becom:[0,1],been:[0,1],befor:[0,1],begin:0,beginn:1,behav:0,behavior:0,being:0,below:[0,1],bench:0,bert_pytorch:0,best:0,best_step:0,better:[0,1],between:[0,1],bf16:0,bifloat16:0,big:[0,1],big_tensor:0,biggest:0,bin:0,binari:0,bit:0,bla:1,blabla:1,blaze:1,blob:0,block:0,blocker:1,blowup:1,bool:0,both:[0,1],bottleneck:0,box:0,branch:[0,1],breakdown:0,brian:1,bridg:0,brief:0,bring:0,broadcast:0,broadcast_master_param:0,broken:0,bucket:[0,1],bug:0,busi:0,bwd:0,bypass:0,bytecod:0,cachedsynctensor:0,call:[0,1],callabl:0,callback:0,caller:0,cannot:[0,1],cap:0,capac:0,captur:0,capture_profil:0,card:0,care:[0,1],carri:[0,1],categori:0,caus:[0,1],caution:0,center:0,central1:0,central2:0,certain:[0,1],certif:0,chang:1,chart:0,cheap:1,checkout:0,checkpoint_dir:0,checkpoint_modul:0,children:0,chip:0,chkpt_mgr:0,chkpt_on_preempt:0,choic:1,choos:0,ckpt:0,ckpt_path:0,ckpt_prefix:0,ckpt_suffix:0,clean:0,cleanup:0,clear_al:0
,cli:0,client:0,clip_coef:0,clip_grad:0,clip_grad_norm_:0,closer:0,closur:0,cloud:0,cluster:0,code:1,colab:0,collect:0,com:0,combin:1,combo:1,come:[0,1],command:0,comment:[0,1],commit:0,common:1,commonli:1,commun:0,compar:0,comparison:0,compat:0,compil:1,compiled_cod:0,compiletim:0,complet:0,complex:0,comput:[0,1],concat:0,concat_dimens:0,concept:0,conclud:0,concret:1,concurr:0,condit:1,config:0,configur:0,confirm:0,connect:0,consid:0,consist:0,consol:0,consolid:0,consolidate_sharded_ckpt:0,consolidate_sharded_model_checkpoint:0,constant:0,constantli:0,constrain:0,constraint:0,construct:0,consum:[0,1],contain:[0,1],content:0,context:[0,1],contigu:0,continu:0,control:0,conv2d:0,converg:0,convers:[0,1],coordin:0,copi:0,core:0,correct:1,correctli:0,correspond:0,corrupt:0,cost:0,could:[0,1],count:0,counter:0,coupl:0,cover:0,coverag:1,cp38:0,cpu:1,crash:0,creation:0,cross:0,crossentropyloss:0,cu121:0,cuda11:0,cuda:[0,1],curl:0,current:[0,1],current_node_rank:0,custom:[0,1],cut:[0,1],cv2:0,daemon:0,data:1,data_parallel:0,datadir:0,dataload:0,dataparallel:0,dataset:0,datatyp:0,datawrapp:0,dcn:0,dcn_mesh_shap:0,ddim:0,ddp:0,ddp_model:0,debug:1,dec:0,decid:[0,1],decis:0,dedic:0,def:0,defer:0,defin:0,definit:[0,1],defval:0,degrad:0,degre:0,delai:0,demo_bas:0,demo_fn:0,demonstr:0,denot:0,depend:[0,1],deposit:0,deprec:0,depth:0,deriv:0,describ:0,deseri:0,desir:0,destin:0,destroi:0,destroy_process_group:0,detail:1,detect:0,detial:0,dev:0,devic:1,device_id:0,device_mesh:0,device_prefetch_s:0,devicemesh:0,devirtu:1,devkind:0,diagnos:0,dict:0,dictionari:0,did:0,didn:1,diff:0,differ:1,differenti:0,difficult:0,dim:0,dimens:0,dir:0,dir_nam:0,direct:0,directli:[0,1],disabl:0,discov:0,discrep:1,discuss:0,disk:0,disp:0,dispatch:0,displai:0,dist:0,dist_cp:0,distribute_tensor:0,distributed_checkpoint:0,distributedtensor:0,dk2:0,do_on_ordin:0,doc:[0,1],docker_imag:0,document:0,doe:[0,1],doesn:[0,1],don:[0,1],done:[0,1],dot:0,doubl:[0,1],down:0,download:0,dpmsolvermultistepschedul:0,dramat:0
,driver:0,drop:0,drop_last:0,dtype:[0,1],due:0,dump:0,durat:0,dure:0,dynam:0,dynamo_resnet18:0,dynamo_train_model:0,e2e:0,each:[0,1],eager:[0,1],eagerli:[0,1],earli:1,easi:[0,1],easier:[0,1],easili:[0,1],ecc:0,echo:0,effect:0,effici:0,effort:[0,1],either:[0,1],element:[0,1],els:[0,1],empti:0,enabl:0,encount:0,end:0,endpoint:0,enforc:[0,1],engin:0,enhanc:0,enjoi:0,enough:[0,1],ensur:[0,1],entir:0,enumer:0,env:0,epoch:0,epxress:0,equal:0,equival:0,error:0,especi:0,etc:[0,1],eval:0,eval_interv:0,eval_model:0,evalu:[0,1],even:[0,1],evenli:0,event:0,eventu:0,everi:[0,1],everyth:[0,1],exactli:1,examin:0,exampl:1,except:0,exchang:0,excit:0,exec:0,execut:1,executor:0,exist:0,exit:0,expand:0,expect:[0,1],expens:[0,1],experi:0,experiment:0,explain:[0,1],explan:1,explicit:0,explicitli:[0,1],explod:1,exponenti:1,expos:0,extra:0,extract:0,extrem:0,fact:0,fail:0,fairscal:0,fake_data:0,fakedata:0,fall:0,fals:0,familiar:0,famou:0,fan:0,far:0,fashion:0,fast:1,faster:[0,1],fc1:0,feasibl:1,fed:0,feel:0,fetch:0,few:[0,1],fewer:1,field:0,figur:0,file:0,file_or_path:0,filesystem:0,filesystemread:0,filesystemwrit:0,final_ckpt:0,find:0,finish:0,first:0,fit:0,five:0,fix:0,flag:0,float16:0,float32:0,floattensor:0,flow:0,focu:[0,1],follow:[0,1],followup:1,footprint:0,forc:[0,1],fork:0,form:[0,1],format:0,forth:0,forward:0,found:0,four:0,four_d:0,fp32:0,frame:0,framework:[0,1],free:0,frequenc:0,frequent:0,frontend:1,frozenclipembedd:0,fsdpv2:0,fsdpvitmodel:0,fsspec:0,full:0,fullyshardeddataparallel:0,fuse:0,fusion:0,futur:[0,1],fwd:0,gain:0,gather:0,gcloud:0,gcp:0,gcr:0,gener:[0,1],generated_t:0,geomean:0,get:1,get_dimention_s:1,get_local_ordin:0,get_logical_mesh:0,get_memory_info:0,get_ordin:0,get_rng_stat:0,get_shard_metadata:0,get_stablehlo:0,get_stablehlo_bytecod:0,get_world_s:0,get_xla_supported_devic:0,git:0,github:0,give:0,given:[0,1],global:0,global_mast:0,global_runtime_device_count:0,global_tensor:0,globalr:0,gloo:0,gnupg:0,gold:1,gone:1,good:[0,1],googl:0,googleapi:0,gpgkei:0,gpt:0,
gpu_num_devic:0,grad:0,gradient_as_bucket_view:0,graident_as_bucket_view:0,graph:[0,1],graphviz:0,great:1,grpc:0,gspmd:0,guidelin:0,half:0,hand:0,handl:[0,1],hang:0,happen:[0,1],hard:[0,1],hardcod:0,hardwar:0,has:[0,1],hash:0,have:[0,1],haven:1,heavili:0,hei:1,help:[0,1],henc:0,here:1,hide:0,higher:0,highest:0,highli:0,highlight:0,hint:0,hirsh:1,hit:1,hlo:0,hlomodul:0,hoc:1,hold:0,hook:0,hope:1,hors:0,host_to_device_transfer_thread:0,hour:0,how:1,howev:0,howto:0,html:0,http:0,huge:[0,1],hurt:1,hybridmesh:0,ici_mesh_shap:0,ideal:[0,1],ident:0,identifi:0,idl:0,ignor:0,illustr:0,imag:[0,1],immedi:0,impact:[0,1],implement:[0,1],implicitli:1,importantli:0,impos:0,impract:1,improv:0,inbound:0,includ:[0,1],inde:1,independ:0,index:0,indic:[0,1],indirect:0,individu:0,inference_tpu_:0,inference_tpu_multidevic:0,inference_tpu_single_devic:0,infinit:0,info:[0,1],inform:[0,1],infrastructur:0,init:0,init_method:0,init_process_group:0,initi:0,initialize_cach:0,inner:0,inplac:0,input:0,input_mesh:0,input_shard:0,input_tensor:0,insert:0,insid:0,inspect:0,inspir:0,instead:[0,1],instruct:0,int32:0,integ:0,intend:0,interact:0,interchang:0,interconnect:0,interest:0,interfac:0,interleav:0,intermedi:0,intermediari:0,intern:[0,1],internal_ip_address:0,interpret:[0,1],intersect:0,introduc:0,invest:0,investig:0,invis:0,invisible_watermark:0,invok:0,involv:[0,1],iou:0,iou_threshold:0,irvalu:1,is_master_ordin:0,is_spmd:0,isinst:0,isn:1,issu:0,item:0,iter:0,its:0,itself:0,jax:0,jit:0,job:0,join:0,just:[0,1],kb_free:0,kb_total:0,keep:[0,1],kei:0,kept:1,kernel:1,kind:0,kinda:1,know:[0,1],kwarg:0,l12:0,l214:0,l_in:0,l_out:0,label:0,lambda:0,larg:[0,1],larger:0,largest:0,last:0,latenc:0,later:0,latter:0,launch:0,launcher:0,layer:0,lazi:1,lazili:[0,1],lazytensor:0,ld_library_path:0,lead:0,learn:0,least:0,leav:0,left:0,leftov:0,legaci:0,len:0,length:1,less:[0,1],let:0,leverag:0,lib64:0,lib:0,libgl1:0,libopenbla:0,librari:0,libtpu:0,licens:0,like:[0,1],likewis:0,limit:0,line:[0,1],linear:0,link:0,linu
x_x86_64:0,list:0,live:[0,1],load_dataset:0,load_state_dict:0,loader:0,loader_prefetch_s:0,local:0,local_shard:0,localhost:0,localservic:0,locat:0,log:0,logdir:0,logic:[0,1],login:0,logit:0,longer:1,look:0,loop:[0,1],lora:0,loss:0,loss_fn:0,lot:[0,1],low:0,lower:[0,1],lr_scheduler_divide_every_n_epoch:0,lr_scheduler_divisor:0,lsy323:0,m1_shard:0,machin:0,machine_0:0,machine_0_internal_ip_address:0,machine_0_ip_address:0,machine_1:0,made:[0,1],magic:[0,1],mai:[0,1],main:0,mainli:1,make:[0,1],manag:0,mani:[0,1],manipul:0,manual:0,manual_se:0,manylinux_2_28_x86_64:0,map:0,mark:0,mark_shard:0,mark_step:0,mask:[0,1],master:0,master_addr:0,master_onli:0,master_port:0,match:0,materi:[0,1],materiz:1,matrix:0,matter:1,matur:0,max:[0,1],max_devic:0,max_norm:0,maxim:0,maximum:0,mayb:0,maybe_download_and_load:0,mean:[0,1],measur:0,mechan:0,median:0,memori:1,mention:[0,1],merg:[0,1],mesh_reduc:0,mesh_shap:0,messag:0,met:0,metadata:0,metrics_report:0,middl:[0,1],mig:0,might:[0,1],migrat:0,min:0,mind:0,minim:0,minimum:0,minu:0,miss:0,mnasnet1_0:0,mobilenet_v2:0,mode:[0,1],model:1,model_id:0,modif:0,modifi:0,modul:0,moment:0,momentum:0,more:1,most:0,mostli:0,mount:0,move:1,mp_device_load:0,mpdeviceload:0,mpmodelwrapp:0,mpserialexecutor:0,mseloss:0,much:[0,1],mul:1,mul_:0,multinode_train:0,multipl:1,multipli:0,multiprocess:0,multislic:0,must:0,my_dtensor:0,my_funct:0,my_modul:0,mynetwork:0,nad:0,name:[0,1],named_paramet:0,namespac:0,nativ:0,nccl:0,necessari:0,need:[0,1],nest:0,net1:0,net2:0,net:0,network:0,neural:0,neuron:0,new_group:0,new_rank:0,newer:0,next:[0,1],nice:0,nightli:0,nightly_3:0,nit:1,nllloss:0,nms:0,nnc:1,nnode:0,no_grad:0,node:1,node_rank:0,non:[0,1],none:0,nonzero:[0,1],norm:0,norm_typ:0,normal:[0,1],note:[0,1],notebook:0,notic:0,now:[0,1],nproc:0,nproc_per_nod:0,num_devic:0,num_epoch:0,num_gpu_devic:0,num_gpu_machin:0,num_host:0,num_output:1,num_warmup_epoch:0,number_gpu_vm:0,number_local_gpu_devic:0,numpi:0,nvidia:0,object:0,observ:0,obviou:1,occupi:0,occur:0,off
:0,offend:0,offer:0,offici:0,often:[0,1],okai:1,old:0,omit:0,onc:[0,1],one:[0,1],ones:[0,1],ongo:0,onli:[0,1],onto:0,opaqu:0,opcod:0,open:0,openxla:0,openxla_ev:0,oper:0,operation_semant:0,opportun:1,ops:[0,1],opt:0,optim:1,optimizaiton:0,optimizer_arg:0,optimizer_step:0,optimz:0,option:0,order:0,ordereddict:0,ordin:0,org:0,origin:0,other:[0,1],otherwis:[0,1],ouput:0,our:[0,1],out:0,outer:0,output_s:0,outsid:0,over:0,overal:0,overhead:0,overlap:0,overload:1,overrid:0,own:0,packag:0,pad:[0,1],page:0,pair:0,paper:0,parallel_load:0,parallelload:0,param:0,param_norm:0,parent:0,pars:1,part:0,partial:[0,1],partial_repl:0,particip:0,particular:0,partition_spec:0,pass:0,patch:0,path:[0,1],pattern:[0,1],payload:0,peculiar:0,per:0,per_device_load:0,percentil:0,perf:[0,1],period:0,permit:0,persist:0,perspect:0,photograph:0,physic:0,pid:0,piec:0,pin:0,pin_layout:0,pine:0,pip3:0,pip:0,pipe:0,pipe_watermark:0,pipelin:0,pjrt_backend:0,pjrt_computation_cli:0,pjrt_devic:0,pjrt_distribut:0,pkg:0,place:0,placement:0,plai:0,plan:0,planner:0,pleas:0,plu:0,plugin:0,png:0,point:[0,1],polici:0,popular:0,port:0,portion:0,posit:0,possibl:0,post:0,potenti:0,pov:1,practic:[0,1],prebuilt:0,preced:0,precision_scop:0,pred:0,preemption:0,preinstal:0,preload:0,prepar:0,presenc:1,preserv:0,pretti:[0,1],prevent:0,preview:0,previou:[0,1],previous:0,primit:0,print:[0,1],prior:0,privat:0,privileg:0,probabl:1,problem:1,problemat:0,proce:1,proceed:0,produc:[0,1],program:[0,1],progress:0,project:0,project_id:0,promis:0,prompt:0,proof:0,propag:0,proper:0,properli:1,properti:[0,1],propos:1,prototyp:0,provid:[0,1],provis:0,pt_xla_debug:0,pth:0,publish:0,pull:0,put:0,pwr:0,python3:0,python:[0,1],pytorch:1,pytorch_test_bas:0,pytorch_test_with_slow:0,pywt:0,qualifi:0,queri:0,question:1,queu:0,queue:0,queuedresourc:0,quick:0,rais:0,ran:0,rand:1,randn:0,random:0,rang:0,rank:0,rank_of_current_machin:0,rate:0,rdzv_endpoint:0,reach:0,readabl:0,reader:0,readi:0,readm:0,readonli:0,readthedoc:0,realli:[0,1],reason:0,rec
eiv:0,recogn:[0,1],recommend:0,recompil:0,reconstruct:0,record:0,recreat:0,recurs:0,reduc:0,reduce_and:0,reduce_fn:0,reduce_max:0,reduce_min:0,reduce_mul:0,reduce_or:0,reduce_sum:0,reduce_typ:0,reduct:0,regard:0,regardless:0,region:0,regist:0,regular:0,reimplement:0,relat:0,relationship:0,releas:0,reli:1,reliabl:0,relu:0,remain:[0,1],remateri:0,rememb:0,remov:0,renam:0,replac:0,replic:0,replica:0,repositori:0,repres:[0,1],represent:0,request:[0,1],requir:[0,1],rerun:0,res:0,resent18:0,reshard_after_forward:0,resid:0,residu:0,resnet18:0,resnext50_32x4d:0,resort:0,resourc:0,respons:0,rest:[0,1],restart:0,restor:0,result:0,resum:0,retriev:[0,1],reus:0,review:0,rewrit:[0,1],rfc:0,rich:0,rid:1,ride:0,right:[0,1],rng:0,ronghanghu:0,root:[0,1],roughli:0,round:0,rout:0,rule:0,run:1,run_async:0,run_demo:0,run_vit_train:0,runtime_vers:0,s64:1,safe:0,safetensor:0,sai:1,same:[0,1],sampl:0,sample_count:0,samplegener:0,satisfi:0,save1:0,save_async:0,save_state_dict:0,scalar:[0,1],scale:0,scale_model_input:0,scenario:0,schedul:0,scope:0,score:0,score_threshold:0,scp:0,scroll:0,sdk:0,second:0,section:0,see:[0,1],seed:0,seem:0,seen:0,select:0,self:0,semant:[0,1],send:0,sens:0,sent:0,sentenc:1,separ:0,sequenti:0,serial:0,serial_exec:0,serv:0,server:0,servic:0,set:[0,1],set_rng_stat:0,sgd:0,shallow:0,shape:0,shard_metadata:0,shard_output:0,shardingspec:0,share:0,sharidng:0,sheet:0,shift:0,shm:0,short_metrics_report:0,should:[0,1],show:0,shown:[0,1],side:1,signal:0,signific:0,significantli:0,similar:0,simplelinear:0,simpler:0,simpli:0,simplic:[0,1],sinc:[0,1],singl:1,size:[0,1],size_based_auto_wrap_polici:0,skip:0,slice:0,slightli:0,slow:0,slowdown:0,slower:0,small:[0,1],smaller:[0,1],smart:1,smash:1,smi:0,snippet:0,softwar:0,solut:[0,1],solv:1,somehow:1,someth:0,sometim:[0,1],sort:1,sound:1,spatial:0,spawn:0,special:0,specif:0,specifi:0,speed:[0,1],speedup:0,spend:0,spent:0,split:0,split_count:0,split_dimens:0,spmd_debug_1:0,spmd_fully_sharded_data_parallel:0,spmdfullyshardeddataparal
lel:0,spmdloadplann:0,spmdpartition:0,spmdsaveplann:0,squeezenet1_1:0,src:0,ssh:0,stabil:0,stabilityai:0,stablehlo:0,stack:[0,1],stai:0,start:0,start_method:0,state:0,state_dict:0,statement:[0,1],statist:0,statu:0,std:0,step:[0,1],steptrac:0,stick:1,still:[0,1],stitch:0,stop:0,storag:0,storage_read:0,storage_writ:0,store:0,stori:0,str:0,straightforward:0,strategi:0,stream:0,streamexecutor:0,strictli:0,stride:0,string:0,strongli:0,structur:0,style:0,sub:0,subclass:0,subgraph:[0,1],subgroup:0,subject:0,submit:0,submodul:0,subnetwork:0,subsequ:0,subset:0,substanti:0,substitut:0,subtl:0,successfulli:0,sudo:0,suggest:0,suit:0,suitabl:0,sum:0,summari:0,supplement:0,support:1,suppos:0,suppress:0,sure:0,sxm2:0,symbol:1,sync:0,synchron:0,synthet:0,sys:0,system:0,systemctl:0,tabl:0,tag:0,take:1,taken:[0,1],talk:0,target:[0,1],task:0,tbh:1,teach:1,team:0,techniqu:0,technolog:0,tee:0,tell:[0,1],temp:0,tempfil:0,tensorboard:0,tensorflow:[0,1],tensors_on_devic:0,term:[0,1],termin:0,tesla:0,test_put_xla_uint8:0,test_set_batch_s:0,test_torch:0,test_train_mp_imagenet:0,test_train_mp_imagenet_fsdp:0,test_train_mp_mnist:0,test_train_mp_mnist_fsdp_with_ckpt:0,test_train_spmd_imagenet:0,text:0,text_to_imag:0,tf_cpp_log_thread_id:0,tf_cpp_min_log_level:0,tf_cpp_vmodul:0,tf_vlog:0,tfrt:0,than:[0,1],thei:[0,1],them:[0,1],theoret:1,therefor:0,thi:[0,1],thing:0,think:1,those:0,though:0,thought:1,thousand:0,thread:0,three:0,throttl:0,through:[0,1],throughput:0,thu:0,thumb:0,tile:0,time:[0,1],timelin:0,timm_vision_transform:0,tmp:0,todo:0,togeth:0,token:0,too:[0,1],toolkit:0,top:0,topolog:0,torch:1,torch_test_devic:0,torch_warn:1,torch_xla:0,torchbench:0,torchfx:0,torchrun:0,torchscript:1,torchvis:0,tot:0,total:[0,1],total_norm:0,totalsampl:0,touch:0,toymodel:0,tpu_legaci:0,tpu_nam:0,tpu_process_bound:0,tpu_visible_chip:0,tpusubnet:0,tpuvm:0,trace:[0,1],track:0,tracked_step:0,trade:[0,1],tradit:0,train_imagenet:0,train_load:0,train_mnist_xla:0,train_model:0,train_model_main:0,training_or_infer
ence_script_using_spmd:0,transfer:0,transferfromdevic:0,transferfromdevicetim:0,transform:0,transformer_auto_wrap_polici:0,transformer_layer_cl:0,transit:0,translat:0,transport:0,transpos:1,treat:0,tri:0,trick:1,trigger:[0,1],trivial:1,truth:1,tune:0,tupl:[0,1],turn:0,tutori:0,two:[0,1],two_d_fully_shard:0,two_d_partially_repl:0,txt2img:0,txt:0,tying:0,type:0,typic:0,uglier:1,unavail:0,unawar:1,unblock:0,uncorr:0,under:0,underli:0,understand:1,union:0,uniqu:[0,1],unless:[0,1],unlik:0,unlock:0,unmodifi:0,unnecessari:0,unoptim:0,unpin:0,unreli:0,until:0,unwant:0,upcom:0,updat:[0,1],upload:0,upon:0,upper:1,upstream:0,usag:[0,1],use_color:0,use_cuda:0,use_gradient_checkpoint:0,use_nested_fsdp:0,use_spmd:0,used:[0,1],useful:[0,1],user:1,uses:[0,1],using:1,usr:0,usual:0,v100:0,valid:0,valu:[0,1],valuer:0,vari:[0,1],variabl:1,variant:1,variat:0,veri:[0,1],verifi:0,verison:1,version_id:0,vgg16:0,view:0,visibl:[0,1],vision:0,visualize_shard:0,vit:0,vit_10b_fsdp_exampl:0,vlog:0,volatil:0,wai:[0,1],wait:0,wait_device_op:0,want:[0,1],warn:0,wasn:1,watch:0,watermark:0,weight:0,weight_decai:0,well:[0,1],were:[0,1],when:0,where:[0,1],wherea:0,whether:0,which:[0,1],whl:0,who:0,whole:[0,1],whose:0,why:1,wide:1,win:1,within:0,without:0,won:1,word:1,work:[0,1],workaround:1,worker:0,workload:0,workspac:0,world:[0,1],world_siz:0,wors:1,worth:[0,1],would:[0,1],wrap:0,wrapped_model:0,wrapper:0,write:0,written:0,wrong:1,wrt:0,www:0,xla:1,xla_backend:0,xla_cuda:0,xla_devic:0,xla_device_hw:0,xla_dist:0,xla_downcast_bf16:0,xla_dump_hlo_graph:0,xla_dump_to:0,xla_dynamo_debug:0,xla_flag:0,xla_graph_executor:0,xla_hlo_debug:0,xla_ir_debug:0,xla_metrics_fil:0,xla_multiprocess:0,xla_no_special_scalar:0,xla_optim:0,xla_rendezv:0,xla_resnet18:0,xla_save_hlo_fil:0,xla_save_tensors_fil:0,xla_save_tensors_fmt:0,xla_sharded_tensor:0,xla_sync_wait:0,xla_tensor_z:0,xla_use_bf16:0,xla_use_eager_debug_mod:0,xla_use_f16:0,xla_use_spmd:0,xla_util:0,xlafullyshardeddataparallel:0,xlashard:0,xlatensor:0,xmp:0,xp
u:0,xrt_tpu_config:0,xrt_world_siz:0,xser:0,yea:1,yield:0,you:0,your:[0,1],your_cache_path:0,your_script:0,your_sharded_checkpoint_fil:0,your_tpu_nam:0,zero:0,zero_grad:0,zeros_lik:0,zone:0,zoom:0},titles:["PyTorch on XLA Devices","Source of recompilations in torch_xla"],titleterms:{"2x2":0,"case":1,"new":0,The:0,Use:0,With:0,aannot:0,amp:0,analysi:0,api:0,appendix:1,auto:0,automat:0,awai:0,awar:0,axes:0,background:0,basic:0,beginn:0,benchmark:0,benefit:0,bfloat16:0,billion:0,bound:1,build:0,cach:0,calcul:0,can:[0,1],caveat:0,chang:0,check:0,checkpoint:0,checkpointmanag:0,clear:0,clone:0,code:0,combin:0,common:0,compil:0,conclus:1,constraint:1,control:1,convert:0,cpu:0,creat:0,data:0,dataset:1,debug:0,deep:0,design:0,detail:0,develop:0,devic:0,differ:0,diffus:0,dimens:1,disclaim:0,distribut:0,distributeddataparallel:0,dive:0,docker:0,dtensor:0,dynam:1,dynamo:0,environ:0,exampl:0,execut:0,express:0,fact:1,failur:0,fake:0,featur:0,first:1,fix:1,flow:1,from:[0,1],fsdp:0,fulli:0,further:0,gap:0,get:0,gpu:0,gradient:0,group:0,guid:0,here:0,high:0,host:0,how:0,huggingfac:0,hybrid:0,imagenet:0,infer:0,input:1,instal:0,instanc:0,integr:0,its:1,known:0,layout:0,lazi:0,let:1,level:0,lightn:0,llama:0,load:0,memori:0,mesh:0,method:0,metric:0,mix:0,mnist:0,model:0,more:0,motiv:0,move:0,multi:0,multipl:0,multithread:0,node:0,number:0,oper:1,optim:0,output:[0,1],overview:0,parallel:0,paramet:0,partit:0,perform:0,pjrt:0,pod:0,precis:0,process:0,profil:0,pytorch:0,queri:1,quickstart:0,quirk:0,read:0,real:[0,1],recompil:1,refer:0,rendezv:0,repo:0,report:0,reproduc:0,resnet50:0,resnet:0,run:0,runtim:0,saniti:0,save:0,script:0,setup:0,shape:1,shard:0,simpl:0,singl:0,some:[0,1],sourc:[0,1],spec:0,spmd:0,stabl:0,start:1,support:0,take:0,tensor:[0,1],test:0,tool:0,torch:0,torch_xla:1,torchdynamo:0,tpu:0,train:0,troubleshoot:0,understand:0,unit:0,use:[0,1],user:0,using:0,util:0,variabl:0,version:0,via:0,virtual:0,visual:0,visualize_tensor_shard:0,what:[0,1],wheel:0,when:1,without:1,xla:0,x
la_model:0,xlashardedtensor:0,xrt:0,you:1}}) \ No newline at end of file +Search.setIndex({docnames:["index","notes/source_of_recompilation"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.intersphinx":1,"sphinx.ext.todo":2,"sphinx.ext.viewcode":1,sphinx:56},filenames:["index.rst","notes/source_of_recompilation.md"],objects:{"torch_xla.core":{functions:[0,0,0,"-"],xla_model:[0,0,0,"-"]},"torch_xla.core.functions":{all_gather:[0,1,1,""],all_reduce:[0,1,1,""],nms:[0,1,1,""]},"torch_xla.core.xla_model":{add_step_closure:[0,1,1,""],all_gather:[0,1,1,""],all_reduce:[0,1,1,""],all_to_all:[0,1,1,""],do_on_ordinals:[0,1,1,""],get_local_ordinal:[0,1,1,""],get_memory_info:[0,1,1,""],get_ordinal:[0,1,1,""],get_rng_state:[0,1,1,""],get_stablehlo:[0,1,1,""],get_stablehlo_bytecode:[0,1,1,""],get_xla_supported_devices:[0,1,1,""],is_master_ordinal:[0,1,1,""],mesh_reduce:[0,1,1,""],optimizer_step:[0,1,1,""],rendezvous:[0,1,1,""],save:[0,1,1,""],set_rng_state:[0,1,1,""],wait_device_ops:[0,1,1,""],xla_device:[0,1,1,""],xla_device_hw:[0,1,1,""],xrt_world_size:[0,1,1,""]},"torch_xla.distributed":{parallel_loader:[0,0,0,"-"],xla_multiprocessing:[0,0,0,"-"]},"torch_xla.distributed.parallel_loader":{ParallelLoader:[0,2,1,""]},"torch_xla.distributed.parallel_loader.ParallelLoader":{per_device_loader:[0,3,1,""]},"torch_xla.distributed.xla_multiprocessing":{MpModelWrapper:[0,2,1,""],MpSerialExecutor:[0,2,1,""],spawn:[0,1,1,""]},"torch_xla.distributed.xla_multiprocessing.MpModelWrapper":{to:[0,3,1,""]},"torch_xla.distributed.xla_multiprocessing.MpSerialExecutor":{run:[0,3,1,""]},"torch_xla.utils":{serialization:[0,0,0,"-"],utils:[0,0,0,"-"]},"torch_xla.utils.serialization":{load:[0,1,1,""],save:[0,1,1,""]},"torch_xla.utils.utils":{DataWrapper:[0,2,1,""],SampleGenerator
:[0,2,1,""]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","method","Python method"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:method"},terms:{"001ms110":0,"001ms228":0,"001ms32":0,"001ms339":0,"001ms434":0,"001ms61":0,"001ms79":0,"001u":0,"002ms921":0,"062u":0,"063u":0,"06m09s401ms746":0,"0mib":0,"0th":0,"100m":0,"16384mib":0,"16g":0,"173u":0,"183u":0,"20ab2c7a2d06":0,"21s102ms853":0,"236u":0,"283u":0,"300w":0,"305u":0,"36c":0,"38w":0,"4e8e5511555073ce8b6d1a436bf808c9333dcac6":0,"537d4b0264b029688281412214d252e9":0,"773u":0,"778ms572":0,"778u":0,"8_cuda_12":0,"90th":0,"973u":0,"abstract":0,"boolean":0,"break":[0,1],"byte":0,"case":0,"class":0,"default":0,"export":0,"final":0,"float":[0,1],"function":0,"import":0,"int":[0,1],"long":[0,1],"new":1,"public":0,"return":[0,1],"short":[0,1],"static":1,"super":0,"switch":[0,1],"throw":0,"true":[0,1],"try":[0,1],"var":0,"while":[0,1],Adding:0,And:[0,1],Bus:0,But:[0,1],Doing:0,For:[0,1],GCS:0,IDs:0,One:0,That:1,The:1,Then:0,There:[0,1],These:0,Useful:0,Using:0,VMs:0,With:1,YES:1,Yes:1,__call__:0,__init__:0,__main__:0,__name__:0,__torch_dispatch__:0,__version__:0,_after:0,_all_gath:0,_ctc_loss:0,_ctc_loss_backward:0,_get_xla_tensor_dimension_s:1,_get_xla_tensors_hlo:0,_get_xla_tensors_text:[0,1],_input_sharding_:0,_intern:0,_local_scalar_dens:0,_mp_fn:0,_rank:0,_tensor:0,_the:0,_thread_fn:0,_worker:0,_xla_ir:0,_xla_save_tensor:0,_xlac:[0,1],_your_copied_token__:0,a_xla:0,abil:[0,1],abl:[0,1],about:[0,1],abov:[0,1],abs:0,acceler:0,accelerator_typ:0,accept:0,access:[0,1],accessor:1,accommod:0,account:0,accumul:0,accur:0,accuraci:0,achiev:0,acquir:0,across:0,act:0,activ:0,actual:[0,1],adam:0,add:[0,1],add_:0,add_step_closur:0,added:0,adding:[0,1],addit:0,addition:0,address:0,adjust:0,adopt:1,advanc:0,advantag:0,advis:0,after:[0,1],again:0,against:0,agent:0,agre:0,ahead:1,aim:[0,1],alexnet:0,algorithm:0,alia:0,all:[0,1],
all_gath:0,all_gather_object:0,all_reduc:0,all_reduce_gradi:0,all_step:0,all_to_al:0,allow:0,alltoal:0,along:0,alpha:0,alreadi:[0,1],also:[0,1],alt_text:0,alter:0,altern:[0,1],although:[0,1],alwai:[0,1],among:0,amort:0,amount:[0,1],analyz:0,ani:[0,1],annoi:1,annot:0,anoth:[0,1],anti:1,aot:0,api:1,appear:0,append:0,appli:0,applic:0,appreci:0,approach:[0,1],appropri:0,apt:0,architectur:0,area:0,arg:0,argument:0,arithmet:0,around:[0,1],arrai:0,arrang:0,arxiv:0,ask:[0,1],assert:0,asset:0,assign:0,assum:[0,1],assumpt:1,assur:1,astronaut:0,async:0,asynchron:0,aten:[0,1],attach:0,attempt:0,attr:0,auth:0,authent:0,auto_polici:0,auto_wrap_polici:0,auto_wrapper_cal:0,autocheckpoint:0,autograd:0,automag:1,automat:1,autowrap:0,avail:[0,1],averag:0,avoid:0,awk:0,axi:0,axis_nam:0,b8b484515a97f74e013dcf38125c44d53a41f011:0,b_xla:0,back:0,backend:[0,1],backward:0,bad:0,bandwidth:1,bar:0,barrier:0,base:[0,1],bash:0,bashrc:0,basi:0,basic:1,batch:0,batch_siz:0,batchdim:0,batches_per_execut:0,bdhirsh:1,becaus:0,becom:[0,1],been:[0,1],befor:[0,1],begin:0,beginn:1,behav:0,behavior:0,behvaior:0,being:0,below:[0,1],bench:0,bert_pytorch:0,best:0,best_step:0,better:[0,1],between:[0,1],bf16:0,bifloat16:0,big:[0,1],big_tensor:0,biggest:0,bin:0,binari:0,bit:0,bla:1,blabla:1,blaze:1,blob:0,block:0,blocker:1,blowup:1,bool:0,both:[0,1],bottleneck:0,box:0,branch:[0,1],breakdown:0,brian:1,bridg:0,brief:0,bring:0,broadcast:0,broadcast_master_param:0,broken:0,bucket:[0,1],bug:0,busi:0,bwd:0,bypass:0,bytecod:0,cachedsynctensor:0,call:[0,1],callabl:0,callback:0,caller:0,cannot:[0,1],cap:0,capac:0,captur:0,capture_profil:0,card:0,care:[0,1],carri:[0,1],categori:0,caus:[0,1],caution:0,center:0,central1:0,central2:0,certain:[0,1],certif:0,chang:1,chart:0,cheap:1,checkout:0,checkpoint_dir:0,checkpoint_modul:0,children:0,chip:0,chkpt_mgr:0,chkpt_on_preempt:0,choic:1,choos:0,ckpt:0,ckpt_path:0,ckpt_prefix:0,ckpt_suffix:0,clean:0,cleanup:0,clear_al:0,cli:0,client:0,clip_coef:0,clip_grad:0,clip_grad_norm_:0,clo
ser:0,closur:0,cloud:0,cluster:0,code:1,colab:0,collect:0,com:0,combin:1,combo:1,come:[0,1],command:0,comment:[0,1],commit:0,common:1,commonli:1,commun:0,compar:0,comparison:0,compat:0,compil:1,compiled_cod:0,compiletim:0,complet:0,complex:0,comput:[0,1],concat:0,concat_dimens:0,concept:0,conclud:0,concret:1,concurr:0,condit:1,config:0,configur:0,confirm:0,connect:0,consid:0,consist:0,consol:0,consolid:0,consolidate_sharded_ckpt:0,consolidate_sharded_model_checkpoint:0,constant:0,constantli:0,constrain:0,constraint:0,construct:0,consum:[0,1],contain:[0,1],content:0,context:[0,1],contigu:0,continu:0,control:0,conv2d:0,converg:0,convers:[0,1],coordin:0,copi:0,core:0,correct:1,correctli:0,correspond:0,corrupt:0,cost:0,could:[0,1],count:0,counter:0,coupl:0,cover:0,coverag:1,cp38:0,cpu:1,crash:0,creation:0,cross:0,crossentropyloss:0,cu121:0,cuda11:0,cuda:[0,1],curl:0,current:1,current_node_rank:0,custom:[0,1],cut:[0,1],cv2:0,daemon:0,data:1,data_parallel:0,datadir:0,dataload:0,dataparallel:0,dataset:0,datatyp:0,datawrapp:0,dcn:0,dcn_mesh_shap:0,ddim:0,ddp:0,ddp_model:0,debug:1,dec:0,decid:[0,1],decis:0,dedic:0,def:0,defer:0,defin:0,definit:[0,1],defval:0,degrad:0,degre:0,delai:0,demo_bas:0,demo_fn:0,demonstr:0,denot:0,depend:[0,1],deposit:0,deprec:0,depth:0,deriv:0,describ:0,deseri:0,desir:0,destin:0,destroi:0,destroy_process_group:0,detail:1,detect:0,detial:0,dev:0,devic:1,device_count:0,device_id:0,device_mesh:0,device_prefetch_s:0,devicemesh:0,devirtu:1,devkind:0,diagnos:0,dict:0,dictionari:0,did:0,didn:1,diff:0,differ:1,differenti:0,difficult:0,dim:0,dimens:0,dir:0,dir_nam:0,direct:0,directli:[0,1],disabl:0,discov:0,discrep:1,discuss:0,disk:0,disp:0,dispatch:0,displai:0,dist:0,dist_cp:0,distribute_tensor:0,distributed_checkpoint:0,distributedtensor:0,dk2:0,do_on_ordin:0,doc:[0,1],docker_imag:0,document:0,doe:[0,1],doesn:[0,1],don:[0,1],done:[0,1],dot:0,doubl:[0,1],down:0,download:0,dpmsolvermultistepschedul:0,dramat:0,driver:0,drop:0,drop_last:0,dtype:[0,1],due:0,dum
p:0,durat:0,dure:0,dynam:0,dynamo_resnet18:0,dynamo_train_model:0,e2e:0,each:[0,1],eager:[0,1],eagerli:[0,1],earli:1,easi:[0,1],easier:[0,1],easili:[0,1],ecc:0,echo:0,effect:0,effici:0,effort:[0,1],either:[0,1],element:[0,1],els:[0,1],empti:0,enabl:0,encount:0,end:0,endpoint:0,enforc:[0,1],engin:0,enhanc:0,enjoi:0,enough:[0,1],ensur:[0,1],entir:0,enumer:0,env:0,envvar:0,epoch:0,epxress:0,equal:0,equival:0,error:0,especi:0,etc:[0,1],eval:0,eval_interv:0,eval_model:0,evalu:[0,1],even:[0,1],evenli:0,event:0,eventu:0,everi:[0,1],everyth:[0,1],exactli:1,examin:0,exampl:1,except:0,exchang:0,excit:0,exec:0,execut:1,executor:0,exist:0,exit:0,expand:0,expect:[0,1],expens:[0,1],experi:0,experiment:0,explain:[0,1],explan:1,explicit:0,explicitli:[0,1],explod:1,exponenti:1,expos:0,extra:0,extract:0,extrem:0,fact:0,fail:0,fairscal:0,fake_data:0,fakedata:0,fall:0,fals:0,familiar:0,famou:0,fan:0,far:0,fashion:0,fast:1,faster:[0,1],fc1:0,feasibl:1,fed:0,feel:0,fetch:0,few:[0,1],fewer:1,field:0,figur:0,file:0,file_or_path:0,filesystem:0,filesystemread:0,filesystemwrit:0,final_ckpt:0,find:0,finish:0,first:0,fit:0,five:0,fix:0,flag:0,float16:0,float32:0,floattensor:0,flow:0,focu:[0,1],follow:[0,1],followup:1,footprint:0,forc:[0,1],fork:0,form:[0,1],format:0,forth:0,forward:0,found:0,four:0,four_d:0,fp32:0,frame:0,framework:[0,1],free:0,frequenc:0,frequent:0,frontend:1,frozenclipembedd:0,fsdpv2:0,fsdpvitmodel:0,fsspec:0,full:0,fullyshardeddataparallel:0,fuse:0,fusion:0,futur:[0,1],fwd:0,gain:0,gather:0,gcloud:0,gcp:0,gcr:0,gener:[0,1],generated_t:0,geomean:0,get:1,get_dimention_s:1,get_local_ordin:0,get_logical_mesh:0,get_memory_info:0,get_ordin:0,get_rng_stat:0,get_shard_metadata:0,get_stablehlo:0,get_stablehlo_bytecod:0,get_world_s:0,get_xla_supported_devic:0,git:0,github:0,give:0,given:[0,1],global:0,global_mast:0,global_runtime_device_count:0,global_tensor:0,globalr:0,gloo:0,gnupg:0,gold:1,gone:1,good:[0,1],googl:0,googleapi:0,gpgkei:0,gpt:0,gpu_num_devic:0,grad:0,gradient_as_bucket
_view:0,graident_as_bucket_view:0,graph:[0,1],graphviz:0,great:1,grpc:0,gspmd:0,guidelin:0,half:0,hand:0,handl:[0,1],hang:0,happen:[0,1],hard:[0,1],hardcod:0,hardwar:0,has:[0,1],hash:0,have:[0,1],haven:1,heavili:0,hei:1,help:[0,1],henc:0,here:1,hide:0,higher:0,highest:0,highli:0,highlight:0,hint:0,hirsh:1,hit:1,hlo:0,hlomodul:0,hoc:1,hold:0,hook:0,hope:1,hors:0,host_to_device_transfer_thread:0,hour:0,how:1,howev:0,howto:0,html:0,http:0,huge:[0,1],hurt:1,hybridmesh:0,ici_mesh_shap:0,ideal:[0,1],ident:0,identifi:0,idl:0,ignor:0,illustr:0,imag:[0,1],immedi:0,impact:[0,1],implement:[0,1],implicitli:1,importantli:0,impos:0,impract:1,improv:0,inbound:0,includ:[0,1],inde:1,independ:0,index:0,indic:[0,1],indirect:0,individu:0,inference_tpu_:0,inference_tpu_multidevic:0,inference_tpu_single_devic:0,infinit:0,info:[0,1],inform:[0,1],infrastructur:0,init:0,init_method:0,init_process_group:0,initi:0,initialize_cach:0,inner:0,inplac:0,input:0,input_mesh:0,input_shard:0,input_tensor:0,insert:0,insid:0,inspect:0,inspir:0,instead:[0,1],instruct:0,int32:0,integ:0,intend:0,interact:0,interchang:0,interconnect:0,interest:0,interfac:0,interleav:0,intermedi:0,intermediari:0,intern:[0,1],internal_ip_address:0,interpret:[0,1],intersect:0,introduc:0,invest:0,investig:0,invis:0,invisible_watermark:0,invok:0,involv:[0,1],iou:0,iou_threshold:0,irvalu:1,is_master_ordin:0,is_spmd:0,isinst:0,isn:1,issu:0,item:0,iter:0,its:0,itself:0,jax:0,jit:0,job:0,join:0,just:[0,1],kb_free:0,kb_total:0,keep:[0,1],kei:0,kept:1,kernel:1,kind:0,kinda:1,know:[0,1],kwarg:0,l12:0,l214:0,l_in:0,l_out:0,label:0,lambda:0,larg:[0,1],larger:0,largest:0,last:0,latenc:0,later:0,latter:0,launch:0,launcher:0,layer:0,lazi:1,lazili:[0,1],lazytensor:0,ld_library_path:0,lead:0,learn:0,least:0,leav:0,left:0,leftov:0,legaci:0,len:0,length:1,less:[0,1],let:0,leverag:0,lib64:0,lib:0,libgl1:0,libopenbla:0,librari:0,libtpu:0,licens:0,like:[0,1],likewis:0,limit:0,line:[0,1],linear:0,link:0,linux_x86_64:0,list:0,live:[0,1],load_dataset
:0,load_state_dict:0,loader:0,loader_prefetch_s:0,local:0,local_shard:0,localhost:0,localservic:0,locat:0,log:0,logdir:0,logic:[0,1],login:0,logit:0,longer:1,look:0,loop:[0,1],lora:0,loss:0,loss_fn:0,lot:[0,1],low:0,lower:[0,1],lr_scheduler_divide_every_n_epoch:0,lr_scheduler_divisor:0,lsy323:0,m1_shard:0,machin:0,machine_0:0,machine_0_internal_ip_address:0,machine_0_ip_address:0,machine_1:0,made:[0,1],magic:[0,1],mai:[0,1],main:0,mainli:1,make:[0,1],manag:0,mani:[0,1],manipul:0,manual:0,manual_se:0,manylinux_2_28_x86_64:0,map:0,mark:0,mark_shard:0,mark_step:0,mask:[0,1],master:0,master_addr:0,master_onli:0,master_port:0,match:0,materi:[0,1],materiz:1,matrix:0,matter:1,matur:0,max:[0,1],max_devic:0,max_norm:0,maxim:0,maximum:0,mayb:0,maybe_download_and_load:0,mean:[0,1],measur:0,mechan:0,median:0,memori:1,mention:[0,1],merg:[0,1],mesh_reduc:0,mesh_shap:0,messag:0,met:0,metadata:0,metrics_report:0,middl:[0,1],mig:0,might:[0,1],migrat:0,min:0,mind:0,minim:0,minimum:0,minu:0,miss:0,mnasnet1_0:0,mobilenet_v2:0,mode:[0,1],model:1,model_id:0,modif:0,modifi:0,modul:0,moment:0,momentum:0,more:1,most:0,mostli:0,mount:0,move:1,mp_device_load:0,mpdeviceload:0,mpmodelwrapp:0,mpserialexecutor:0,mseloss:0,much:[0,1],mul:1,mul_:0,multinode_train:0,multipl:1,multipli:0,multiprocess:0,multislic:0,must:0,my_dtensor:0,my_funct:0,my_modul:0,mymodul:0,mynetwork:0,nad:0,name:[0,1],named_paramet:0,namespac:0,nativ:0,nccl:0,necessari:0,need:[0,1],nest:0,net1:0,net2:0,net:0,network:0,neural:0,neuron:0,new_group:0,new_rank:0,newer:0,next:[0,1],nice:0,nightli:0,nightly_3:0,nit:1,nllloss:0,nms:0,nnc:1,nnode:0,no_grad:0,node:1,node_rank:0,non:[0,1],none:0,nonzero:[0,1],norm:0,norm_typ:0,normal:[0,1],note:[0,1],notebook:0,notic:0,now:[0,1],nproc:0,nproc_per_nod:0,num_devic:0,num_epoch:0,num_gpu_devic:0,num_gpu_machin:0,num_host:0,num_output:1,num_warmup_epoch:0,number_gpu_vm:0,number_local_gpu_devic:0,numpi:0,nvidia:0,object:0,observ:0,obviou:1,occupi:0,occur:0,off:0,offend:0,offer:0,offici:0,of
ten:[0,1],okai:1,old:0,omit:0,onc:[0,1],one:[0,1],ones:[0,1],ongo:0,onli:[0,1],onto:0,opaqu:0,opcod:0,open:0,openxla:0,openxla_ev:0,oper:0,operation_semant:0,opportun:1,ops:[0,1],opt:0,optim:1,optimizaiton:0,optimizer_arg:0,optimizer_step:0,optimz:0,option:0,order:0,ordereddict:0,ordin:0,org:0,origin:0,other:[0,1],otherwis:[0,1],ouput:0,our:[0,1],out:0,outer:0,output_s:0,outsid:0,over:0,overal:0,overhead:0,overlap:0,overload:1,overrid:0,own:0,packag:0,pad:[0,1],page:0,pair:0,paper:0,parallel_load:0,parallelload:0,param:0,param_norm:0,parent:0,pars:1,part:0,partial:[0,1],partial_repl:0,particip:0,particular:0,partition_spec:0,pass:0,patch:0,path:[0,1],pattern:[0,1],payload:0,peculiar:0,per:0,per_device_load:0,percentil:0,perf:[0,1],period:0,permit:0,persist:0,perspect:0,photograph:0,physic:0,pid:0,piec:0,pin:0,pin_layout:0,pine:0,pip3:0,pip:0,pipe:0,pipe_watermark:0,pipelin:0,pjrt_backend:0,pjrt_computation_cli:0,pjrt_devic:0,pjrt_distribut:0,pkg:0,place:0,placement:0,plai:0,plan:0,planner:0,pleas:0,plu:0,plugin:0,png:0,point:[0,1],polici:0,popular:0,port:0,portion:0,posit:0,possibl:0,post:0,potenti:0,pov:1,practic:[0,1],prebuilt:0,preced:0,precision_scop:0,pred:0,preemption:0,preinstal:0,preload:0,prepar:0,presenc:1,preserv:0,pretti:[0,1],prevent:0,preview:0,previou:[0,1],previous:0,primit:0,print:[0,1],prior:0,privat:0,privileg:0,probabl:1,problem:1,problemat:0,proce:1,proceed:0,produc:[0,1],program:[0,1],progress:0,project:0,project_id:0,promis:0,prompt:0,proof:0,propag:0,proper:0,properli:1,properti:[0,1],propos:1,prototyp:0,provid:[0,1],provis:0,pt_xla_debug:0,pth:0,publish:0,pull:0,put:0,pwr:0,python3:0,python:[0,1],pytorch:1,pytorch_test_bas:0,pytorch_test_with_slow:0,pywt:0,qualifi:0,queri:0,question:1,queu:0,queue:0,queuedresourc:0,quick:0,rais:0,ran:0,rand:1,randn:0,random:0,rang:0,rank:0,rank_of_current_machin:0,rate:0,rdzv_endpoint:0,reach:0,readabl:0,reader:0,readi:0,readm:0,readonli:0,readthedoc:0,realli:[0,1],reason:0,receiv:0,recogn:[0,1],recommend:0,
recompil:0,reconstruct:0,record:0,recreat:0,recurs:0,reduc:0,reduce_and:0,reduce_fn:0,reduce_max:0,reduce_min:0,reduce_mul:0,reduce_or:0,reduce_sum:0,reduce_typ:0,reduct:0,regard:0,regardless:0,region:0,regist:0,regular:0,reimplement:0,relat:0,relationship:0,releas:0,reli:1,reliabl:0,relu:0,remain:[0,1],remateri:0,rememb:0,remov:0,renam:0,replac:0,replic:0,replica:0,repositori:0,repres:[0,1],represent:0,request:[0,1],requir:[0,1],rerun:0,res:0,resent18:0,reshard:0,reshard_after_forward:0,resid:0,residu:0,resnet18:0,resnext50_32x4d:0,resort:0,resourc:0,respons:0,rest:[0,1],restart:0,restor:0,result:0,resum:0,retriev:[0,1],reus:0,review:0,rewrit:[0,1],rfc:0,rich:0,rid:1,ride:0,right:[0,1],rng:0,ronghanghu:0,root:[0,1],roughli:0,round:0,rout:0,rule:0,run:1,run_async:0,run_demo:0,run_vit_train:0,runtime_vers:0,s64:1,safe:0,safetensor:0,sai:1,same:[0,1],sampl:0,sample_count:0,samplegener:0,satisfi:0,save1:0,save_async:0,save_state_dict:0,scalar:[0,1],scale:0,scale_model_input:0,scenario:0,schedul:0,scope:0,score:0,score_threshold:0,scp:0,scroll:0,sdk:0,second:0,section:0,see:[0,1],seed:0,seem:0,seen:0,select:0,self:0,semant:[0,1],send:0,sens:0,sent:0,sentenc:1,separ:0,sequenti:0,serial:0,serial_exec:0,serv:0,server:0,servic:0,set:[0,1],set_rng_stat:0,sgd:0,shallow:0,shape:0,shard_metadata:0,shard_output:0,sharded_model:0,shardingspec:0,share:0,sharidng:0,sheet:0,shift:0,shm:0,short_metrics_report:0,should:1,show:0,shown:[0,1],side:1,signal:0,signific:0,significantli:0,similar:0,simplelinear:0,simpler:0,simpli:0,simplic:[0,1],sinc:[0,1],singl:1,size:[0,1],size_based_auto_wrap_polici:0,skip:0,slice:0,slightli:0,slow:0,slowdown:0,slower:0,small:[0,1],smaller:[0,1],smart:1,smash:1,smi:0,snippet:0,softwar:0,solut:[0,1],solv:1,somehow:1,someth:0,sometim:[0,1],sort:1,sound:1,spatial:0,spawn:0,special:0,specif:0,specifi:0,speed:[0,1],speedup:0,spend:0,spent:0,split:0,split_count:0,split_dimens:0,spmd_debug_1:0,spmd_fully_sharded_data_parallel:0,spmdfullyshardeddataparallel:0,spm
dloadplann:0,spmdpartition:0,spmdsaveplann:0,squeezenet1_1:0,src:0,ssh:0,stabil:0,stabilityai:0,stablehlo:0,stack:[0,1],stai:0,start:0,start_method:0,state:0,state_dict:0,statement:[0,1],statist:0,statu:0,std:0,step:[0,1],steptrac:0,stick:1,still:[0,1],stitch:0,stop:0,storag:0,storage_read:0,storage_writ:0,store:0,stori:0,str:0,straightforward:0,strategi:0,stream:0,streamexecutor:0,strictli:0,stride:0,string:0,strongli:0,structur:0,style:0,sub:0,subclass:0,subgraph:[0,1],subgroup:0,subject:0,submit:0,submodul:0,subnetwork:0,subsequ:0,subset:0,substanti:0,substitut:0,subtl:0,successfulli:0,sudo:0,suggest:0,suit:0,suitabl:0,sum:0,summari:0,supplement:0,support:1,suppos:0,suppress:0,sure:0,sxm2:0,symbol:1,sync:0,synchron:0,synthet:0,sys:0,system:0,systemctl:0,tabl:0,tag:0,take:1,taken:[0,1],talk:0,target:[0,1],task:0,tbh:1,teach:1,team:0,techniqu:0,technolog:0,tee:0,tell:[0,1],temp:0,tempfil:0,tensorboard:0,tensorflow:[0,1],tensors_on_devic:0,term:[0,1],termin:0,tesla:0,test_put_xla_uint8:0,test_set_batch_s:0,test_torch:0,test_train_mp_imagenet:0,test_train_mp_imagenet_fsdp:0,test_train_mp_mnist:0,test_train_mp_mnist_fsdp_with_ckpt:0,test_train_spmd_imagenet:0,text:0,text_to_imag:0,tf_cpp_log_thread_id:0,tf_cpp_min_log_level:0,tf_cpp_vmodul:0,tf_vlog:0,tfrt:0,than:[0,1],thei:[0,1],them:[0,1],theoret:1,therefor:0,thi:[0,1],thing:0,think:1,those:0,though:0,thought:1,thousand:0,thread:0,three:0,throttl:0,through:[0,1],throughput:0,thu:0,thumb:0,tile:0,time:[0,1],timelin:0,timm_vision_transform:0,tmp:0,todo:0,togeth:0,token:0,too:[0,1],toolkit:0,top:0,topolog:0,torch:1,torch_test_devic:0,torch_warn:1,torch_xla:0,torchbench:0,torchfx:0,torchrun:0,torchscript:1,torchvis:0,tot:0,total:[0,1],total_norm:0,totalsampl:0,touch:0,toymodel:0,tpu_legaci:0,tpu_nam:0,tpu_process_bound:0,tpu_visible_chip:0,tpusubnet:0,tpuvm:0,trace:[0,1],track:0,tracked_step:0,trade:[0,1],tradit:0,train_imagenet:0,train_load:0,train_mnist_xla:0,train_model:0,train_model_main:0,training_or_inference_scri
pt_using_spmd:0,transfer:0,transferfromdevic:0,transferfromdevicetim:0,transform:0,transformer_auto_wrap_polici:0,transformer_layer_cl:0,transit:0,translat:0,transport:0,transpos:1,treat:0,tri:0,trick:1,trigger:[0,1],trivial:1,truth:1,tune:0,tupl:[0,1],turn:0,tutori:0,two:[0,1],two_d_fully_shard:0,two_d_partially_repl:0,txt2img:0,txt:0,tying:0,type:0,typic:0,uglier:1,unavail:0,unawar:1,unblock:0,uncorr:0,under:0,underli:0,understand:1,union:0,uniqu:[0,1],unless:[0,1],unlik:0,unlock:0,unmodifi:0,unnecessari:0,unoptim:0,unpin:0,unreli:0,unset:0,until:0,unwant:0,upcom:0,updat:[0,1],upload:0,upon:0,upper:1,upstream:0,usag:[0,1],use_color:0,use_cuda:0,use_gradient_checkpoint:0,use_nested_fsdp:0,use_spmd:0,used:[0,1],useful:[0,1],user:1,uses:[0,1],using:1,usr:0,usual:0,v100:0,valid:0,valu:[0,1],valuer:0,vari:[0,1],variabl:1,variant:1,variat:0,veri:[0,1],verifi:0,verison:1,version_id:0,vgg16:0,view:0,visibl:[0,1],vision:0,visualize_shard:0,vit:0,vit_10b_fsdp_exampl:0,vlog:0,volatil:0,wai:[0,1],wait:0,wait_device_op:0,want:[0,1],warn:0,wasn:1,watch:0,watermark:0,weight:0,weight_decai:0,well:[0,1],were:[0,1],when:0,where:[0,1],wherea:0,whether:0,which:[0,1],whl:0,who:0,whole:[0,1],whose:0,why:1,wide:1,win:1,within:0,without:0,won:1,word:1,work:[0,1],workaround:1,worker:0,workload:0,workspac:0,world:[0,1],world_siz:0,wors:1,worth:[0,1],would:[0,1],wrap:0,wrapped_model:0,wrapper:0,write:0,written:0,wrong:1,wrt:0,www:0,xla:1,xla_auto_spmd_mesh:0,xla_auto_use_group_shard:0,xla_backend:0,xla_cuda:0,xla_devic:0,xla_device_hw:0,xla_dist:0,xla_downcast_bf16:0,xla_dump_hlo_graph:0,xla_dump_to:0,xla_dynamo_debug:0,xla_flag:0,xla_graph_executor:0,xla_hlo_debug:0,xla_ir_debug:0,xla_metrics_fil:0,xla_multiprocess:0,xla_no_special_scalar:0,xla_optim:0,xla_rendezv:0,xla_resnet18:0,xla_save_hlo_fil:0,xla_save_tensors_fil:0,xla_save_tensors_fmt:0,xla_sharded_tensor:0,xla_spmd_auto:0,xla_sync_wait:0,xla_tensor_z:0,xla_use_bf16:0,xla_use_eager_debug_mod:0,xla_use_f16:0,xla_use_spmd:0,xla_util:
0,xlafullyshardeddataparallel:0,xlashard:0,xlatensor:0,xmp:0,xpu:0,xrt_tpu_config:0,xrt_world_siz:0,xser:0,yea:1,yield:0,you:0,your:[0,1],your_cache_path:0,your_script:0,your_sharded_checkpoint_fil:0,your_tpu_nam:0,zero:0,zero_grad:0,zeros_lik:0,zone:0,zoom:0},titles:["PyTorch on XLA Devices","Source of recompilations in torch_xla"],titleterms:{"2x2":0,"case":1,"new":0,The:0,Use:0,With:0,aannot:0,amp:0,analysi:0,api:0,appendix:1,auto:0,automat:0,awai:0,awar:0,axes:0,background:0,basic:0,beginn:0,benchmark:0,benefit:0,bfloat16:0,billion:0,bound:1,build:0,cach:0,calcul:0,can:[0,1],caveat:0,chang:0,check:0,checkpoint:0,checkpointmanag:0,clear:0,clone:0,code:0,combin:0,common:0,compil:0,conclus:1,constraint:1,control:1,convert:0,cpu:0,creat:0,current:0,data:0,dataset:1,debug:0,deep:0,design:0,detail:0,develop:0,devic:0,differ:0,diffus:0,dimens:1,disclaim:0,distribut:0,distribute_modul:0,distributeddataparallel:0,dive:0,docker:0,dtensor:0,dynam:1,dynamo:0,environ:0,exampl:0,execut:0,express:0,fact:1,failur:0,fake:0,featur:0,first:1,fix:1,flow:1,from:[0,1],fsdp:0,fulli:0,further:0,gap:0,get:0,gpu:0,gradient:0,group:0,guid:0,here:0,high:0,host:0,how:0,huggingfac:0,hybrid:0,imagenet:0,infer:0,input:1,instal:0,instanc:0,integr:0,its:1,known:0,layout:0,lazi:0,let:1,level:0,lightn:0,llama:0,load:0,memori:0,mesh:0,method:0,metric:0,mix:0,mnist:0,model:0,more:0,motiv:0,move:0,multi:0,multipl:0,multithread:0,node:0,number:0,oper:1,optim:0,output:[0,1],overview:0,parallel:0,paramet:0,partit:0,perform:0,pjrt:0,pod:0,precis:0,process:0,profil:0,pytorch:0,queri:1,quickstart:0,quirk:0,read:0,real:[0,1],recompil:1,refer:0,rendezv:0,repo:0,report:0,reproduc:0,resnet50:0,resnet:0,run:0,runtim:0,saniti:0,save:0,script:0,setup:0,shape:1,shard:0,should:0,simpl:0,singl:0,some:[0,1],sourc:[0,1],spec:0,spmd:0,stabl:0,start:1,support:0,take:0,tensor:[0,1],test:0,tool:0,torch:0,torch_xla:1,torchdynamo:0,tpu:0,train:0,troubleshoot:0,understand:0,unit:0,use:[0,1],user:0,using:0,util:0,variabl:0,ve
rsion:0,via:0,virtual:0,visual:0,visualize_tensor_shard:0,what:[0,1],wheel:0,when:1,without:1,xla:0,xla_model:0,xlashardedtensor:0,xrt:0,you:1}}) \ No newline at end of file