From f44e1dfc03d9f0817059e6418009b1dc493cfeb4 Mon Sep 17 00:00:00 2001
From: qihqi
Date: Wed, 17 Apr 2024 15:31:08 -0700
Subject: [PATCH 01/53] Add doc explaining how it works (#6937)

---
 experimental/torch_xla2/docs/dispatch.png    | Bin 0 -> 150015 bytes
 experimental/torch_xla2/docs/how_it_works.md | 134 +++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 experimental/torch_xla2/docs/dispatch.png
 create mode 100644 experimental/torch_xla2/docs/how_it_works.md

diff --git a/experimental/torch_xla2/docs/dispatch.png b/experimental/torch_xla2/docs/dispatch.png
new file mode 100644
index 0000000000000000000000000000000000000000..fcdd5e9e58a39032593fd3090fe1881d433e9a4f
GIT binary patch
literal 150015

[base85-encoded binary payload for dispatch.png (150015 bytes) omitted]
z#uVu`WQFv<$a|oyTOl&Le6`;Fw1BJJY#(!ZZKXe_*V2zNZG(_zvwYY0`xEy*0I1@# z_4Z2n77fsEBCL|2yA(kf=ww(n6}bDY09z~5>0~KMIbfV-&}x{WKvoADSE3)27~v&U z#wBnNAh%=qeEauzOBEvl9AG_U$a7m+@U=T@nV1Mz70MrYpPxw1M8PFw0X``Na+1sz zu|7JG9e_@Z)9`vm8KNA)3hJXEh%R)vP|Bc$C+Nc)SC(p8XPjmdVkK-P>?($GO-iQ@ zsW&m(%s~u>jd31qhG;yvbOjGI(UlDzouZudDHZKBSp_mx=q2;~=rJ_kpp42E)Dl@4 zTm#x6;l&cr)(QkIJSvkKrtvr3WPJ#?D~)8{%bVPS>HZPlA@p64LT$DA%jM_aR={Zf z=~DXcc+T)zjCo6AkaV80!Y7x%IHAGG@zeb8mWTTDeAGv)Pct7y3-%qVMC;Sa0{6Ky z4FR!lHrYw%&t}R6j1`vSIphee=6rBtlt)$6x5Q&XkITEmLyqrO06otfcXMZ~T<1Qq z_O}*T!Le}U3X0^>XK4S*aBu3Y8L$=#Ek=`PPxpiLnfpbh#G2@QHO)om@3#s`eQ*8k z&WHUX;B;c)JVsZ*D|7F>ra!iLS8lb-Tus8H|&8H9fGnMzA zTBUF#EyJsNY4q{qL`(ggTUR2x6vWO-tfeMN(s!=Xrj$rxkEJz-L~!ETnjZb7k|dp+ ziFJ=#DoS?yhaAm}YR(C1=-nGm*-ls=p!@rZ- z+a8}y3^{u6q32hTOf0q*g47t(<|qrCM3%yDEg;@jUE6e=7P-(S7g*75soRh*@v8o4 z{HYk96Y#j@hiz0 zX5^S8Q|4Z{MRVNkQY5ahoY#4b4V=eUMizkK&zs2j#m;Z}X8tN_TJj{?6ps62h@hG+ zx*jX5z^BiADW&B4dbN<6fU@Q6v6(VVq?$m|H&qyYb$sQ|EXGrlj$DQlIT+(CBk3RFLqHlHwbQ;0|g zw6_1n;1#K|&U>=2D0d#{4C?4ul9w7aG*wMLc4~8ihv)`Kg*tSDA=+#D&0pC*{S5oA zsZNHeVGg?cYYl`;T%McjkGMkLo{c0*Fe?1@3K)_O2KlH-#0z=c z7AehaC8Bp-hAuvk9h+9f_u4cN6z8QvJe#jpxYJfc#_iXN2gLF);S(*1-_n#jzQ{-1 z-h8!l={TMz>5AA8|h zpkQUhy!%bn+aS%3@iS`k*Tr)Qo;s7tv-<-3H-#M=b z83%9*?R)+^*JhAaU=46_U6I)KP@J?29Du86XM{KX%G#>LXu|xejIU+B)h=ea!$~Oa zEYXK4yr+d4hiQl!+}6;Zff*hKQ7)!myU=KV1wLTNFIE>o-$R1lsE8KS_%`A@Pp76n z3FUS+#F&|OWw`XbfXi-(Q1tMPl`Cbc>pcWPOD>#mXKE26?xAR@2pkxCjyQ52g4 z;I(?ZWJoO(RvpNH8b#XL)8au?YByUd*_(g29d#v-xKvK?=to5N-Meel4B8{604<$} z#KuUM7VFH)A0mwM(zP@N`v@$zO?v+Y@W??6&vLHaTNSP75wxN=6{=>709$)}rLW|0 z_iodoi{gE%;xgG?O63hU{sORHiOo2`6(bQO3`#|!9a@}iiCKr}?-aE-9LxR^tf>&S z-W%Ub-Q;#nDN_fLQbsUR7!maZwr{#e^XRK&amr2?uWtoTB z>>Q_LA|bH3ZVo7n$KT@ma__?CKuM7d8gu&!Ani>+f8-?{2_lE+rO;H&by^kZ84e0p z)I{w&kP-Ek#>C_LGtsJ3WYUq=KA1Hj&>O4}FK?SZ$wQw&y(b!yd!kO8*$+N=zkS%- zwMA5o*vFROY@m6K-%Kj=As&GNo#ZiGT7;S{gLP0iU1$odqd-AMy9y)%#s=WY#Y+yG5u6}%wwbLJwx2p2AzqiGxx#SKJQ@-G#j0iZ97=(YKu!n*h8x+; zL#CTul0WCil^=<{c0gX0mU(Qh7`wt|F7+^daTyx>-A8a_v&Vu?G0I<(^xrrMe6?FI z05yMx-W_ChSK%4=@_LJ-zr#qwhbyDY3)XX;_(9j)uF_Gs6 z)9EqB21IQgf1h=IzdvoNQEYlrq#iGYHwpuyJ$IAYvzaAs{%{Vwd8TIPaZd!i-}MvS?pXOCPh zCYablYvgNB1Y#zuER3@);6!G7O{qejKG=4S((WS>+Q*C8nILH$L>#up?4@){_|{F~ z%O#h@U=Px%X#r#M*P&VQUuAH5bykh~U)n%ZBRECPpD8yR2GCxIVPf7S;yqDr(&7~t z9_<2iSx@NLHzDWuOQ&6KJyh6}H+|ZKGbxyf7^J7T~D1^qP85?GL z^K?6X+*AZA7seYDrTTL{DP;3M`W9(jljB!*kh&E1mhrjFt2FxIoJHjpL{&ovwZCwT z*uZV~VEhFM3_P*yN%+{^Ow?J>*|l2Q^{mQD(}|+^wc;)-7#j-#*69Hr?UkJeq-*W@ zISwi%8)MJma(Bxo(egf=FugWz8auT>liCN7J2N2s&VkwTLh_Ch&n*v)cZ&cr2zl=E zH%seF14{h8c@*;DBG9046R-yX(*FH;t=Aj7{5hX5?a%K<$DTFEWP5XdOt`RX@|_<{ z*U~=zGTWDm>&!w8o=(5oBgf1v!q*u=E3GP|}8vXKIS?xY`l$|3a*E`l%DdYG>l zxv6~8C0;q|rFFD=y3=KN%ki1p2jN$hOyi9>U;=1*c<@xLq3d{uAk(?%GibJXn)*eC zYtK*a#IwX|w>JhJ&>I=qi|pIpF@!Ti#}InA>MrUcS`mYkfD=hnHcJ6b2J4*$j;x|e zBw|j#`7)q1zw5@>jd)>!(M1>&dqEK5UGj;CXhP`Yp04}_j(!{ZWfXyAhE1lj-@unD zd~3VW_Vx8r?iTgO^UkE9P%elh9ih`X{$tK;FUxbOnW`A+EEN91r7BYky6ZcjAh6%n z%~C#PyAJqh4s0KIHGab(RWUv5ce^bQ24ui+_-fI<{@m-LvT%J;f))UDW_MpvQj%6y z;rL~m98nFpu}d9|82^j!6v#T^Sf}vuQ4pQ=!vOnq{2RRT|LH@_cZvspNZ6#6i_Lz) zewH*DX7Urp$Uev|(hbsw;{(||*!SqKzmuk0c`1UVC@Yz1x!}FHbVY$LfZ6BQ)AB9A zbc@&0Z+Xk6h&lw(!CB52I{`Df%Ea~8p)9ZN&_+E1cH5hvxE&b^;f22BCM)*RZ+6=i zJ`g|fFx)UZc#WAWiJwQtqK4mzDS9Q4^TFOjj)=>4BG*4USs-P$`z)g$J}N0ld@GIV97Vmkl=9z}j$E3a)oX zKC*trU{}=*iREU4}p&6JV019n5e*r^~shaz_4p8tZ-uFnxkOe`O z_s!NPeSXoUG{yF1)|kRtgzL6h^tV4ZXSK=byQ$}*>=3|yGh7^(f)jyhlLYZ>0`eZ= zC)}MjD*9nu=Oa;s`=$v~6uhn&?lXgoMkP)+z!t0Kj&V38P$}*p_|lMVTa7!d0MDIQ zFZZVdk^*FUkz=yu9(_7w^)O=+FC}{|V+=scaHdrV}eWpZXuZ=vtF|s8# 
z`;za>JZH#XBid_Fdyqf>Gy5|@;-A(SQV1^Gu*d=yfL<-1ij9c;3)pID179loi=+Zd zeBUHQB3!^4)^;q-Kysip*AD}d^Gxb`s_zg)&+{6k{#sX|y$&rjU%5D@j^9q^w|v+1 z9GnGR*b6dNr|2DN0ZgRW#TX-=)Fe5&ERsf!C`(p~#WQvrHCh2Ud%YBrteE=v)R#o) z;{a29{VzT85FisHnQk;(j#FQr201EMxK&6Q z`_Yb2S+e_m5AI$PntjLkKK2DwXOJnJ8J|UpV%KC>OZnt69V(*r`3hrTbNE75fkv9n z%}s1?=qc5oVLqiM$c}hrvSR8@x^5Q~?b(UMF^Fu7&D@psq&oql{Ud%IY$tKnufDyQ z07YtGqf%N^H2I}*Eg=<4<{7zL9N09q_v?`+68IrL29K+Y9$gP%G^wOORr(L!@|#-N zt~tHOGOUkpeVNmD^!8y{@}1a$l;cb{QIvpLgzs*)Po+K@rF|ZpIY!#gq5Mpu z=w-r&&SF@{L8V3AX0bK%7F*fwx74^cfxPP7+g7R9TX=Ub9YY(>ZziS;^Q1OZZ$%-( zL`^^d33+$@749(IRs+)DQ1;v70f`VRWt|5D;z4;HcvRC7HP+d6i2-z)ea%G(SxeWTnNYHN;HNLD<`QU=ng#UY+I`_{2 z`1@PlMe4`QHo5cY8+t=Ym&Y=)^U)6+wV!T$rk_{{=+s{5kve_EFP*+KSj^->Kda}A%>m&$K`Ff40{Lf# z?N>j7u+nCU-h=2@5s?C{tML;(d0X)PY~ScUUw|dPO6-1Eaay17Qt#!1i^;kR zR0lXb>h4Gl1!dRXOdmLV!ehGp$7ShrC#f0bDcKHnZ=Y5V?}EE=^x6DA^uX27{u_XU zN0EGSCjexCm>$M$%EMP6wb&&OF%hM&2RQtz$2o-75aC)9Jlr#fBjHJDN^`=WF%q{x zprHOrA?9&>D*kyW%gzs+sRhz_xitGv`?p^oMhSlSXFr#n=+}>d_&fBZ%Uh~_&hb!W zfY0#Udl=UyT2;Mw`--mu?9-YNy<*Njz|CC4C`KQawS_O@$0_F_oj1z>i-?+te&T=s z*UbR9(HlT+S_ss2e5e8ZJv|PB$8G`3N(41UY;`1TRjlt)zXwqPtyL#L z*uIKkyW>M&_^lKyV3BZhNnZ(WOh`)Gi))8~qf7*eUhRSc?;nmB?UAQb5gZy?AoRV{2eW~==nUK1 zoxbW_ho;%S13hXAh*=>@SK3AW+QSdA5OBVGv+kTAL}Wk)L?&m{8o`hAtI_`4L^=g~ ziG{Hync%eBw8!-fR|3Q9i3RG#QWw|T+;N*8pow%u5oh2r(d9-CXst)%A1T0h zbN1t#J^wt}9b|Hd>XyTJoj^|;0}TU9xsOJM=507R?l1ZyTz|9xS_)5_AwyK@(r2IO zgr99vB}ertJjxut4WXdC>6af>dAnf3(FF7bF`7u!?+RmUCUoXF0}2k}Nnw0qaN3=% znO{Pqy?s$4rXTprFV0Fw%Fwjri+ir}Cb`jbs&|>{Uh1k(=-ABp37sL=J(zhi;H)>K zZec4L0CNv@S)fcWTCt)YBL8yYs-eF$JyxZUmgohjDE`zsAs>KQG=dZMbRutn``+%U zsB6L68j=FYPd36iP4t&nr(TSHRX#qa{_;BQ8hFa6aW|O@AykOB$)I&T7^rv6m)9f6 z?g6pVn|u2?m&XF@wUTm~Rz*e9kuMX15|p zeVR@sY~yJq&d|Y|m#aK_3|-hcS$I0ep%=lES_;aJ!G<|Fkh63=QrmW-=I#<5=_xzV zb88fC?6^?zVz34i)1yP6<8OqpH`bJjq^l~ql50ood-8yjovVAS*4F{%m%8XVHeWE=qWG#l zDB$(bt;Arl)>#L*r?Wk*gCcCiUu1eeo12u(37tisuB5wZonBtEW(@2~@g9dZ(&j5l ztw!3Tq@Z1EvBwyxK5Q#*JIMZLvnLd`4r0}`Pa%fo$(zVp47s~V$k82?3c^Em0H8RO zKY2>!Kae?U-U4=xKIm;NfRiNx5YP`!c`K+52dS<<$j9Es)V4?*x>l;^nxS3TtRysk zq)@jzOBWsGdv=4;A@!T9ve*{Tm<2YYSC=_%B!~pP7~zJwzNZLxIQlJYR-+FG8-A=o zYLt&OZCdDf@K36l^w-TcErL_RgFzWX^_zTI&o?%{nqDSq$(wzg-P%2HIi3UOw$lo? 
zTH?OkR}se{P*G=Eu;HYn!VLEVVvCwPnw7UK-Bt|ocDF7q=0Y|0e{$l+_IiH?3%gOW z>BRTn8trf>cS+=QqYE>BoFzLtWm3$6WX}@51=bTdT>GdiVBR_}?^i@{>-GV(cZ*)f z`iipE#&So-QS5O8vCej zWfy#^n^>{t4%4uaEMiAQ5!QkOl5Ny+PtVjp`{kcK^na1?3JaqwM05lps+AgN(HVcf z_8y?B$$F`d9AB)E$P3aq2ZIOMB`hELpb`;5%D$(}=Sg8L*wZgUn!S1>1tkknHick~ zXfFIrqPVArVE6{~e?*8ZW?0gz$x^@WzPjEYaE{F!LSn!K_9$BKWuUEY^kk&}FnEoo zn!_%M!(qL*btzb(M%#a!2u?9{b+RlVvXKhIgv+LuR!1MXEOqit77w5HY`t{3;BHsm z+-`#5N3|FxDUfb zN%b>`O6b|ph0S~ROM3RfSP6etKz9!OT0!w~2pmWRC1n+Z`wtbfo}A~?$9j?b0w8(^ z#>dis&8r-gD&rYqlZ%jnB+P)j+Sto-{0&@BVHTOrs7G6^;4RU(-88t(UpD3LKYvtU zh1hhK!XvD{d(JJR-NB_Qy{CMzf3`M_E91(tXL`~)*Pu+KV&JQu`oep~A@bcK{a^Ig z9IyT&fEejhs64k#QfAsG`?=g|sG)NEH5`$c^-mkPt$&7g?h=BR_#j}ZMjk(Jk)0NR zptYt$^MVvKDA@>6LK~nOdoef@+6W}v6kpS6mHsmuf_WPtNwB$_}Cgi-L)KzcxW=m5;Ir+^ZSYPVi$5Q`pBqds!b-6}rc&wbu<2LI(GrC5Z;& zFDjiU7nK#+rlauSy(E#lX4TYR)w5#?-{d z4m|VmxXLlAodlK@TkmUs-n8F43^NP>e{||{2+TJQrRE`kQU{8FS*$Up5xV#hE$upQ zlIa4tD`oX8YRRy#e+|t8H`|XgpEfn~j1;wttZzqlzYeWUT9O5f*~U(izmP?3M#xUG z?JE~%30e09qps1a6Ex2dWIEXwuXWg0N~W?mW!?_1(tajqqPbLGF`|)4^+B^n)#}m= zcKQs_$3UY<%`kn?!#g6&CKAW`PT)9DDQl;c>>o2nWl+FmXD3x{qL!xm7Pa}~NLOoElcQB-Y)`}it5HUf4hIhZeQLrdwSI=FNog_1zdkDB&?{b-zl3PTkNe#)Ht;pFN@ z&IDxIeE>~DFTivIdTBS6sV;cRvk=A+h6#4IQXB^n0#&+nc{%npy*LNEXYi&M_9dXqI*RqP=hL*MIa zwVGiEp|Up}VYg|lxNu4szjtYUn%#ACWt6R(cP9AsL?8HRxvVxN9oJxX{}Kid;AMMpIVFnFT%N%w$@vR@1 zfV{rs1SQD~Sk2~em?HSn1-n;3!X&;=>GhcjP+kABa2PW+d z^->k}TwKdz2UwK+r*kSPqtcEX8BP>;zpH8%U>oK$q2IWQBHOhPm#PGj7+CK0Y+J%-5iPdVK|9*TA)UgN#P zzqcp?d^magVGlc0DVRQgA8?Z7Hu^-6w~uP2Qb~qTBW{Rf%@)RT z+xKMTOR@wzcuvw13|oSpK-UW+-*t6M(e~j?+b^RHZNGiVp%Umad;km#O%O61C z|5WR$@9)DkV8{IwDcb?l9!({y6j?10iZflQ?lDEIh+8wKVN=QnJ4hQO)Q{@%!5MKV zs*&brebv$i=-p=l>JOUMWs#4x&oi`bfa4|1c*=hz0o%|BX;Fs>CpMvpx9JAZ6>Sz-!6+(F&DM+v<)!89^jY!I z<0ULC>;k#(1wFoaoKCHUHk1yXUqf>c&caB}DSz+!E;&ncsLzTxIdj2}Pa=+6`h6F=s}zt zE>8=pgajJmLZ)89-PR+^vJWi3^%FXq_bnG~On%<8v|J!ytH{5BGbQ?< z0%n5r^;o5`$mpGHPf)E_%5AT<{|oSLW)_c;i9`nn>vEzvC|sH3F1g=y0@l^_8_k52bmu)YCu#3{H>ZlKo`ZAroh-V24ba5 zfT}D*+(8zb0gl?Ur+;SSK`{H|X@j>f>zQ{W5K>bBp+U%RZ3r3mMw)mr!Z-}qTLqN; zWrLJQgxZuU7WX!U3A!2j_W7n-L}$ETsmffKcCQNHH2u&8*IMPK>e~gW_~U(}z z#F@>Fy=$Mt)+`nUvxPJkCwR;OJ<9X0X}HEooGMNJ7H-~=qH-(`XUp!$Q?{2-Bj3xa z{!f+Oxi9)G(V7xf1_Ur#)T`_R|B9lfr3cKb@_v6QgM0E4>p3 zE_0+(rJDW$&G5AJ!59xocX<8JF8^>(A%SZuADm+~U{p26Fm*`Q>Z|QzF`XGI>xaX( ztiXPl-cN+V`PCOX(sEz1-D{N4exOsGMHV_}9R=Xr?95(J;}TpZj-h~KS0#%lz_fmG z{vrw>41W`^if;qBB?n3{EZ0xcRSa>w;%(w?eQUKr?g=m5DM5YT26&u?=;RzQ1-UPQ zShe&gI)5x4!1H-g{<|eli;!}J!ihsYBZt?p-sf=#8*#m0m1)hW{-veG%APG_XwlWA zeH_g-O#%}|8Zx<5atQ)+D>zMp3|A#2pd=b@Sx0D63mvIRQf8EoP~XMnN!X(BT65aE z?doV*Fj$dE67S+cC`sA58$)i6m4-L8i|=U^iFdO*)jrfO;mM%KXtE@IDSaV=4jZ_g zc>4-cR2huE|1B{3)6UCcp_He_3ax|9<;-CgC~1Fr`QozV>os{kKcG0^PlvOw%lJID zI=yiVI0^>Aqx~AG81q4tjS3PND!Eh`W0oNHO6c}WQ1M$@@B<5etD{_jer$7s=2{13 zqcMU}<-w(%;{bqYAw&oakS?lnoc9bfc|o~{f)mr|#+A&UZqOKgA!~Ja;N$y7xp1;T z2Ul0)p$$qyF@j;u3Bf)d{j#aZgn<>>%d0+AOs<~+oB8(qOMz3jdSSWC*0+qigV!%6 z4{kndkv|%KsSv(MRo8%hyP_kIXV;d7E@i;bDou1Jv|%%i`U&csG--E3mfb%#l-!4G}z)VKoW~PWA3>B+@ z;*`jL2S?S}hTX+t-8aSs!Ai42-DstIo?`-W=-_KVc0LaEnQ$V@%t*JgJ*O}k+9jUO ztE4EO6>NVE(xoecA+@$!u6r$pzNZyzC7y~bl%O`d;9NG&q0B!LmE~3%Y_V!f)Yjh^ zxb3Ji`BW-^Q>)Ollid}LV^T;8v3eieSG$sy!RX;~k?txJag4*+hgP&C@{~$J0kqch zj?%?6Gbyo?nh9=?T8;)t<6D=dI<+t=C=RWhPjdt)9OfXd%l`rpk3pBod`JB|1?oQ` zUz5nZ!<5o|TL07wM_g>pr#t|JO41@hAfXK>=4C+prE)FVGz=LL{OQx^guqm+BhRf{ z@Sz31CWdoqcY#&+n_E{#jI0G{{F*he*&v7cKstCCOc8nc#{=!fyEf!+ zLH`eC3ay^hN_p={?jO5iwmz*n&j;eamwnSU<(s7OV!jB6hLPrNnP$YGHs>6Y&N&lT z%h>uA<-%RQ%Mj+n%9#r<%Kwobv99nZJho{<2) znTo0Ju*Yb^@pZQQU6cYjF=c8p1(t)t0Nur=^RnMemJO=;+DU)*Pa4sm#`Y6>FZG&4 
zY~a==0M#G>y7_LGYy2N1ELUIA3h_!lu zCimXTDfr3FV!7KMnlp5rf%JJwjbL$MgU9F0 zI}22Yf@_vc=DUR`UB{Txo)+*)qeI}1Wv00)jlZge^Lw(RCvs|$7BH42EG$x$F;MHv zxkDHNbIxOMSP47L#f3e;x2pSpVgyt}ZouMWB=*0A9cN5X*9^^KKeU9mp1|>n*5?GJ z>6#9PrtkuiciH*-GXyE3!^qeq#*)XwA=FQ1phn+DACe@5j=h>3_Wl3kE$OT#7?7c_ zWi|!Sev4JXNpK(V9f(?{=d;zquK=8?$W;lr{dkTJ3~>-$6)d!q@io_L=}6d2-(#~N zYOjsE=#zKc!S=;(;2d11-mwiJx&Hyyyff!#9~89|1vggEF1U82=?$Ay2Y`ztu!|en zu)LO`qYwj~Jc?WqlYHd0l;q3})#dnf^`jZ%ltKuK(6?0(V>CGR>p0t?&di0&g8 z2|r8!S!f8>`Jc9unyNZB`+dyRUhaDv9XG?+vKVMALym zQHC_Zr>No8f9Q{s!T$nuWndX4n~Yfh26Xcgpv&_9-hZw{;s^R@{GMie!zgQrHljFD zw?}9r+Ko1%Te(O6Q-lvQ!N;TT=8tZlH*NxbC?CLjun>`v(uiHvGRQ#x8GS_Z*?UGJ z+i?vqH8hjd_f|QfZ=r%FMKU`1ze(KxcM1QQssC?!3PMu1plMJC#0=#~W=eRG2`%0n z3jMFYdRu2MdmR>+Tnt^DH(Fet@Zpc2V&uxFpv!YP6J>mm)rlx z*sG_7Z5ulw}e?R|I{lo$n80ypMB7_pcd|@ zrjTn1Ax0P}RIfbOW3;nLdLl~>{&*QwW!yd>; z2Nk3J(L_6Uz?T@&z(g4$ZC<5rF@#jld z?HT?TYllVP{g`bjaet@52)KgfBR=?OUN{G>65mucq>NC;X%5=w76H+Ki`)E%zB)X# zMQHgCCII4E1of&NT1EyNQ5K$EA!Ent@UMZ2&Ra10BsWmDn``DeJ{3<9ncD)&+}V;yy#L6`d_^7RoHg;vz5 z2mt23o1=dUKkP6N#_Jq7PMLtEeuV`Nvn6N>)QqI&6;1%yS9yL+2xwmeeQy8_OoRwl znSYVl!5Pi^Mw6qUmK)I`A})N$N@E+jiO~y_=Ac==tmby0!yg9#-ugP+GmAtCTL70B9ftw z4al?UtvzM*k4^wz2qdOzm1(JEvh;kq$pGbmxS2DK;=pKPL;HGem&xbvm9y}AjKOmAU?(u>={M)@%*f0kQ;{#g ziaAinye#SF3sQ9xWUUDvTQ~Cj{&fY2mmrHosrDj136{!Biso;ksYz%YkLXmKLq&ks zQ^rHyY1QTT7)yj!1%EXa_LC2`)Kbnqf2o26lM==*s=7gC4VB!<%o`~8|4FPwF zL?T8J|6mFwG?dJN6v=`aozi_wFrei~s6hi;fi6+p^$Pe4qW`7Dagc|`#YA}^T11#g z4?<6xj5R+?H;X`aI7SjM3I}~PQ4=^Q%2d&pTJ{Nl9Q_ENqzg!fL+mjPbj;U3 zonko$Ezn@1IO(dm*i$hq1gE1SW5?~3yO1rB8s4lPPHYthrKvly7@J9t`j6M zuR^?XtTNht%uh0qWD~Np^OYOJulKjB_g}-$wy z2Wa_fcb3-Wn(=AC$?Krqfyzxr4!6^pKRH6$VGd&c0@0pgfoU~V?hfQ zb3phjQFjmRtSgyCfuE>?seIFllnq`a5z>iGe~Uv^N(&V)ZcaX-vR4THT*HUKgmF{b{a1bR(-HjKoqgzpd(lSZ)8SZGNzmK0qYwmK@_5uE{1^X`sW{QuC)mA;wbcEKE@;hbnxqW>n`>Im|C&DZw+FmS##6i&m97z zf~qnT1p4vOXW)@M6?S-mW@Dn(JV1R`Vr;Vm@mRvd_{7l^E0|YaqpGtCN-MW?wQdbH z=J_$xnFBSUJpc*aC5qYnw@x_(uCoy3!S*YUz!|;<4%Zh*f!F{pw!()i1L_$0Z?8lBZ~5RnfMM~SAhT@G9+se@*?+F$ z1nL?em-JB-y`P1{8!)0To6F<#FH=L$7_L8I*I-h(P{$2QMG4Dkad>`DMFcR9dBCld z2ijpS!jpAm;oCk6e4RtnuaS!aFwN7}WKxt!|L0A_P{NPjaL^~E0apwf-JqTl`cR)W zC`)n#evRQR?n^1jRc`C9m!^mrXHy;(zHv`hUJ<8)rSAu{-7f$eTt=^|u5}$c2u0R# z8vLV5&@S0OKw+2qR6P2h$bcH4zRjcIy`Ub;6KV1w5kl(vkQ1~T6)t$(O#z_bLMzj z#|ePIW=fkt+5UN_IbAgRdkz}{Sq+!k4q$*gg-!g&}dvtRZOSDK}=2YQ9PV35U@xk8@r{j#r) zP+J5ipyS>mYnqN`)uKdcIhU#6!0UUhRjXoRkK>=6U5$OPvAu42?Z@)@1koT!shEIQ zghN132Mo4{P~!L9?s*&;HoGJSp1Kz%dD0{(^qT1ty|d$P z0CT&=_GH?EK9y?Jv|vNIO%RN0lI(Ss{V5ebF`4&l=zU8|%X{eA7ZDMWynq)Oug!Jv zz<-`iUARPxULlE*>J`V!yYFVu422P&p-Iwr^!Ard7(ees47%FAClGv@BpX&o<-<~$ zGXUK#y&+OdhKP9@{Quz>4L|={>#s)>;_+G75r=+4UoovU0;JmpkQWjfnqbgJ-yNUF zQDL)>)L{av6dze?G%p?!Z4ar!x|WmC+Dk$=iXZRaHmGpn&#m5bTQe&B_w&j5aQG}n z%YVIW3KwRQ5kR7k3JS3NK$(G9TN6(#6zTS32(}2Auyw30G zTp&8mVXO(`2r4S7SDp$y)z^~IJ84mOzIWi$@xv5G0h_@R5jws3wk1D?%r6>-JCg5h zm}l03Lnq_zUJ;a7v+$>+NMc-ojrsikv^^m@=t1@+h@VGB-aU|4Rjr5rkkHc3*Jl*m zL`$iGTw6Y9?ql@gG*H+7nY9U!{eo+sP8c3l&^Ic9Fytg?5zWigf;Kcjx=Wj!v7P&u z^mv`brYWJ zCY-QeUUWC3rInRAguOyg{GF1Lf;y2hpFiJYEvo(R4RhClBU#M7V5JHtRm0;AZCOmg zN@ab5da(e&{s(i{vz5_bX?w#2^DN%dQ!3AWmzHd=#@@(E;KfpSt{=X?|Md+}YRn+!8!5a?SO)J0oF>_qNwBdv|36!`g$r~Omf@e z8jzcagoNCMckQ#X+3)K z3hmNA-&J)Qgg{3pVHJtlNK#GJ{p}^_oMzURaN&C{R{v)v?_cZF!4-oJSL~de@1fhB zL3euZ+Bp8j$lm1G0%ir_YF@rvEI<$n>SamO8z8GpLwgvtE=6t z-G#4YYopuS)rrDF!!lq4*ce17F9lO4^Uy)7yO%xwcRF40@$tI{t$TBGzBOxJzywc8 zOH>EwFWGd^>syW@w+Jd%-Hy~`{96T;k()pV56j6kw98kjYoGyIcWyK!sI@K_>`}PE z-gZ?VvjO_X8c~26tX6=-cCL@V0(!1M5jZv?ovO6H`;v5@QNHk+x?!TyX0^kVDp*il z2&L4+o-p&U8k9n(yH_`<-3k9Cs7dA+{#+tP2C5-Qf1JVsHdwOL8=zKqY;Jz|AVcv6 
zGPzgo+nsji&POZeJCzSZt&|hQ^zLKq!PKg0QU$3QyYc!=T5@Pv-8D&bK{yi{OPWIr z5Waum%bVLL6%`e`Y$rS)uo#q1rv!-!i;2hnDY@kB&%hkTbF*hiqdRvMt0&JmZ*MZ6 zrlk<6taGJUptqI5;><(a8=pu+*d2&=AuHMsU6oO@Iy;szjk5?uQM( zwjzRX-Up~tFI#%_XsE)KX3w5EV++L~FjK(L@3ehzdU98`-eo_%m1;pCU^wnvyru9j zjjdke0}HA;aUh5zP5|W+))1G;FKHSXc?_SmbPi&)<=+)PeY$ymeEcw+)*?yx&{2ed zeraN&ikvNwUlIc#%kfq>bkKj%z)^Nr6&o8noVkZZIdkW=X-weE;memVe+FEQB3l4L z*K~7JtAEM-A?(xmV2wmC9;~YSw7P*oHf9Xeu~3m#Sy)-U@^s{)y7>CH^xUGrId<$y zgPJ0BVq9D>uo_c=EFTaOQe2GRllm{2W*;en&&t5<$2d6t5-G`AD%`8bl7mKj?Qigd z@){AvER3=QE;=N|@Vg9E-MH)*KY}OU#jg$4t#_QC; zi&W50^++_Ao)4Jxte}=PU1VU#%j{JDv-=-$VfXL-+WlWLa1Ij?h119%0@#~@m9_3> z{baK@3U_tO?s7o>-mj=gK*#A*)qlSv={cycD`$o1<^GW{6Fn0^xBDtw&%)^4z=;V9 zL(StpX<%qjeNR~&^C{VXeiu0;H9DYhH=7k568>JNo_Z0;i2@e`@re!51y?8DH z!8a73Cu9=?f`j2o|I)sHzupV8iV0a8?dPlisLd0^(_gD4@bK~Z0njE0?kQkA;gbfv z*Kqds_Cg(Nwg33nzc!Tu4!OeYxDd`?N*9(yQUc8oyB0zruuyu-0I`|0ogF{ab{rmR zjUfB4sByvr+%40E)&73K$RsAI7^?VsX;^+9!uPq!R7ZCgsCnW|NloR43g8A&Q>DLu z`+3nJl5ZDF@er0 zF3?QF7J{A;NaufMZG=tZhcbCI6T$@uDtG_aWkJyoHDoeA0OIL`5T+ox@0;zAPU1!G zIFxvDqq?>KdA9&!3Wi>kW6;yJ4zfr&O5sq=M+Q~Agb1DWcM#Ds#q@*Ot`5sdgpZ#?)5B@k0mjCnK z3Hu{|f1uWcdF}IVvILc%xWY=Wlg4 zl8z}XEIi$tM-TI$0|2(aH^Ux(vImA2Ffb|sSjA`AOg<%Ieh|z3{dU-r|LDg6FLw;r ztsghmm@ix~+~xADmx7Son1Pp<2pB(-W@b!o>vNecxKtDrP&MTtX<$HuTy(?M&pis@ z1~H0@8|})=%d1PYBo_R~I<-Fs^^%iA-Jk@AD0vEVC9elEC%#CipmmYFnnXxk+^zVJ zp`E{U&pm8V>RNpr&=cGM+R(@=DP5L(_1|YMT?6GlTp>bp5^Z!mF1X&=|LDE^K;*Wp!!fehph-$N?~uWvCBB047?-T-Pi+dGL%^M&pyLx z(e;T8*Jlr$dD$ZTMJ#iLbTj5#Z8Zy(N4oHLtNs1mTVLcLiw$Wxd;#bsm6@4&-nlLC z&)tNzXDN9xDmf~w3Li+Wt_fhvn=sv}0Q~h-DTV8_USG^9C8x9sM^{`2-B5Y9O^^TP+Sna^gsvuRiC05GG6hE|)e;6;zxboL)> z<96BaAb8%)mvq^p4-gR{5s>3=_Tzr{?%jer*EWiX|F~r8=^$oR3tz%A?K&aIi#wNl7#Kh_Q9T%i88W8QO5W`aL1c+=Mx3?Y&-SKS!i|ee`oL7N0 zn&BbqvX!^Q+jLP;5f9BfEQ0wELt)qU|8w>q0mvE+`lVTT$aM2fFrb&Xbd3aY>7)By zLwJ*W$vm@(v>INRTND9e>#R(w+%npg9_+!Nr{YhNoT`$Wc(fR9IORe9!7%M>UAF%- zZ1P0O&CIXF9$RXgKYxyb0e%O@&X7Ru8}pyv44)szSrhJe=a-LDA~OOtH9kA+1ge@; zB2;n$4hq$CfJH13cuzAskT|*Fp3yv#DpZ)xbBfk+&Y>8e`Sso*a@n`NlQggHX^zf# zGJT&u^}PTxvbB)a)f|!@S^f9h=&8VHjq@JLP(uzZ1txH;Nzdb%S3x5ZhC>Q67q8sv z^BN1dV$pq%+R~dpcRKn$2%Je&j8Pp0I!>y!L@_Q~7T-hfcai4&3g|=~{;`X!{LVmw*qEl9Y7F zufQ-|YvZDhV~Yo@m!2$ONRWWBG&X$rZgyFlLKy3}kz>Bsc0)Qtn96KIl>d+ ztb6y^Dg6pR`+bI?*3Oe`l+{*DJ}VnZp8I>#(4brJ*dudz-XE{J3(%Ep27b#85PNdl zm>MXG4*|zW^Zv!B!xPvPMF+5htv17r7KX>$*drESZnip27SEHtC3s+bgHE-`ec)!p z%NXyUF&3%yp(IP3oF5>o8U#6~X-n(4qkq1MeZ&$49k(tD(V%S|=E{dM9_2)FLjwL+ z3y0Z*=f8h?KiLOM`-JnD*|n3Iogmly@+UUyYhCMv0ykPxzq1!beKIb7%P}j;z_9VP z@q!R8?jx728Pk_Kc?pLr^Kt+6GD+_&5W^*cUq(I_OHoOQ-+n@V?rXKNp*?TR&_M`q z>*wWpL!9)jII=VHcg}pFFmyAfpG=n@9!zZD(q3)$P29rPXsD8e_UHVzBQ@3@B7X*x zNGK3mVXSM}dLR_|SWt}N1)XV$>qs&iuM2&|zEFsdK9&L#!>U8WeET(f=ym^Kf3=y& zx@rNcM^_U+Jj*{KG~|-TGc}zP6*wSz{GIE-G86+O12{4)q8u9ZFXi3-65bZVL-P=! 
z+({zXczAexW)ti*qHb8v2>CA^Rn9`gsw??RN^j^qG6FScxzSe3j0bu;$^*1w#LWi*VteP9RQG~mz>D6lNit*!j_nH$ zoa;4s07L-oJDub12lufC8xzF?kPyQ4=i;fcpd#l3?N;0CeINo^ z<(dA0a^)fRa3e_xbq@w@7ZKy~q-A@H6o8Rrp%jiY zC&X0_a9|ri+^`E<7|Pc#um@E4aknkq%>b*!0Qz&HtAgU^x!k26w(jdcf2+L-eE+mG zmPE&q4{IEtY7%Fg^$t`Gas+g&3wiP4DBvsK{`yKTJg%#6j-i27TROcfd3guuREczr zZSZC7%7+@%olUf1OD>5dr#xjQmVY+pI?&^dxw*KDRf%}LG&g42>5>wIOnV8?+3r8_ zUhT<*cf|8>Wgt+e#t)>~AfPFi-30Ixzr4c->Oz-1+)LIcx-%r9?%7iP;}p=1Ba~o= z>6zz#Fj<^;p^??d8Cp4}=6u3+PDv(CKOoy}SDm`^UFZAd{;8u=x1|S%Ie_hS*#Fq^ zvDbugCE{pP9Snsz(FW!s3072Dm-T)P@7_EQKLT-;^*&1HPW;mf)3)&EmD2}%5$k2( zM*sXLm8ziI5v$tRl6m2TIiama1K)TfpWw&u*h6p4G$n8DypDQ9cu7~C1NAFNe9Pcf z1%Bl8%?rXO(aRQy;bnwc>k1g?05J~^kdG5ay=xU%lBH&yO~REqslJKYJ_!DiL?$&wD7Zf2noZZqra`vVYP?BmhNV= zT0!8UUSkNKHOSiHao-RwN?tWaoCMlbe9Zc7UF%_=iW52=zuz~uQQ?!c6EE~+>x|-q zv7vPuH7A<#J14+dWvraheeBz{GNJCi*KdC-mgXxWecgo~MN=1WlIS6T*wRyK1@|b) zs~ow_O3}MpWv~Wg&%;8V=p!ky5O>2J#*~0KNZ^g}q#Wd1-{P{&op?}FD=ieUF4BCs zcJSbx)l;lklZ*9DT?*{@folivG(Fxz0@fG};xmA>qxj;*L%S&L)~0XI_RIDH+Gbcd z*V~KSnyr&HKT%pDp8+_D^dn#`Wv@MGP$%z>J^(TY*}x*d#u17N9nTrI8#pUAWLMid zMWTBb3ykt3`+Z_HqK1q_`pDOG1>0X`pt;*;$h{=K8JSpPg}Y%q+tiO_#1^H9onIEJ z;Vr3BRvkXjjwTmTW_c6RSgxLjz7oUc&tocFO+2KY>r^;h+|{=yfT>t$TQR9zsj6{y z6|ed0gUt(Mc2JaM2IBerxZqL6f2YLY&N+k7kc_BOZj=-r1gg)esKwod0L6ZI`BQzW zfRMuK>L?bR=OTst@DCg~x2||DH*STiN0JA551r~X>YOD%cQ(&!jO=BBIhLlTCZB07 z)~Ddu!|7h<;MjV6H-90>2u+v7*nmB?3*)?(4mT{x)f=*~%%Y-Kd!1RXdqfCMm%(S`a00or9}@<$#a;< zg18%#9Ef{98ywYhVu=hda}Z1#2lInlSVmO>eZ0D8*4*WM7N{b~(!7pfVYXUGhIWXM z4P#}TypZZmG9V$d-yz0gLmM2~z6Fjz^B!Se?IdPXES>g!4`{y5vlvUOS#uelD#qG2 zg`CayP&lLcYznz7ZurZSyvzHK15P}2OPiU`L(IJp;PcR_utGtLqB~1!gt;6 zr;rQY^-B!wo`P_pd~2VoHfHG*luO=W&0k^wV=t@Gh23~->E-Z?jHaI;!V`sd?b>i@ zmMDICL4W5&h0sRv3%)_>F6kbuzs%T7H5}83kvOs(w_y84GQ*ykMQ>FDn6K{_v8LuF z2Zt3vg9L&GDJi?RK7~BtqwRekW?@w>yQFqrNJPaTZC5w0RD&^n1+;-GRdnZpHqD0L zwZiUCfIB10AaIkwjIdS8DCAZ6)zxcPpI^^am2Y+Xu;5Ad6E}t_qbL6P$!fZzAH!wC zUNsBz9|#UQ*&g@^U9zz$a$9A1w7JbKq|hZd;5hD9e~4qiu8?Aw`#~y0|14;5;w0F? z5S`$r?gxM$_&t?g%>3CTrYFV=i(9=}LsA0vEofGypBZVYN;s|01A+G9!w6Eps_iq5 zoZ~V_i}*blarD#S2m%wX9`vmYXvYB#skg#h2w^rkV?S+*Gd@vu!1OXK!qw z99pj%8Ke{6QcSv6fOH1E&Bkg#L1#@1e1=QEkF5}Sd!{7VuK=N+%!S{@18Vzt;JF`~ z5{X@RPC8Bn4_*)-crjWX zx8kr)Mt1fwS6A11Z)_lgiHQv^cvF&-C#9vy6tAwXN?TZ@6@-nAnUPaaJlp-aa8&O` zcx-IY%8I=fCp$Y%a&ofP8@`;yIo;E31O0n_37BpuncAQ=8CP);o-de<*>DgR;v-=h zO|g|HZ%>x|n7l#qyp;4fL86qM%vsu2;sEg#l3G>XOQ20P$h-Ap9J;=wfym#k6EeCA z+t!ydao>@Jx)kQ0=R@t^xe~~QLGv1V0G?jE=0pFncR=FwiHoD@{stXHK7hdrp11n= z)EgYzS6_Q`u2KQ&St*@_# z95={D3+(%*lX}es`0wC&vk}_=>*%z<#e|b3d9h)c5h=^buUCV%IKi5{CcuB~-qGW~ z?IqaI5oH&$d$F&pPQv0#FrFB?8Nfenh^ZOHkms{quA@5XTm-3T((2vySUiJlivDDK z=-Fce`M)e{7*MwaHPMWGeC>TjWm3_=>65CeIme9>c|7HFXM#FS8qIF|{S?MF09c`} zIAJaX^9aa;I~6P�=fIDj*0*I^ukoj}IQHm>254g{k;}?A3Tg_g-+{ZXPp>6>~>y zg;!*|NcWz*_>w#N*PK6q$|5`PY74g298%r&3Xm~81l)nQBALLw9tZgK%G%iYcmXt? 
z=*l(Rj&tfJDSR9qodOuRkjGAmNS1OaMwC5x)6(CrW!|0tLh-qAV_Cev}hYlFE`zEDSWapCDt zz>769Le2LZN2>RCd8_vK?sYFh2}U5mwPnxacQtA<;8Zue>>UcbVb9V}6#QgI*Ipm8 z1u5y>W4as=%bXDPXHME=34I%fv59@%nMo^H%(Rypi;}MP5oKQLpoH@OA>u=h7QL4s z*0h^wBstd(-QmrEdTaeqakB9Kwv*#A9S@JHt!d!nJfP{d%)KJlXb%i%1oWzh?L%7` z2JbeY<#jJ6x_##mgXs>0qh~GbAREnY0xT$eV?w+VIgr772K}5!_kBVD!w#uEm@;3< zv7S?-Ivz*BNy&V)EOMst#6Y5K&9hFSVe4q-PyO`-=ni-_^IZ6);zuGew=b}Ae#ix32PHJd%uKk0L-`%hOEB8x2(avAH8mqu9HJN$zbp*ARoe|WR6IlXlhJvmk>BnOCGLQQ z2Xw2wyc!j+2H~*`@Slk(D6}jE?b|-V4GdXS2ey0Hpmw%yvPWYaYa=QsD4@sfEG#VS zTAZXV@h3R>MdV7)Did4?qatdWz+4d1X=`EO<48Jlk(CfpT9?7VdrAdB&83cLISo=x z{27%`^mZvn%O_OAUY6^srdOpsoS8Nw1lw8=94EsH&)RZjoe0_l!nfv-C6g>qWK29G zY1+p`33?*2B`>=|dzM}p|8RA3Vu-fz?re|B{m;CXfPR-i3nw5ELorwd?B4fYn9jXm!PXQ6B}FoEzilx$#C(y24XR-+^0{UzFc?>T^i)B1ej(v?x0Dv zsH&;4we|c}6(?zx1hM`(4TlaB1hUgcow?r5Aa8q$v9C2=G-S7_m3c-w)F8~4kGdp)I!lecF%=>k#>qG- z?4(`(mU;ULCBu!`adzCCpdV~)_qJ{CUvr0i(6!9_)+xvIeiRQv*@N~w-_h15MwMPeC(A$FYTDdV71@cYu_2HH1-2%wRs7Z*D8x*m?eo)_pcn0gIz@ zU?_4zdy9N38G}j`%x8QT}+g5_M@%di}Z;0L&Z^>>@KimIh|tu3PBZFYolmqC>@VmzMTdC=}5o{w@6jj$z-7-9@SUTvV5c-@!F z&(CkVE0c^RAe?FAeWMLI?#qdn`M}P7$uJ6x0$-Y=*KBtHcWo7Y(E^_*Y8gr|5el@j z;_*~qrhUJj2j%fu>av*B(ld~;gXF49D~;#o=+SW-;^UoALzC(h(nw-%pZ$D}E`oVf zSvn3eC36aHMDSg5ddX#2c4fCaS9RT z2B`JNI1gE8pnXhm3A7B=+%3RTBW-*(ykT~tkorrr#x>H%3KxV6Ri#eit{;=FJ%gK- z));@@exERC9TeX6JD-HiZle&sUc~M(icGQqo=_@|TJOzA5;k3lD$JRA>ui4WY8 za|3NK9mX4IELlrvcoH@AsV^7?!V+X%sb2n6h!{Fkc(YzJf>v$C;C;NnlLu+dj4j}+ z;NjZo{(zuIcK_zb<8GJ06j}}byn^h25ekK7V*(DRd40o{dcA+?qxbQ~JUeWA>*1`Q zKcB`*%C}yEh>RAJap8soJ27X=;!0xFK$>EBi62gd!%ctKS5Xz=uE*p2`l*pEk0{Q> z8KdJ3>Ir{?$g zH^llKnMH4=HGWNm_BJ1MKM7bo3J$)~P^3i|uDm@5Q!wpfotcNA`^)$1eD|DI z8y!9l+dMTL7m7^=zx-3;5HdP2|VHt(AC^}wlF`zABD~? z%R+baD8T@Bx4eIM04T@7hqU#tHjyt;c;^RO28fFtEmaU?;LQwslVNCtw}~Am_^>^@ zJ3x4o5CGmwHY+M$5>i7iwS~*d1vO<-!$YSREyI~(CpuP|V@g_-C#6<_Gu`__p`6(w z!aNnqJ6`fc#VlQP^3fHkQg(7`RK=B!NNoj+XWog#0}#WM(6LCSjmKgz`CNOO9!B7Z z?`@iEsfB)vWG*{a2X`u7$e*uCGvkFb46P9)g1NKl1zEvL^;YcmSTP1Ro-lopQ@T{J zPM@D30&y^lnH%Ac*MI>W((q8ByqPh`!2QnbvO`ZoOIzht5CP{rBDoAhP%2*xMMDAL zPV%~nobM+w@!)*#@B3Za%rqu}oFhQjhan)pG0cMO*arS3JxS{3-S_Hr#7C+-46rC@ z)TzEw$-c;xh-z!btli(3Rgc4cJ+FNK(_z=oa06XFo7uASAa~=8V&2Sk=#N_;G+`;| z=Z+Uf2fVHVkr&PSc5;nOF|X8~osqquUvF80UpJ@=X&T0Iy$M1pj>6Z{Rnx`k zeJ2VV{g#xXxik!$FL2pgJt}6BvyBITQVU9jB~b6;Tc8yh>sEjibmPg8A=D9vr`Dv& z+cH-RKCY+`J7-nuRb?L`YB0eQ4V1lCCYKgG_;t|4U&o_Ck71n8rQLjO)Pqs%jEYlb zfJZkaRJ-iaVZ$MaX=h4e8R)Qy7;e1=@47y2lfLxDhc!eE4ZOn8%xpMQzn^JMkGF6ba zS+4#l1p6%~6x%wnkWx0n`OQ?4jq_x>l`)TkySCs)ZBf5wb)Erz}vQOv8dL0Fc;jSQEdVRai%$2tX8PqM3i?!v*+b zvX0CcM?Uy)+p5hVFUzLXm_hZ0ifpOp%WqRTbqHA+%d-3U3%cs4pDG+5-I~p7fV!qG zFIFy~u4dZFN$Kf5+ZtT*D<6Eg0_D!H)ay0v9HAH0?#4Knh09^HU71mzyn_Z+%!C#` zM&DCS^Smle*-0#gN&233IW)HnbZ+M|VkI+wWhsi>z8p#`dTmE{;YNR{!znW}v#!Kv zbX05`<&7HlJzx>wq+cU z@AW|(jLHRQsIdi35v#kCo^v`Tc-F@AkR>~TqNJWlm+M7k)trHIg1se`QN(d?-+wPz zCF7A*%@NJ%F~|N3@?ArKMB7&R_Zje7-vjr-dQLSoCi2j`BIqDU3>`7N+!c7!L5b_W zx%I5>p1i^F!nBNxRN#ixGq=D@rw3>Z!;6^~L)OC8dUezg0KkRf<~MIZ?)!Hkhn8V){R8{LMoGc;7#MI7Pz+p<`cH2JPe2WDbs zkoUxWScGQ(3YR>FRM zvlAsx#igsl7kl#(m&0-^*Y(XC)m^!AkLf9IM;cN>2q-NtAE46N*48FxZGC!z$%W&M zFE)Jcor>in37&hd-Fb775knI875ZATPQElkwuhh`=HaHHQ@>^X_gp-^i51uWcgzwE z4GmWbZdP`Z7u1UjUKtQ{}xTl?}o z>x?v|A@`|MFq9;Dg2AoMtP*70kK57R7v;8`C7m+UoM57cB`V%Mcgr(-7neBx<`qGw3AopsOTcrUJ zNamB;7wSM}M)=hoA5@LRQ~UmX_XU2gFW>~ERGu3TC`O+2$=MGYD0c#di2hvNs;x&g z0mFYZ;Qy|grRBSrnxGq@RqaW=oT(g&lu!@e9rdBKv@{b?ERXPAR{C0^XJluUu+VC8s9K4;ScB&{c5F zUZ)vKi5O?uXDv#suH2I3ZB+(;I72m<+7#3Bk#hFV0`sMeCW@+t>0 zGX~9UwJb182beEzy{@tX)8x{VsMebYdUQ9J+Y0z`>NOm&hpNj3{7_MPiD7Gr#@CYT 
zRmTgV6yY-`2)m1nZ5OV6n7{XFjt1Cr-aEdpt*UOirJSG>`vtGspt{KO#-Y&O_htBp zCF~0K(mm)x&`)6m>5JoTeQE})UELZj( z=nOZ+jwp$QZ(AxmH$HW;hB$E^Sn(-N^*g{6(QaT9_5ASV$5TFOW*vX7jw!Hc_`+i? z(JK%XNco%>Hp;e-7*c{+FLBGm(OFwCI_M2v9Wb-+8lCg!dXw!dJ_{yvs?1(H0rA;> zo*ThI;EN054dfY65(Z*Er^NZRcas0;wJX98v)zbN`M&M&9xgh+oW}iNL`U1DHNRz7 z8s*p@Zzt}jKkE6Gv9=77vxR5B^9_ZGiM;a{xU#{;=3>&AOQReFW1+D+2IYSy{76X@=GSG>@Hc z9uBW-Zvr4WZ6T;18jvmP`<*>6fjD-}CCdkqwIcYW3dcr<*x2~Bp`h`5nk^r^A220+MeLlOJ^ zKwnP4Jg+(D>fW<4HBGkju>YY4e8et(3vX}lp64qL^vGdM#c#V&j+g-hkA5CPi~NF6 zv`>!XFF8i$F$=bhBSyPTOpCF;1XPLr0Lw>Q<^mQCMw=Ce9_pyaN9N;mVM(|(=>i$l@)1Vma z>B*}o;B7|<${}9_-v={H$~c1H*$qx3Gmfx1jCTwqzq z>?G&}(_3}VhSfgBcf{2;Z;+3(7CEwn-#)ohB*Nj(rEe%4T7DXBy2Ssu>DZ)i3H&@2 z&MQ8Mvdiih3Ja}^Ckg|muKms7u~xbKJ4ey>dDXN1196~qE<(}hj4=rzISN}{mLErn zJ_2b~AZpS>VlmGwII;CFkUHX{pY?}hw=cupi zT`u>MMG&Or%5xij$In_U`)PfnL0JtUl;ancc=N&w;q2~$zxLs;1fG{Dy2`|^bDR@h zRW0#hreDeV5RsVIpAHs3Z3jb2kSOJ%%cifkehK3N9B~!?0bc}uZ4rGuiFZxy1AwBZ zBCJ-Es}OSiw%0iCwN+zE?~}<2^uChcu^Jh+GCXzZK63OV(Q1zVTFviV*Y-z?cxX!MZ_bxJRodK$U2D^4~n1Fga5wniepF5L>fy-xxpXQ z03x5D=gd)!#B;637GR^l$GeUQcjtQ^r535dAMP7vBc3BL?;zPWmgUge%~%u9)t&s& z3caE|2|kv9=fp!)EbfJQ9=5mO(W55<0C&oou*u3LEOJJ6~$X`9`Tpal{@LhAEKie2Nu z7Mn!Db7G(BDjcv)uI9&!I-5a68+z^<@J?ztydk1VhK>}fYwzmU(dycQiaAVP!N0kh zu8TX>Ajgs_K#v-V{zj+p^BGW+?jn@1*Ftnd{l_I zIJqzBDMVz=F4E)dC-KF8aaBt1IQx^Gms9IKF3vGOW6AKh$5gh**?Hv@>~fz!q~9SX zCY|YuYi=b^%Fa$3GsHjU?J1exy%$&*HZuLyVRVWU-C}RGwt<>$WqP0 zUzGl$a;NlKQg>I&7SWwcTkC$`Z*beV9z8cOq^F|Rvu}=fmdLCwP2k+;n)d5IXdFLe zD%{z1^TJb36R2Fb?3%r{YrJLi^d6_`?d{`^A`-q`v&OGye*VmGGVSid4|ZggoEEWo zV0f`Ew8rY=Lci`_`rPn1f{xlAHa5G!0sr9yXOgl3+F77#2=dixGNL5s| z%8Vt~EimBA7?cxPrPJ2Q*s{EqXudi{^wLXCDl#eH#Ch)81>=N;q-`gyN@-@DUl6CWVoSQ#@n82RHGF&ci^PxD{i%DmaP<^}R`cx@Z<0?XNH(*! zC4HVuz2#(9hr<5N^V7)5HhS~PlK7!t+7|r-8c+U- z09Ik^CiPmf6RD<)pXIH8(l^EwK2cJ*$hOX%;CompGEAecM4ZOe@WIAXs*y$g(SOa_dN+L3~ zt4yLC-(%04P$lPwvbc6dNck;Z$=bY6Nft- zb>8~B8_299|bgQn`C^}|B28YEnJF6avfz=Wk=j=Xr4$u%X-8$iW_z4ZJ z-UR=t^Y z2I|7;TM_h(jKlYjlET*wYr}Vz7EoZvAuKyM!QuAtLZPXK9`bwuMEn*HtR>!4a7@hh zgBZ(oCvvf*Z@A63yzHIVC>K%ux{Tk`R;o}O!5qaiy?tTq#G1f5BcxSgIyEt+Q%Go@ z+|t=k$mMEaJnr@Cc}NA<`$(rk|XtP6MK)c4|_6hOdo+Qki9aG zPyB39iZ;d~^QrzbaizUA&z?Q4M)4gYpIS4L=yvL56&`O-lb2UrCaNMTqC~?zt&ifJ zU4O+qGNH>|8Y>;Dbk^B7F5P<~@e4fm731Rb!CA7X4hs8+VeX%8?lfv@>jwuLdNS;H z@jNEK##pxC7m)c?mi98MY-mMP>SCI%Ta2X*CtI=C4~omxXL;sy3NF8oHWOM={Lc2K z0wZT}i+Xa>Qg>O=q46G5dUrfMV`jH!w$mx+lgSmz-BGq3pJ|!2>9J})hszF^r;oQ) zbyA71oHRReS9mJIlGV?ew)^Y87lUtIx-Lz}%#i2U*Nwc(iM=yB#F}}YpQ1-!x9%fl zqLt6K1Q@e9Ku7h~K&7b9PnUj3*z^3}1c1At1;7SEfy_uS9In-mxSK8iKU~jkstZnSlVmb1nj)Lmz6y1QYG^bUc#KkDewR_?- z?bWr$`=g)azgDTQb&W+ktG^zKp9(f_>eFZEXuc?8kAK(0{oDk5R>>OZHp!|I^-k2U7j_{o|!YL==*hl})ljMr4z02Z!Rw z-i|#h6gf88*?S$?tCB6UH)T6k#xaiddmVl5&vjq-b$#yZy6(Sze|-Bxbnne1`*7@y9t=HYutH8*C#A5XmvI#lYjNq(!lwxQKHRd085 zmpd-cn|in^R@kOTcXOb5Z)hFHL+!GgTCQ{G%sopB-PzgQc0L|)*sM=|wS`pUNEnf; zHzb*8FQRoL*RcYjm{CF`_i!E+^1Uu7gQC0;3o;uX*KPo5ih21*RqOBgZ&6JHe2sSMCGRB)vct$EN(cM8i(YCR z{v7irOcsG1r9KOqLIR^EH*SVMBlya3iKWxP;vH~8ooPv9yzH(GbyqzOp}X?{XEJ3x z-8&}2#fb7Gez(zwX?-z}>(%Ka*O0rK2yJ}2&gAKq=hG?Nxo&Mmse~8U5TtyZpq!)6 zW?V3oVx#0I^%H?(IIosbn^8(E_mDTZ)2qZ`ULjw-7#w=%vC9Pq!3Htw1;D-Ff&TmY zsdVY!VpR0a+<Tv^4 z+^?8!Jz?b((vn}t$@O%KH*ced&#e<3&>J?qdt%jl1$jdT@k03H3}^Yu*Qd94sM5IX zTs~SIE~rvLL}dAiu7=9KN|qY5N%*l8Z$Yl`eBpfJ80N+2*{bgwD3b?`aOkudPc4 z>%B(NhQMCGLN_`HvJ-2mkV7u;?8Z}?F9PB=}HR;c4!FCVoE~+TX0rf&W z(*N$xOb6TFJ>`o*6x&)2%%Si|WmT%Hs_0?iv;GbU8#@bFFMKRN?8}tV$`dlFMz&v7 zMqO^QuI*3Wz&D`A1Q%{Y#jdi(K6LE1Q<5S(kZ>+)>s1dg@19C9HnVd@*N&X5JwxkN z6J>4Ru%xb$)3=IkOuSmHCxtOT{e|;*cm)$jws-s;vnMkhb%VtN_rV)9x 
z&IFlS)Q5sfUj{+JVN541RR*-Xf9I$8G;IE`iOSnIE&zC^{jh)2Pus1(RoyBmg}~>H zsyIEqjrXmnr1%DUAk@J0X zd7C-Yvv|f{qxJNkljvTX!x!}#%WHbAe2}j!n&)C#M6|HWo*i5#t~QV)`051ddiu`X zmmdAipMz4o;RbW1XE7#!+PGxwxv^q+dPjvPg!umJ7JiA!)P!!zKq_Kp*>DQyfVinq zqjieps}QTW;#7psGu*jGeESftijl4a`lQ--;4m)<7|%bLH!k z`5?rP%NzgIxA^@RlK92A^~p}-gN9Q22ayc3!aJw^MyI;C5ZJHtbm2Mu%a6ba_!ymX z^;fS!GLHN3SXE4Zr$8W$qfd<{8>V`Ob~D2=(NJ23()nv- zxk?-9_gt9Yn70{q+G{!Z)vhMri(2nlm$cJRmW|Epa3*Pit?MD`eUu&p2O-a9{?jAG zIgyVRS(pN?!&gqF%7`P5&Y%6F5bPN4;vjN=k1MdiFZC<`x<80>v(|C%b0S_pKWAI|U$p=n z4s(fAn^{B4@v#G39Aqmvb&<_A;l;zQpxUq0Dc9OX8NNEAdI_|fciLp31b)jtaQXoe zanlZ4U&ia+U2f7glSr$l{wV^um<^P_Su!mwmB%e(ZcjrNDlZ_AhcaV6bQi0v;P&UB zzQXlLBWahc`pL%0jnf97B#eH8XDs(m=?jfgUoZOkXA=?T!|A$NqG@{E9QJC(VC7_IlD4fD`^%D zA(+tO0}*lYo!gN%o3{j-q?AI9^FjF?@e3aWA|EtXA8}V2Og3yanoiT!pv2Rj^CnTu ztgE}3AYM=F5EvXaMtzue;4^hkZxRHSW>%t39v1n7+H*>eE?0Q-pUbW|c~)S#E+;Gt z>M@MR=MdD@mVwRKQ;I7x4F<-bQGgB-_EtLJu5$dX&@?HA z;rDS+=ykfrxd8e?lFQl!ZvQvWFFRo9E^E(R7US@~MHlj2BoTh+w{81}$p^V)_@M z79r%mGxrzq4eeDvJcN?u-r}xK`MqnoY=IonF2_5!8i*bdn{Hg@8Lobomw$2cW3fBU3g>bz{<6xSBn36%E4@q<7V%C0UW0MV2$EW7vTfz z`@VyjIs&Pwn=}!-M4S>6339KHJTBPKX;eT6^=f&ZFFd_9j^%4Fi$x+R87~Udiz| zvqn8A(2P6k2UUR$rJ>kZo#kOehgiSM(MzX0GQ z%yEM}r?<`sn(=@Hxq7r|PNlyYV_}%REhSff!k_)lD-%>?8$!%m7lkZ`I5M(0Y?BanoN>rAnU@Hw?rozJA8r06I z`rb{#D>2NSX>3Z{{-E-v_G@tT;rWVS=omH@EL?y8YN>{Lb-hVl1Okl8IvtE2iYmE?4s2{*1a5ExYBQG1xOW@RVo_;6Od7$W$B$l(eMK(rb5wUI!REsC zDEy*~TGwPd(#+{$heGh+Vug-%ybIA2tN3D6D`10x{W6bS`^mon*aI0&LHg=T!r~2> zI%i!#j_5oZwbn2&*cBhvep);(G3yE73XJO=MB%9tnrbEaeGXSHTFeSUSGoP28x$^$O;W4X4h9=eQ-RAHxljB6n@P=9KK?B6q$fu!y=);0kQNZ^i z3Ms&1{hJh77?c2+@JK;IjF)FR|`!!92-3Fgc*y>5r0M2^r-Gc`qg*fq_<3WN^b&u7}YUk{WM`1sOMY=q8cw(@TVsgQ3O6=pw_pqhz<5B zW<22<(QTD zqr#S^IlSCaZW3K5D$G_lGO~H@0Y@OyNo{~ zjXDXLJSNckfL>149(Gy8(#z%lHoe?<%OUQdJEeOn_&Sy54do)L;0%Gv_mlE;BH9_= z3k~xvfp;sa2n*LSw;Uif=Fb7rn!u|YqGGAZqeIIhe5DaB5sK}ZubKH|vRk!ND*Mw` zmCl2he(-_s>lfO0YNke?X4Vy43~n8Gy($;cm$PSHPjE;!UC@ICvZ6fYt=iIFc2^0*>6IO~@xYxGDB^V8G{Yojd&E7o-_ zr3-7*1}zHJ@?+SFq=_$%2CrIu}}=Z$pR>e@@5RlVOcDYo|$ z_h5c1f$ zu*tk0xuS`DvL#2dWK4bMbPW8o&OkgJQFq(HZF2Jdwcs(6RA)UZ&Sb>lmQL?laG*0s zgb8;F3Zu}Jz8{Yd$wS=i=)Tlbi*jibmL2dCy4&of{lrsOkXp-)v%GxF{F$mgUF+CR(n>*AUTLGyM}qEdX_5*wUz~POBYMS@fg%04k-0b9z_m@oH>_Es z+g((WY$f0$xU}=UB-s@H#p@F1&WGZ><`^<~XKg6XrPqkrTw)&7wQ>l^*b{=x^n1uV z=_8{StYac3&}j{Ro;x(;=)@XEul@2eN@zrp7z6CU9(r^QvSWx$H_9B}Psp&|xWRNc zH%V;WVtB9k&6s~$kZ`>9NE8IK9gw*t=QqV~_i^WR`9_4>M5v2@PI0ZF7A-MF;UVh& z*44L)ol$)g%4${C4L7ZhwYG-5(h^iZgTu5X8YKezrzk|cWAxZqjd26wvK=vTAAE;D z4JUbDU^2t3=jv$v9Jf0SiiSdSK-Dl=k_^0k?HcXiP_h zCu`e*mUFj{))m2Z<82UBM$L7&gra+`b8TIzd^Vgz8_fl}YLuP1<5R`eMLb$2OLZCP zzidoD3r?nCWvZrhs(-y>aDP{u9ZCmtqzgXu;?MIQk}g)r{uy~p^D6HL4Kl~=u{5)* zm2CBZmUdOaB(2&;h`Y~iG9M|j`8-d}W#Jcj1P`*;327C!!ODN5+plKU9yct*u&kq z;v8LY*>LPAo$ooE7Vy9pr=lQ~D+8RUxz{#La5!zOJgs4KeD9rX{YjL8q6ylYwaAA38@9_L$N4U!CH9 zC2$s%%$Z$4Emdp!Byg~4Y-!OOlRSprZQMOJT%u-~k|fgPe#<+(i)1KW0%67K5m(6kR%!3%O6g}1hgNC?}X}H#aOY3gOLh=#Y*vp7h_-b6Xtxa><5|!BxGTvx+ux#=Ya~TM8DH2U;J5DtcYtIYk1e4ZxVYt4OaE z$!&cN;j2%p|h_Om1nBhnBv;%FD*|z zhz1lM?$zx>6FwPiNDhDf-b3}#lLVU9<%T|YL_aO(k$S}nZZVyB3hP>^P+9Nwf%Pb8 zut7NW_No3x8*6zaeXf_!vd?;dU&Zj|{f*w_Zvh;6n=|$KjEgI+jJ9j7XlLEWt(kIx ztJ~;fevjA&R>~KXMXfWOuu-u+`O&RrFL+Pl!IgT)*7{YwgXEk$Kg>37e%;526sGO_ zdhWUU#F`)anJ&*xYm^$dk&s5Rl;$Qea+L>v&2%C|fBVqQz%?xLx{lNbh}9DoqVjYW zHZBOF(bOZIVcoO*TI(V%^^q1}EAyfivZ_VJanVp!Au!D6^cT zyUJU|WyblyJq7aVTVsME9==svV|%-HsNkvV;-wVL4P?Z> z6p%OM7YtME8}}ajbD-X^Xb$*qH6Lopx1YcY_j9s2R-Q#yo;(7-ruau#}8XQj0$$B+iFf;b5vWJ zw&yiMvud?}#$2stX#kVjw`q@>bE8H>YrGK|&XkigB!;H~gXIj1`yMW?M{l+GbE0jNi#IZbe!6(XPpJ0I+}Jk^ 
zM6D;y7bj==Wa1R_RC4SJN2U8^Uj5agtT&6UihBC3dHt9KIpXDiE9EVawzu)Pbir{6 zgSr-hmBL2orP3NyplE>MajA;(%XuAV+}mQ1%_WK*Pgau^BcU|1c&UBzjzPF*~23 z$0CnCqcbOe?)`R(WgOykI2g0gHE~54Wta<cfJ%iS%yONT{rS=j)91DP2`x^0;^kwrUN2U zrfb=3X)6DDsJbR@WR8HsaYe0IobhLFF)43%*vmvWO!^;I=6Ec`5`HvLc~_#%=v$A1 zwImt4`+)SP+H-XHwpYMq>At?Q;GrH3x7K}9t*ph%wV@EUZQ5m4)O21>dS-m>tCkM7 z>qwp6?c((SpGme_^2RKeKq^7&@~?N{zlzV}tB@~r#m@6S8Rq=TSP?n9F*%jPkM2h* zCZ~J|wd<_e=U?XAA;A~DT%0G|p+s$-r69BG79o0I9h#KuVK?CQ040_>>y3W(H5-mU z^?_>aK3%XBxjn4wfG-wYf8^fY)4L;DSwyqN< zY;Cy9`sJlJ52)7X0TrfTr7%eZtBOUwT*CxDMkUKX*UpKSMwfAc3YBN)Jrl#2eBbl= z`zCG@~(g55(2ZIame zNi&g_z4>YDF{+ufKk!YyXW9y&YAPyZv>F|&VFcAr#f`HgYImp8)+DC|`Dte!yyHl5?(2f?&31hu7n}$HleRwTKuzy>HU|~@(peE~;%h0Hy`m^P= zhr><^{Ftr09O*UHAJOMl@^M*AD{UrXMRRYSrb${Q^k#X#X7sz&Z6=Q`J@0FNf!{8H zXRH+rF~UmOJWg2)+kihz6CFsqAVvHHd})IJ{MWyg5V<^+EkY)~Y2d6E>>Mg(`WE>UPI8_^U; z<&->Ee<43E#(XJ%{KLoEqv*G3T#ImSwkQn|4I`3%N_6%5D@xb~vJs`|DGNOf+0EML z23EC^f`1kKZB!uxth0@l1?S23E!L?<8NuGH!&?+?lRgD}i=|w2il5{!jDZHx5<YY5jWoY*1wFGRi5Sl?{K~HuI6xL`(r}>#8Vl=N!w_RuT@!e*kL~| zaqF0O8$%^yd?PP^d<6b-zzp7^3pUh$_|CTnm^m@^%{u_n*A2k;yE?XLWgWOpIYu@~ zFloZXEO^T0x#lCNf;Xd2Fcs^_-BNjqhzYlnhRRnnO6TDx)irg{XoQqcR+I0usE&w- ztly^7Lh%&kHaO)eT0eGEQ4{6W~R?(9K8geCYDXsLgku=B0^VzhT2;-EApqC>e8fEk$0_ zE!>8oL0Qne-`MI?>!Vl60F07*FaFB%P?Hjac;liK7r?^yB_mfEFjE;T$+Pz;Qs2mO zHlg7JUZ2DO*O~-5c!k8CyvJ|*NV>#~2f^V~@W36w0|3AKC%}I;W1o24E)SRxb!~yV zMWA>pfR%k=lT}W*ro-52AU3G!wWEyBSiOC{f1z%O0fzU$T!Iofek|FBA;if2=H@2c zeGY(9UQ!}@AU8jmB-%&}R=!z*vHIDO08^ShNSIO$sL4Wptc5Z!WH`_>`ME3pVq(QB`gpyb_qLTitf?Y|WN+kzJJOV{>AFwi2e zBhS$(J}7kF?~2CyC)n7tU;y*8Fqp~q*lO~z`?NX?M0YaZ)-WfetRn9j%n4d=l- zDTnB>WBsm$Q{0-fbU4dR-+ zh5NNO%vk^VaFB>3A@QerEx)D-ay9y`+D8w6Z?Pg}Z3;o_!pDzg05lnIVFJO6Dlfn0 zNG;1YH3ho;frnQdHP8~z0Ah`lWTnp{_yq95h^ODJH_Z#o=D;(~hFUrhOOsX1|J{24|ndyr7W6fURBvHsC*; zT7~bx*ukVD(tOue-x_OG8?r@ND5SU|+Hyu@ymFG*Q3qeFr>n(Q!r&D!!(G9*Z{F;P zz(&!>oobMX?3LUwW`@Hf->zVpc(7{bQ!F|v*O3<$W1O3y0NoSQ=N3Y-)(kkcKL`Jr zunSJPrfV=>3wvqM-*x*#43pn)Cc{0)fD$!QvLnW<=;_Y=7NHo^SNBaVqSgSYg}Itm zX-&CrR7_39Nt~)YZ$nysbUujz)-@_(;#y77RUIUAqlr0}#x58^vrE2&xO^Llnd+F- zhbO*a{(|Svq&OY;Al#SFd=Pb@_7ZE5KSjB*?j18W0)iEaDo#f!r%bR8k}`?$=Q|fV z-4=sjk~edJo3*VI2M&L8D@9fa*uCbn;dzpS)xgMMG&QBTP z7sm8M@#go-Kx=j^Ya74j@V4K)ha-j!Hk6uQ0gOtx0`R{ccIVXt5W15Hp8dtVyvei( zs7}joPXH6~Y$%VVJ$ma4RL$G95|AYF`WD*qR@2%u{WXe@VNoj3LlV#w>^jjUX|PKy z1!DXPZ8JA+5Yhdp1R$J-X<)bNixl|m;ZPjpOh2&z{e;M0KN7kDbSV#3mrh#FgGj{T zWcv-xi-nPujMGt-F|W4g{9+&i!bq7M!x9y$;rK>fC=m zSm*MqmE2MD#YzQ&Mb5{}^1GQn(K%JH@esj@D*(7Pf?#i*c6U~-09Z|nxyz)$hQ9-I zez%+huJQ&d$`O2!zTzi6n z89N3?%Vp9b3f})!3y>>!`_85MtT=uN&WqQ7VMWmv>8lBB|1G9#bWc@))RW_2r5=3a zf;Ax$cM7l)V^Q7o0tqBWQ5pC!GMpC+aIv9QnN}=l%t~8M{0jtaUuw_Z555t>O7orT zTEO~0FQ0hM3jxX^6HAwXIcR~W6)Zup%+1m&Nk!nre0wkodL;DK|th4MJ?fYkb(nMN-)zY>=@6TZv>A$nwfcy{lQNQ_y>JK zWWlMJ-`)-|6Qt$Wao+=_|B2<;<_(zheYQJ4BG0w@AAJ2Pg?-^ZR!2<7^BQAktdE_s zNoJE}GI;9>R!0b0(1C`uVa-9w<0Oj0pFK*JD;LV(@JGvLWMwJ%h~5`|LZ5D=iUbs#;u2Zle}st0 z7y`(`R0`c-bf)`<*>!#HP3!yL&K|!awx78Q-HkbMZ!DvUwVAbf+>RwnQ}w9+FMCaH{A^^ZeKD0VW861mBu0s zw;sC=!@k59!GCG)`p6WD-x;_4e&A$dDhznz(M$UpWslVJf#p|Rac3QAC#gZx|3?Nz z*NHiJ6gPK17+{E7Vb?7HOv>`n|8-Is?dO4?XH)+l`9~FG*B}bpm2U=ymi+E!>_xn3 zGaLS3nb(n7U>A4-can?;T!o!i7K(bRR1QJa6%H(1M4!p~B5q=017@hVYp8~H6dy{l z+}^B}p7q=09ee8<1vQzN3^bK#!0-pH!Eni3mi9rz5XCim)lzv(;~=qlvxoMwcYX(v zqQYXTJ7Fm+wby!OR*mFUfu3-zo1p0|r^YL*kg#rC9?b^W=8jrEpR%Y0n?5oCEfVUK zyKmRr6?Jv1!a$7(;Upd-KFS#5s(euXIGeYOh7Mo^lU znx;WUO0aTw*wVhhw;>)k0D;Gx_u#k?oLM<`Ts{!t|4jN9s!ng~Nlf*g>5}b2{{8iC zG|BYce*UF2QFfE|?OAj&Z>s_GagVp!?&+@vtD`+?HAwulnF9zK<#y*PrH{we4}PvG zGTKfuW70QWY{K$_j1GPllP`bBdM>~IK+avIM#U$6T4D4L_s{9ZEAGPtCy?tB@Nq6) 
zHQ;OFx82(nfhBQX;P%X2ww)Uu!luJIcimKiubx9izrk(4<&;cNw`8*R(PdN*GwW?C z$~D}o5e$SyMCFArs&^Sh~ zw@%-kY@KH8G!#)+a1^Xb);vGv>zEwD)%qlS&GD2`uc6A&M*thZYv>0Rcy4=Y<7PD! zeR=nDxGN@#OA2bOLt$wR&+PK4OJSdAuUXPu=JRyu@6-c9MmCXsJwxl%Jr%VY!pbcL z*FwcMlABs7x0zs@Z{G&~VJU#t#Kw(uKx%1Lx;W0_MzAw@t(q<;i6CGa8Y!YvGzvVA zT%I*Icy*3{Jm>x6_T;0OkCb-0>Fu?F9Is9{s3G7iK~c%w`@#nF>BgCPtG`0g^z<30 zDk)LG>A3gmCh=aF$#*7Oxf6_v$#lnFyp_sY$u0YDR;s5qIs@z1%cV@FgBk@#Wh@q0 z-pX4rA5e-7_W=IZncH16kZ0;kbU+eg#Rhxp)JF-C+E@%|n&#lTs8%1G`J7@in477v z)gBG<7;K z1HZoz)7-Kl=|~DHi*8(Or>5vtl!32t_b$@ibx?YUusW6n(Ta=7L2d+Xd@ve zWw0=nGO883|D(<{Q+baktw9UvQ#j?ORx(eieVnT8&9a6W_DY|eo}ocfCA}KMOLKP5 zK#M(^QLpVtO;TiZ`6e4td7`ep9oKrShB@7ND>G?*ymfwb_gko;HW^gLurTzty6p3L z+mleMdsbx;^R23PaNXu~F+W>WWuJ!XfBCzc^S#TK-DfpE4;x zImVB??!!3VfKKpUVN7!mV$eEoQfPK#Vq}ikdPTejvdeK0?A@?I5sI38_?ZnXZiICM zV2~C zlW%~@@E3BE+15s9uY)B7-rnMb6d4*lVAI<1on8F9%bP0ldcC>h6@8H>BqBSpJc8VP zbnhCQMPaJ&!TS5@-UrIFP%Jr#+K5$LkFQB0?X_pUe z$lMbj6!wM%?H*}sHP|<-aNR=tVpmhI!gz|e#{1AuoV2`_nV!CH_NI2Dhd-4L+V4b( zI#U1>BaKdWB!VEk@BR7`o4Ng~bEhU(B7gOljl9{oy3Z;|uo~t=*%^E`qjP+fNXygZ zCb!epB~+QhjG0A>*LCO&~HpUH$$3Z)%vFDNSO-qCN+W{P{iKmH96!7TQ#S5ZC(COhx?erjBA2f|C z8=HpO&j>cQ5lZ*wL+|27aZ=zH_Fi2rd(Gj8&fvd@0CM?@>&)ZCvc~_MRCB%tLDc`$ zlV1igXU33^Yvd{f6L6xx_UoyL=tQtbTN=&{(nLN@0WXK_RsE+f`q4E7$M3h3(Ub!K zKDGQ$jwTI?KfkfiH$)~kYVM`4(lX)`7+9Mz$P}5*15J;<}sLiQxQh4@9_Am zWdd3<<_)gU@`=9c-i#}oAC2XFIL^U3kr!0tWy?f`QiL#p*#Nqu(SQ-?$bQ3~31!-m7=K@Ln#$*2ZEgO)XO zz5A()Srs|Z)}N|-=L%Ce zv9$KeraBE*9yu950R&NN9L<=qw>9S3Bm!xfte%QZOF z)R#T$=_6Gi$YfdS`nVnUI-R1U2&abV-j7eQC!*#+Q#Mcum9Ivpw56_1uVhVxvao}! z0NJUV5|rFxvGu|M2?go+9FA?WSLNz{k&v(f_cmfD zj87Q(d##^Jo<)P7h;hO-12dX1=6&GWbvIP*4zlB- z;sYAoS8|fWQ@zwPSHE*Xezh9whb}P`8j&1H!igZl*HSnjJ<`wZl z_@K$J8|y_PY6ZD7QwAZMb(GoF_VM0;>1Q*or=bgm zOX*5{0)#LA#--n|_Y0(PmjfMbnGd`+V)OQkSg=(lAezSYTS89u*=!%*2mQqlCO5nn z-w5&GU5o-T#Hh!0GP=&S(?MBZeoX}j!M3#Q6|Q9M+WSx3OC+XcXH@WX+9G&Ww7POc zgq5fb;YYSE0wM&Y$yWQrg%}|fy5OstG4-B#QKai;16od=l=ACQg{Sfx9!SO)1W_B= zL~zkzjkcN2UJHPxSmWXt70lt`*-iE z;pSySey)J8xE}Pv8k?0=#rInPAdkN>$JJCmbAvK-d z#_GZyucxgxDfyn#1q;`#9A*d9V#GHaj?)nLEc1YCfps}+IO^dS|yl^6fmN|$LbZP28HDX_+n z+awn*J}C3(47?x8@xRg<+_iSDV_ZyhL}2eyF7y(9M|(aNyPKWY0gEr3tU#?ks4-f$ za}D^Z%9wD0$=0^Em6KUPvk$;+jcpdX{a)HXcb6 zx>V~UGEVnn398OMx^DIj2k*+F0xG%E1l%cf-+t!n&_mJ|goG$idS&GFfB<;+nOqLA zTh=5?jkbyGR|E$yvDY(CF^j5T%BMg0U4>}0cA6$~`3r5Yv}{LC%lAaCaJ|^m7&vAH z@d2Fr($GEFDBq!AT1%F~AL$U~ZtLMMv9Q_Je*GT4x|F-`=@dV-qQq49?r?&ViwL3h z%fqE+YZ#RX;I#hOi%-x36ZtW#x zqq)m(q)kY4L4>#}jT;F3EI2>#q#~ktzBBY;D;bv+v#Z1?J;oj6SHDl&ewPT~sBQ3p z2%H=eie84Bc2pS_M*=P()YzdhR9f`=Js9M8uXFwrD%5GRo8d&|vcW!bxH_74jI+}q zPG@61CZIL?5-$vIM`w~*SJQxTT69P@k6n?eK{*RXv!{ry;KC4-e#yO8lzEj-QBhuS z{hu;9pvnE?(@b6=y{qm+PoUjaCr!+XpdL@)i-xQM_5M$<;o(|D99wBS1v(iY z)|U3t7ZiHNFkBjUqQz#8UU9ierOm3L(78Bi(Y8>IXePI!VnG5(h1YSF)iH|hxVpyn zMHi)CK6$MuOu#D{Rq4>KcX1NWeQ%oI#vxOgo(13hi}UL)~(<3 zF40>cJ33&ZvIf4omTkDg-FDr8x}<4%X88ifNq z8qw5!u7FO3brpiq=6W%7{gK75ZV*ry-#zy*WHppinm z?&keoA2nj;6d!e0A{|f4)YPGLfG0^?cTfc$m|C4r2#1^Z%N*lojXz?3L%@xoYUQq0 zdS!>+?SvkdPJK>uK6(7ORUVMg$45&EWC{+8pNpz5p8rj`y!P>p~9K`+6N$D{94V!;gm}#@2NH)ttaxFcGaZ z9!iLelskd7M^vn5Fu15&E|CD`Wo)n`-Yflx(uDoOdS6{rApqu=4Su~Oq#HWkb_X?H zpUTsv-sYyK4LrGA-oyl$Chf0J{W&Zo0Fx!(Jsc`OH5Ll3)!iAHRm+gtJ#Z%vxGD|m zSfQcQ*viY~O79A*lN|#PT@RQgZ(4iE#fFQ=cG(M}oc9V{WrrG0@sITtVxK##hHOLO zFR76i9SV}k_!h-O6}l5;(XtjUVKuL^GeKrsG5BYX2aSUW;$4f^h^OlQ?N8-XHX!qC{_gl}o~oWP6wFqWM(#9p$r91fC?kLc;sO^%?H zOaemb&CF75eKT`Kg#RSriu{x)D0m#nJ$Uyon$9+sQCUdhU?)KtGv)W; zC=H>5xk#QzpX>pd{X%g}jV`z*%4_`sHTurrdT=BS+l~i$fH*HBY*UuqGnsRuLL#P7 zSYD+{YDnn*{^=B6`y6=rmm(3Sf1r&0d~qj$$AAPkzX&A?Nh2~s@Z}QtUjCLtMLfov 
z9AN{KKH;E8Vkfw0W>Rgkj|hj84J4&oCe`QwYv&0On{^Uxd;ith;iz8R;GvDcR+c3( z@}g8Zyb|JVR$1VB%mL6?zO<2@>Z?)_(-o#mCQ1jt;g!A8a`jNf)7! zLz?h6Ildni8O}lx+>6i;6}o+uVr)FJ&TUC9Q>>t&CBA#Cldl8|7GDFeD3CWdjvxKE zS}qLDO-*6KhNqXIn5uL9||8HT7PTD^mXOC%K3+|f+cY9CEA8zN<+E9zCk08dCb z3J#7)%3u?7?(WW2Ko>WWzhA~GY(ElLa19F#H1<^+ST*B9-ep zR8!mi7IovvgGWW+j)M=o(zCd))quKE5`RzEIaEv&0kYKww$J`nS`~L~q*WJG14#-U zm4fogyZ+-1`GTEGRX2rpvo1x1O|iqum|=JnqF|YpmPdbreJ3=0mW8d4y{PbPbEX+X z1RE?}+p&p>UmB3$R~GqWjim1&Q6(9+ZW=Qgay0J0&M$*btxqwjsux!QcVtv?^)mrP ztL{Xdb!02V@)^+n5$H0CKDrgFvdg)W^65fj%Ki)DYhk-_{a*#0{p(rHZGjc@4Y@?J-4GV#bs9z|70?Fy z0x=2;kF}(mnBnRG#)yZP0GzQFptqp=;R)(czk_V{KU6DU)CV0;FKvMvewUqtMGWpR zQFr6I((;|9Mtyxr{i@QUBPx!CmZz$W)T+4^A(Vu!l;?B>f@KgGko^qijr zzLUPFrl8xnb!9tfDt+N>hxD*Jl!+~C{l`?MD|qA2$uolO9@%>_uNv%yF~dESzhrX} zrLEFIEjQB6(&ZBQq;FU!*_QQa`)5=5%p>&SgPMiTTm)2q@SMc|bj;ZJ=C?|EdNJ8; zfdcZ;EZpYs*+y;V{Zf}Rc%P~5Iy}Y&FILt=6rz+$$RT6GzLokJ-F4?Porx`T`^VHR z7&P_SpmF0rfur{GAnqSpqeVF{Lm{I*+;({wz1%PxQ-xFb|;_LoSa{t zFaxQP(82vmt4oh}r7?UYBE-FcImHu+4JSb(2okbw(+FHXP+MoT>-{?dKhF-Pq_tBp z$OJ0|3I)L4^~%|9>1s$&)9!*R4uZxe)wr$XyUE_ZvFa`TPI2Y~vXcSkscL30BJ4Ee zq>>GIR0WC7W9AXj(St$EE#HZ83xIZx1La_Gl;zh;=)nu6dvu|aWR~6oGJi|oGvaJ7 z_6$Go%u-HpI_2*jur4J!cMkWb{NqO&=4#;&CDAL>mM^v0y zc+deI=`i2n4%KA578Nw-r$!yg#m}>>Mx9V}s>et_B9-QKCU#Hp2ecG5Od1|*(nRG1 zdA=>^imjUACsD5{pK6nNcZ-fe7BSHJN)@PuGHjfX@aTWXNr}G%xLglhKCLu8+IP>+ zp7xotQaqua1%r_lkIm(?f`#i3z<4`qp3eJ-Xgn-`_@e8U>Q4n{=ZcTk;{tX`{UkVq zAkDy|rvo@QwQ#;q1?Mg3e0{@xuj+!E7nmz2ofA~O*y2F>z#m-8NPA!6^%Mpy1}`dq zNK2Ev!UY=tM}m7cS=rby(~ZHIIaP8BLBW|dHNs}c{rAqc+U#OlS{kE}5X4ZY_+;&5 z8gz7z0asiqT3K0{U6w7z{iudjJpPCMJEn}Loib$|1AvZG!~xfwWF^9fATeVfkUjuo zCvKa3I1PZ}-e+re{A~`;sP3~bBiMs+S@=yQ*nOb{10FHsxfPTS&`4FDsC4~5NqH6`@?;e z!APuX?h)XFUoFAb{e_UdN_~JWKUVE*hy4cGnF7Op`JxKAFDmUMA?ya6fJyEzHuvN1 z19xpE|7Oqcxbtr#f2Iybn7~EQge5nvutlor&Cgg6p!mdoz~Mf{e=WxEk^9;GPIa1g zgYo(PzvceD+@C~2`uBj>6uVY_3zr0}#~4v5E>mvV2Z3&YO7=Xf-wC<`JdwWxSV=bM ziPxN*Zme;(+0J0eOZzSEkud`r`N4D2xU(YDjMc2I#$Prtv>8`}f2DQ-S{v z^!@oGmi@TEm-oNE2-FN+_{)KU4}ARq*3aM8{PUrI569oj`DX#tKl>2;+b2-S)qF05 z?SFgGpRUC}ti>O{^$hDc`^u31#XHV+_D_TJ_xt>}b-Q;RXikY(gX*Qf!_wd0arWJx4$c4g_dkCm04k5*d(Q6&@9zij=iC0QOZ%%W{C_a{ h2T1+@2_{G4-`?#UsQvQQI0yd7OFwyBB>C*+{|C7a3XA{% literal 0 HcmV?d00001 diff --git a/experimental/torch_xla2/docs/how_it_works.md b/experimental/torch_xla2/docs/how_it_works.md new file mode 100644 index 00000000000..e4098ca0096 --- /dev/null +++ b/experimental/torch_xla2/docs/how_it_works.md @@ -0,0 +1,134 @@ +How it works +============ + + +## Tensor subclass and eager mode + +The class `XLATensor2` is a `torch.Tensor` subclass +that overrides `__torch_dispatch__`. + +It roughly looks like this (with some details removed): + +The complete class impl is at [tensor.py](../torch_xla2/tensor.py). 
+ +```python +class XLATensor2(torch.Tensor): + + @staticmethod + def __new__(cls, elem): + return torch.Tensor._make_wrapper_subclass( + cls, + shape, + dtype=dtype, + device='meta', + requires_grad=False, + ) + + def __init__(self, elem: jax.Array): + super().__init__() + self._elem = elem + + __torch_function__ = torch._C._disabled_torch_function_impl + + @classmethod + def __torch_dispatch__(cls, func, types, args=(), kwargs=None): + # here assumes ALL tensors in args / kwargs are + # instances of XLATensor2 + args, kwargs = unwrap((args, kwargs)) + jax_func = some_registry[func] + res = jax_func(*args, **kwargs) + return wrap(res) + +def wrap(tree): + # wrap jax.Array with XLATensor2 + return pytree.tree_map_only( + jax.Array, XLATensor2, tree) + +def unwrap(tree): + # get jax.Array out ofXLATensor2 + return pytree.tree_map_only( + XLATensor2, lambda x: x._elem, tree) +``` + +In other words, assuming that we have a function +that takes `jax.Array` as input and returns `jax.Array` +but otherwise implement the same semantics +as a `ATen` op; then, using this tensor we would +be able to route the call to this jax function. + +[_ops.py](../torch_xla2/_ops.py) files defines some of those ops. + +Let's take `aten::add` as example: + +```python +@op(torch.ops.aten.add) +def _aten_add(x, y, *, alpha=1): + """if isinstance(x, jnp.ndarray) and isinstance(y, jnp.ndarray): + + assert x.dtype == y.dtype, (x.dtype, y.dtype) + """ + return x + y * alpha +``` + +The `@op` decorator just puts this function into `some_registry` dictionary. + +`_aten_add` has same signature as `torch.ops.aten.add` but takes `jax.Array` as +input. + +![](dispatch.png) + + +## fx Interpreter and dynamo mode + +Now, assuming we have this `some_registry` dict with key core Aten ops, +and value the equivalent python Jax functions. We can also build a `fx.Interpreter` +subclass that executes the jax function given a `fx.GraphModule`. + + +```python +class JaxInterpreter(torch.fx.Interpreter): + + def call_function(self, target, args: Tuple, kwargs: Dict) -> Any: + if not isinstance(target, + (torch._ops.OpOverloadPacket, torch._ops.OpOverload)): + return super().call_function(target, args, kwargs) + + op = some_registry[target] + return op.func(*args, **kwargs) +``` + +There is no wrapping and unwrapping needed because `args` and `kwargs` are +already `jax.Array`'s. + +Using this interpreter we can build a dynamo backend: + +```python +def backend(fxgraph): + + def tojit(*args, *kwargs): + return JaxInterpreter(fxgraph).run(*args, **kwargs) + jitted = jax.jit(to_jit) + + def f(*torchtensor): + jaxarrays = unwrap(torchtensors) + res = jitted(jax_array) + return wrap(res) + + return f +``` + +The inner function `tojit` is a function that takes and returns +`jax.Array`'s. So it's suitable to be jitted with `jax.jit`. + +`f` is returned callable that takes `XLATensor2`; so can interop with +other torch codes. + +## nn.Modules and state management + +See [README.md](../README.md) for using `torch.func.functional_call` to +make `nn.Module`s interact well with `jax.jit`. + +See [Examples](../examples/README.md) for training using torch's optimizers or jax's +optimizers. 
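+
+To make that concrete, here is a minimal sketch of the pattern (an
+illustration only, not the exact torch_xla2 API; the toy model and the
+`pure_forward` helper are made up for this example).
+`torch.func.functional_call` runs a module with an explicitly passed
+state dict, turning a stateful `nn.Module` into a pure function of
+(state, inputs), which is the form `jax.jit` expects:
+
+```python
+import torch
+import torch.nn as nn
+
+model = nn.Linear(4, 4)
+
+# Pull the parameters and buffers out of the module into a plain dict,
+# so the forward pass becomes a pure function of (state, inputs).
+state = dict(model.named_parameters())
+state.update(model.named_buffers())
+
+def pure_forward(state, x):
+  # Run `model` with the supplied state instead of its own attributes;
+  # no hidden module state is read or mutated.
+  return torch.func.functional_call(model, state, (x,))
+
+y = pure_forward(state, torch.randn(2, 4))
+```
+
+Once the forward pass is pure like this, the state dict can be
+unwrapped into `jax.Array`s and the function jitted, in the same way as
+the dynamo backend above.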
+
+[def]: dispatch.png
\ No newline at end of file

From 310f08dcf0d694c45f9c6052f2f0e341deca42c6 Mon Sep 17 00:00:00 2001
From: iefgnoix
Date: Wed, 17 Apr 2024 16:37:14 -0700
Subject: [PATCH 02/53] Fix profiling in benchmark script (#6934)

---
 benchmarks/experiment_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py
index 443a4067ac1..da952f3c079 100644
--- a/benchmarks/experiment_runner.py
+++ b/benchmarks/experiment_runner.py
@@ -319,8 +319,8 @@ def loop(pytorch_profile=None, iter_fn=None):
         self._args.profile_cuda_cpu or \
         self._args.profile_cuda_cpu_individual_ops
     enable_xla_profiling = self._args.profile_xla
-    assert not (enable_pytorch_profiling and enable_pytorch_profiling
-               ), "More than one profiling path enabled."
+    assert not (enable_pytorch_profiling and
+                enable_xla_profiling), "More than one profiling path enabled."

     if enable_xla_profiling:
       logdir = self._get_results_dir_path(experiment_config, model_config,

From 9f2b82dce7924fea25eb546e2cf8a3b75acaf901 Mon Sep 17 00:00:00 2001
From: Jiewen Tan
Date: Thu, 18 Apr 2024 00:08:53 -0700
Subject: [PATCH 03/53] [Pallas] Integrate FlashAttention with SPMD (#6935)

Summary:
This pull request integrates FlashAttention with SPMD. The way it works
is to create a manual sharding region for the kernel, which means we
wrap all the inputs with enable_manual_sharding and all the outputs
with disable_manual_sharding.

Added a new test file because the original test file is not SPMD aware.

Test Plan:
PJRT_DEVICE=TPU python test/test_pallas_spmd.py
---
 test/test_pallas.py                     |   5 +-
 test/test_pallas_spmd.py                | 110 ++++++++++++++++++++++++
 torch_xla/distributed/spmd/__init__.py  |   2 +
 torch_xla/experimental/custom_kernel.py |  66 ++++++++++++--
 4 files changed, 176 insertions(+), 7 deletions(-)
 create mode 100644 test/test_pallas_spmd.py

diff --git a/test/test_pallas.py b/test/test_pallas.py
index 2902b5e21ba..f8480782094 100644
--- a/test/test_pallas.py
+++ b/test/test_pallas.py
@@ -417,6 +417,7 @@ def test__flash_attention_bwd_dkv(self):
   @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 3,
                    "This test only works on TPUv3+.")
   def test_flash_attention_backward(self):
+    jax.config.update('jax_default_matmul_precision', jax.lax.Precision.HIGHEST)
     from torch_xla.experimental.custom_kernel import flash_attention

     torch.manual_seed(42)
@@ -449,9 +450,9 @@ def test_flash_attention_backward(self):
     loss.backward()
     xm.mark_step()

-    mse = torch.nn.MSELoss()
     for i in [(q, q_grad), (k, k_grad), (v, v_grad)]:
-      self.assertTrue(mse(i[0].grad.cpu(), i[1].cpu()) < 1e-4)
+      self.assertTrue(torch.allclose(i[0].grad.cpu(), i[1].cpu(), atol=1e-05))
+    jax.config.update('jax_default_matmul_precision', jax.lax.Precision.DEFAULT)


 if __name__ == '__main__':
diff --git a/test/test_pallas_spmd.py b/test/test_pallas_spmd.py
new file mode 100644
index 00000000000..33434594191
--- /dev/null
+++ b/test/test_pallas_spmd.py
@@ -0,0 +1,110 @@
+import logging
+import os
+import unittest
+
+import torch
+from torch import nn as nn
+
+import torch_xla
+import torch_xla.core.xla_model as xm
+import torch_xla.distributed.spmd as xs
+from torch_xla import runtime as xr
+from torch_xla._internal import tpu
+
+if xr.device_type() == 'TPU':
+  from torch_xla.experimental.custom_kernel import flash_attention
+  from torch_xla.experimental.custom_kernel import jax_import_guard
+  jax_import_guard()
+  import jax
+  import jax.numpy as jnp
+  from jax.experimental import pallas as pl
+
+
+class 
PallasTest(unittest.TestCase): + + def _attention(self, q, k, v): + attn_weight = q @ k.transpose(-2, -1) + attn_weight = nn.functional.softmax(attn_weight, dim=-1) + attn_output = attn_weight @ v + return attn_output + + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 3, + "This test only works on TPUv3+.") + def test_flash_attention_spmd_data_parallel(self): + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.HIGHEST) + n_devices = xr.global_runtime_device_count() + xs.set_global_mesh(xs.Mesh(range(n_devices), (n_devices, 1, 1, 1))) + + q = torch.randn(4, 2, 128, 4).to("xla") + k = torch.randn(4, 2, 128, 4).to("xla") + v = torch.randn(4, 2, 128, 4).to("xla") + + o = flash_attention(q, k, v, partition_spec=range(n_devices)) + self.assertEqual( + torch_xla._XLAC._get_xla_sharding_spec(o), + f"{{devices=[{n_devices},1,1,1]0,1,2,3}}") + + expected_o = self._attention(q, k, v) + self.assertTrue(torch.allclose(o.cpu(), expected_o.cpu(), atol=1e-05)) + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.DEFAULT) + + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 3, + "This test only works on TPUv3+.") + def test_flash_attention_backward_spmd_data_parallel(self): + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.HIGHEST) + n_devices = xr.global_runtime_device_count() + xs.set_global_mesh(xs.Mesh(range(n_devices), (n_devices, 1, 1, 1))) + + torch.manual_seed(42) + q = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + k = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + v = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + q.retain_grad() + k.retain_grad() + v.retain_grad() + + o = flash_attention(q, k, v, partition_spec=range(n_devices)) + loss = o.sum() + loss.backward() + xm.mark_step() + + q_grad = q.grad + k_grad = k.grad + v_grad = v.grad + self.assertEqual( + torch_xla._XLAC._get_xla_sharding_spec(q_grad), + f"{{devices=[{n_devices},1,1,1]0,1,2,3}}") + self.assertEqual( + torch_xla._XLAC._get_xla_sharding_spec(k_grad), + f"{{devices=[{n_devices},1,1,1]0,1,2,3}}") + self.assertEqual( + torch_xla._XLAC._get_xla_sharding_spec(v_grad), + f"{{devices=[{n_devices},1,1,1]0,1,2,3}}") + + torch.manual_seed(42) + q = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + k = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + v = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + q.retain_grad() + k.retain_grad() + v.retain_grad() + + o = self._attention(q, k, v) + loss = o.sum() + loss.backward() + xm.mark_step() + + for i in [(q, q_grad), (k, k_grad), (v, v_grad)]: + self.assertTrue(torch.allclose(i[0].grad.cpu(), i[1].cpu(), atol=1e-05)) + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.DEFAULT) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + torch.set_default_dtype(torch.float32) + torch.manual_seed(42) + torch_xla._XLAC._xla_set_use_full_mat_mul_precision( + use_full_mat_mul_precision=True) + xr.use_spmd() + test = unittest.main() + sys.exit(0 if test.result.wasSuccessful() else 1) diff --git a/torch_xla/distributed/spmd/__init__.py b/torch_xla/distributed/spmd/__init__.py index abfe1c62ba0..099f25e9fb5 100644 --- a/torch_xla/distributed/spmd/__init__.py +++ b/torch_xla/distributed/spmd/__init__.py @@ -27,4 +27,6 @@ "_mark_manual_sharding", "enable_manual_sharding", "disable_manual_sharding", + "enable_manual_sharding", + "disable_manual_sharding", ] diff --git a/torch_xla/experimental/custom_kernel.py 
b/torch_xla/experimental/custom_kernel.py index ff4b335058b..bb4ce0c4e23 100644 --- a/torch_xla/experimental/custom_kernel.py +++ b/torch_xla/experimental/custom_kernel.py @@ -5,6 +5,7 @@ import torch import torch_xla import torch_xla.core.xla_model as xm +import torch_xla.distributed.spmd as xs from typing import List, Callable from torch.library import impl @@ -168,15 +169,29 @@ class FlashAttention(torch.autograd.Function): } @staticmethod - def forward(ctx, q, k, v, causal=False): + def forward(ctx, q, k, v, causal=False, partition_spec=None, mesh=None): # Import JAX within the function such that we don't need to call the jax_import_guard() # in the global scope which could cause problems for xmp.spawn. jax_import_guard() from jax.experimental.pallas.ops.tpu.flash_attention import _flash_attention_impl ctx.causal = causal + ctx.partition_spec = partition_spec + ctx.mesh = mesh + ctx.full_shape = None save_residuals = q.requires_grad or k.requires_grad or v.requires_grad + # SPMD integration. + # mark_sharding is in-placed, and therefore save the full q, k, v for the backward. + full_q = q + full_k = k + full_v = v + if partition_spec is not None: + ctx.full_shape = q.shape + q = xs.enable_manual_sharding(q, partition_spec, mesh=mesh).global_tensor + k = xs.enable_manual_sharding(k, partition_spec, mesh=mesh).global_tensor + v = xs.enable_manual_sharding(v, partition_spec, mesh=mesh).global_tensor + # It returns the shape and type of o, l, m. def shape_dtype(q, *arg): if not save_residuals: @@ -208,11 +223,24 @@ def shape_dtype(q, *arg): False, static_argnums=range(5, 13)) if not save_residuals: + # SPMD integration + if partition_spec is not None: + o = xs.disable_manual_sharding( + o, partition_spec, ctx.full_shape, mesh=mesh).global_tensor return o o, *aux = o l, m = (v[..., 0] for v in aux[-2:]) - ctx.save_for_backward(q, k, v, o, l, m) + # SPMD integration + if partition_spec is not None: + o = xs.disable_manual_sharding( + o, partition_spec, ctx.full_shape, mesh=mesh).global_tensor + l = xs.disable_manual_sharding( + l, partition_spec[0:3], ctx.full_shape[0:3], mesh=mesh).global_tensor + m = xs.disable_manual_sharding( + m, partition_spec[0:3], ctx.full_shape[0:3], mesh=mesh).global_tensor + + ctx.save_for_backward(full_q, full_k, full_v, o, l, m) return o @staticmethod @@ -221,6 +249,9 @@ def backward(ctx, grad_output): q, k, v, o, l, m = ctx.saved_tensors causal = ctx.causal + partition_spec = ctx.partition_spec + mesh = ctx.mesh + full_shape = ctx.full_shape grad_q = grad_k = grad_v = None grad_i = torch.sum( @@ -234,6 +265,20 @@ def backward(ctx, grad_output): expanded_grad_i = grad_i.unsqueeze(-1).expand( [-1 for _ in grad_i.shape] + [FlashAttention.MIN_BLOCK_SIZE]) + # SPMD integration + if partition_spec is not None: + q = xs.enable_manual_sharding(q, partition_spec, mesh=mesh).global_tensor + k = xs.enable_manual_sharding(k, partition_spec, mesh=mesh).global_tensor + v = xs.enable_manual_sharding(v, partition_spec, mesh=mesh).global_tensor + expanded_l = xs.enable_manual_sharding( + expanded_l, partition_spec, mesh=mesh).global_tensor + expanded_m = xs.enable_manual_sharding( + expanded_m, partition_spec, mesh=mesh).global_tensor + grad_output = xs.enable_manual_sharding( + grad_output, partition_spec, mesh=mesh).global_tensor + expanded_grad_i = xs.enable_manual_sharding( + expanded_grad_i, partition_spec, mesh=mesh).global_tensor + if ctx.needs_input_grad[0]: payload, _ = trace_pallas( _flash_attention_bwd_dq, @@ -303,7 +348,16 @@ def backward(ctx, grad_output): if 
ctx.needs_input_grad[2]: grad_v = grads[1] - return grad_q, grad_k, grad_v, None + # SPMD integration + if partition_spec is not None: + grad_q = xs.disable_manual_sharding( + grad_q, partition_spec, full_shape, mesh=mesh).global_tensor + grad_k = xs.disable_manual_sharding( + grad_k, partition_spec, full_shape, mesh=mesh).global_tensor + grad_v = xs.disable_manual_sharding( + grad_v, partition_spec, full_shape, mesh=mesh).global_tensor + + return grad_q, grad_k, grad_v, None, None, None def flash_attention( @@ -311,8 +365,10 @@ def flash_attention( k, # [batch_size, num_heads, kv_seq_len, d_model] v, # [batch_size, num_heads, kv_seq_len, d_model] causal=False, -): - return FlashAttention.apply(q, k, v, causal) + *, + partition_spec=None, + mesh=None): + return FlashAttention.apply(q, k, v, causal, partition_spec, mesh) XLA_LIB.define( From b2556d6c3176cb738517701a1d417d7cbc0f27ee Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Fri, 19 Apr 2024 09:09:52 -0700 Subject: [PATCH 04/53] Use TPU build for CPU and GPU Python tests (#6921) --- .github/workflows/_build_plugin.yml | 2 +- .github/workflows/_build_torch_xla.yml | 4 +- .../workflows/{_test.yml => _test_cpp.yml} | 15 -- .github/workflows/_test_python.yml | 176 ++++++++++++++++++ .github/workflows/build_and_test.yml | 69 ++++--- benchmarks/requirements.txt | 3 + infra/ansible/config/env.yaml | 6 +- infra/ansible/config/vars.yaml | 4 + .../cuda/torch_xla_cuda_plugin/__init__.py | 3 + test/benchmarks/run_tests.sh | 12 +- test/dynamo/test_dynamo.py | 2 +- test/spmd/test_dynamo_spmd.py | 2 + test/stablehlo/test_exports.py | 2 +- torch_xla/csrc/runtime/pjrt_registry.cc | 3 +- torch_xla/experimental/plugins.py | 4 +- 15 files changed, 258 insertions(+), 49 deletions(-) rename .github/workflows/{_test.yml => _test_cpp.yml} (90%) create mode 100644 .github/workflows/_test_python.yml create mode 100644 benchmarks/requirements.txt diff --git a/.github/workflows/_build_plugin.yml b/.github/workflows/_build_plugin.yml index 5f773971430..e30b88aed1e 100644 --- a/.github/workflows/_build_plugin.yml +++ b/.github/workflows/_build_plugin.yml @@ -39,7 +39,7 @@ jobs: shell: bash run: | cd pytorch/xla/infra/ansible - ansible-playbook playbook.yaml -vvv -e "stage=build_plugin arch=amd64 accelerator=cuda src_root=${GITHUB_WORKSPACE}" --skip-tags=fetch_srcs,install_deps + ansible-playbook playbook.yaml -vvv -e "stage=build_plugin arch=amd64 accelerator=cuda cuda_compute_capabilities=5.2,7.5 src_root=${GITHUB_WORKSPACE} cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps - name: Upload wheel uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml index 969fb3b5dc9..3e85b7c4c98 100644 --- a/.github/workflows/_build_torch_xla.yml +++ b/.github/workflows/_build_torch_xla.yml @@ -26,6 +26,7 @@ jobs: GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json BAZEL_JOBS: 16 BAZEL_REMOTE_CACHE: 1 + # BUILD_CPP_TESTS: 1 steps: - name: Setup gcloud shell: bash @@ -36,6 +37,7 @@ jobs: with: repository: pytorch/pytorch path: pytorch + submodules: recursive # TODO: correct pin - name: Checkout PyTorch/XLA Repo uses: actions/checkout@v4 @@ -45,7 +47,7 @@ jobs: shell: bash run: | cd pytorch/xla/infra/ansible - ansible-playbook playbook.yaml -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0" --skip-tags=fetch_srcs,install_deps + ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 
cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps - name: Upload wheel uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/_test.yml b/.github/workflows/_test_cpp.yml similarity index 90% rename from .github/workflows/_test.yml rename to .github/workflows/_test_cpp.yml index 0f9e96e31e5..d0056d34963 100644 --- a/.github/workflows/_test.yml +++ b/.github/workflows/_test_cpp.yml @@ -45,17 +45,8 @@ jobs: matrix: include: # Use readable strings as they define the workflow titles. - - run_benchmark_tests: 'benchmark_tests' - run_cpp_tests1: 'cpp_tests1' - run_cpp_tests2: 'cpp_tests2' - - run_python_tests: 'python_tests' - run_xla_op_tests1: 'xla_op1' - - run_python_tests: 'python_tests' - run_xla_op_tests2: 'xla_op2' - - run_python_tests: 'python_tests' - run_xla_op_tests3: 'xla_op3' - - run_python_tests: 'python_tests' - run_torch_mp_op_tests: 'torch_mp_op' timeout-minutes: ${{ inputs.timeout-minutes }} env: DOCKER_IMAGE: ${{ inputs.docker-image }} @@ -64,14 +55,8 @@ jobs: USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }} XLA_SKIP_TORCH_OP_TESTS: ${{ inputs.disable-pjrt }} XLA_SKIP_MP_OP_TESTS: ${{ inputs.disable-pjrt }} - RUN_BENCHMARK_TESTS: ${{ matrix.run_benchmark_tests }} RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }} RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }} - RUN_PYTHON_TESTS: ${{ matrix.run_python_tests }} - RUN_XLA_OP_TESTS1: ${{ matrix.run_xla_op_tests1 }} - RUN_XLA_OP_TESTS2: ${{ matrix.run_xla_op_tests2 }} - RUN_XLA_OP_TESTS3: ${{ matrix.run_xla_op_tests3 }} - RUN_TORCH_MP_OP_TESTS: ${{ matrix.run_torch_mp_op_tests }} steps: - name: Setup Linux uses: pytorch/test-infra/.github/actions/setup-linux@main diff --git a/.github/workflows/_test_python.yml b/.github/workflows/_test_python.yml new file mode 100644 index 00000000000..bd260cdb2d1 --- /dev/null +++ b/.github/workflows/_test_python.yml @@ -0,0 +1,176 @@ +name: xla-test +on: + workflow_call: + inputs: + dev-image: + required: true + type: string + description: Base image for builds + runner: + required: false + type: string + description: Runner type for the test + default: linux.12xlarge + collect-coverage: + required: false + type: boolean + description: Set to true to collect coverage information + default: false + timeout-minutes: + required: false + type: number + default: 270 + description: | + Set the maximum (in minutes) how long the workflow should take to finish + timeout-minutes: + install-cuda-plugin: + required: false + type: boolean + default: false + description: Whether to install CUDA plugin package + + secrets: + gcloud-service-key: + required: true + description: Secret to access Bazel build cache +jobs: + test: + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.dev-image }} + options: "${{ inputs.install-cuda-plugin && '--gpus all' || '' }} --shm-size 16g" + strategy: + fail-fast: false + matrix: + include: + # Use readable strings as they define the workflow titles. 
+ - run_benchmark_tests: 'benchmark_tests' + - run_python_tests: 'python_tests' + run_xla_op_tests1: 'xla_op1' + - run_python_tests: 'python_tests' + run_xla_op_tests2: 'xla_op2' + - run_python_tests: 'python_tests' + run_xla_op_tests3: 'xla_op3' + - run_python_tests: 'python_tests' + run_torch_mp_op_tests: 'torch_mp_op' + timeout-minutes: ${{ inputs.timeout-minutes }} + env: + GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} + GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json + USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }} + RUN_BENCHMARK_TESTS: ${{ matrix.run_benchmark_tests }} + RUN_PYTHON_TESTS: ${{ matrix.run_python_tests }} + RUN_XLA_OP_TESTS1: ${{ matrix.run_xla_op_tests1 }} + RUN_XLA_OP_TESTS2: ${{ matrix.run_xla_op_tests2 }} + RUN_XLA_OP_TESTS3: ${{ matrix.run_xla_op_tests3 }} + RUN_TORCH_MP_OP_TESTS: ${{ matrix.run_torch_mp_op_tests }} + BAZEL_JOBS: 16 + BAZEL_REMOTE_CACHE: 1 + steps: + - name: Setup gcloud + shell: bash + run: | + echo "${GCLOUD_SERVICE_KEY}" > $GOOGLE_APPLICATION_CREDENTIALS + - name: Fetch wheels + uses: actions/download-artifact@v4 + with: + name: torch-xla-wheels + path: /tmp/wheels/ + - name: Fetch CUDA plugin + uses: actions/download-artifact@v4 + with: + name: cuda-plugin + path: /tmp/wheels/ + if: ${{ inputs.install-cuda-plugin }} + - name: Setup CUDA environment + shell: bash + run: | + # TODO: Make PJRT_DEVICE=CPU work with XLA_REGISTER_INSTALLED_PLUGINS=1 + echo "XLA_REGISTER_INSTALLED_PLUGINS=1" >> $GITHUB_ENV + + echo "PATH=$PATH:/usr/local/cuda-12.1/bin" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64" >> $GITHUB_ENV + if: ${{ inputs.install-cuda-plugin }} + - name: Check GPU + run: nvidia-smi + if: ${{ inputs.install-cuda-plugin }} + - name: Install wheels + shell: bash + run: | + pip install /tmp/wheels/*.whl + # TODO: Add these in setup.py + pip install fsspec + pip install rich + - name: Record PyTorch commit + run: echo "PYTORCH_COMMIT=$(python -c 'import torch_xla.version; print(torch_xla.version.__torch_gitrev__)')" >> $GITHUB_ENV + - name: Checkout PyTorch Repo + uses: actions/checkout@v4 + with: + repository: pytorch/pytorch + path: pytorch + ref: ${{ env.PYTORCH_COMMIT }} + - name: Checkout PyTorch/XLA Repo + uses: actions/checkout@v4 + with: + path: pytorch/xla + - name: Extra CI deps + shell: bash + run: | + set -x + + pip install expecttest unittest-xml-reporting + + if [[ ! -z "$RUN_BENCHMARK_TESTS" ]]; then + pip install -r pytorch/xla/benchmarks/requirements.txt + fi + - name: Test + shell: bash + run: | + source pytorch/xla/.circleci/common.sh + + run_torch_xla_tests pytorch/ pytorch/xla/ $USE_COVERAGE + - name: Upload coverage results + if: ${{ inputs.collect-coverage }} + shell: bash + env: + CIRCLE_WORKFLOW_ID: ${{ github.run_id }} + CIRCLE_BUILD_NUM: ${{ github.run_number }} + BENCHMARK_TEST_NAME: ${{ env.RUN_BENCHMARK_TESTS }} + PYTHON_TEST_NAME: ${{ env.RUN_PYTHON_TESTS }}${{ env.RUN_XLA_OP_TESTS1 }}${{ env.RUN_XLA_OP_TESTS2 }}${{ env.RUN_XLA_OP_TESTS3 }}${{ env.RUN_TORCH_MP_OP_TESTS }} + CPP_TEST_NAME: ${{ env.RUN_CPP_TESTS1 }}${{ env.RUN_CPP_TESTS2 }} + run: | + # TODO(yeounoh) collect coverage report as needed. 
+ if [ -n "${BENCHMARK_TEST_NAME}" ]; then + exit 0 + fi + docker cp "${pid}":/home/jenkins/htmlcov "${GITHUB_WORKSPACE}" + if [ -n "${GPU_FLAG:-}" ]; then + if [ -n "${PYTHON_TEST_NAME}" ]; then + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out + fi + if [ -n "${CPP_TEST_NAME}" ]; then + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out + fi + else + if [ -n "${PYTHON_TEST_NAME}" ]; then + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out + fi + + if [ -n "${CPP_TEST_NAME}" ]; then + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out + gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out + fi + + if [ "${CPP_TEST_NAME}" == "cpp_tests1" ]; then + ABS_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "commit_id": '\"${GITHUB_SHA}\"', "ref": "HEAD", "source": "https://github.com/pytorch/xla", "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}' + echo $ABS_METADATA > abs_metadata.json + gsutil cp abs_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json + + INC_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "patchset_num": 1, "change_id": '${CIRCLE_BUILD_NUM}', "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}' + echo $INC_METADATA > inc_metadata.json + gsutil cp inc_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json + fi + fi diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 38203f57580..e5738b5a6af 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -19,6 +19,7 @@ concurrency: cancel-in-progress: true jobs: + # Old CI workflow build: name: "Build PyTorch/XLA (GPU)" uses: ./.github/workflows/_build.yml @@ -29,6 +30,40 @@ jobs: secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} + test-cpp-cpu: + name: "CPU C++ tests" + uses: ./.github/workflows/_test_cpp.yml + needs: build + with: + docker-image: ${{ needs.build.outputs.docker-image }} + timeout-minutes: 120 + collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics + secrets: + gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} + + test-cpp-cuda: + name: "GPU C++ tests" + uses: ./.github/workflows/_test_cpp.yml + needs: build + with: + docker-image: ${{ needs.build.outputs.docker-image }} + runner: 
linux.8xlarge.nvidia.gpu + timeout-minutes: 300 + collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics + secrets: + gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} + + push-docs: + name: "Build & publish docs" + if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r')) + uses: ./.github/workflows/_docs.yml + needs: build + with: + docker-image: ${{ needs.build.outputs.docker-image }} + secrets: + torchxla-bot-token: ${{ secrets.TORCH_XLA_BOT_TOKEN }} + + # New CI workflow build-torch-xla: name: "Build PyTorch/XLA (TPU)" uses: ./.github/workflows/_build_torch_xla.yml @@ -41,30 +76,31 @@ jobs: name: "Build XLA CUDA plugin" uses: ./.github/workflows/_build_plugin.yml with: - dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1 + dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1 secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - test-cpu: - name: "CPU tests" - uses: ./.github/workflows/_test.yml - needs: build + test-python-cpu: + name: "CPU Python tests" + uses: ./.github/workflows/_test_python.yml + needs: build-torch-xla with: - docker-image: ${{ needs.build.outputs.docker-image }} + dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm timeout-minutes: 120 collect-coverage: false secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - test-cuda: - name: "GPU tests" - uses: ./.github/workflows/_test.yml - needs: build + test-python-cuda: + name: "GPU Python tests" + uses: ./.github/workflows/_test_python.yml + needs: [build-torch-xla, build-cuda-plugin] with: - docker-image: ${{ needs.build.outputs.docker-image }} + dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1 runner: linux.8xlarge.nvidia.gpu timeout-minutes: 300 - collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics + collect-coverage: false + install-cuda-plugin: true secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} @@ -75,12 +111,3 @@ jobs: # Only run this for HEAD and releases if: github.event_name == 'push' - push-docs: - name: "Build & publish docs" - if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r')) - uses: ./.github/workflows/_docs.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - secrets: - torchxla-bot-token: ${{ secrets.TORCH_XLA_BOT_TOKEN }} diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt new file mode 100644 index 00000000000..14e2549fec3 --- /dev/null +++ b/benchmarks/requirements.txt @@ -0,0 +1,3 @@ +tabulate +scipy +pandas diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml index 15e8dc79d6c..d324729ce11 100644 --- a/infra/ansible/config/env.yaml +++ b/infra/ansible/config/env.yaml @@ -14,7 +14,7 @@ release_env: TPUVM_MODE: 1 cuda: - TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0,9.0 + TF_CUDA_COMPUTE_CAPABILITIES: "{{ cuda_compute_capabilities }}" XLA_CUDA: 1 # Variables that will be passed to shell environment only for building PyTorch and XLA libs. 
@@ -31,7 +31,7 @@ build_env: PYTORCH_BUILD_VERSION: "{{ package_version }}" XLA_SANDBOX_BUILD: 1 BAZEL_REMOTE_CACHE: 1 - SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}-{{ clang_version }}" + SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}-{{ clang_version }}{{ cache_suffix }}" _GLIBCXX_USE_CXX11_ABI: 0 GIT_VERSIONED_XLA_BUILD: "{{ nightly_release }}" @@ -41,7 +41,7 @@ build_env: aarch64: cuda: - TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0,9.0 + TF_CUDA_COMPUTE_CAPABILITIES: "{{ cuda_compute_capabilities }}" XLA_CUDA: 1 tpu: diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml index 2347d066e84..c1ca7a93d27 100644 --- a/infra/ansible/config/vars.yaml +++ b/infra/ansible/config/vars.yaml @@ -1,6 +1,8 @@ # Used for fetching cuda from the right repo, see apt.yaml. cuda_repo: debian11 cuda_version: "11.8" +# Determines supported GPUs. See https://developer.nvidia.com/cuda-gpus +cuda_compute_capabilities: 7.0,7.5,8.0,9.0 # Used for fetching clang from the right repo, see apt.yaml. llvm_debian_repo: bullseye clang_version: 17 @@ -10,3 +12,5 @@ package_version: 2.4.0 nightly_release: false # Whether to preinstall libtpu in the PyTorch/XLA wheel. Ignored for GPU build. bundle_libtpu: 1 +# Suffix for bazel remote cache key +cache_suffix: "" diff --git a/plugins/cuda/torch_xla_cuda_plugin/__init__.py b/plugins/cuda/torch_xla_cuda_plugin/__init__.py index 9321d26a1a6..e6863ff711a 100644 --- a/plugins/cuda/torch_xla_cuda_plugin/__init__.py +++ b/plugins/cuda/torch_xla_cuda_plugin/__init__.py @@ -27,6 +27,9 @@ def physical_chip_count(self) -> int: # TODO: default to actual device count return xu.getenv_as('GPU_NUM_DEVICES', int, 1) + def configure_single_process(self): + pass + def client_create_options(self) -> dict: local_process_rank, global_process_rank = self._get_process_rank() local_world_size, global_world_size = self._get_world_size() diff --git a/test/benchmarks/run_tests.sh b/test/benchmarks/run_tests.sh index 7d404a7ee7f..3832b21ed22 100755 --- a/test/benchmarks/run_tests.sh +++ b/test/benchmarks/run_tests.sh @@ -39,10 +39,14 @@ function run_make_tests { } function run_python_tests { - python3 "$CDIR/test_experiment_runner.py" - python3 "$CDIR/test_benchmark_experiment.py" - python3 "$CDIR/test_benchmark_model.py" - python3 "$CDIR/test_result_analyzer.py" + # HACK: don't confuse local `torch_xla` folder with installed package + # Python 3.11 has the permanent fix: https://stackoverflow.com/a/73636559 + pushd $CDIR + python3 "test_experiment_runner.py" + python3 "test_benchmark_experiment.py" + python3 "test_benchmark_model.py" + python3 "test_result_analyzer.py" + popd } function run_tests { diff --git a/test/dynamo/test_dynamo.py b/test/dynamo/test_dynamo.py index e7ac2681d5a..01d77e1f78f 100644 --- a/test/dynamo/test_dynamo.py +++ b/test/dynamo/test_dynamo.py @@ -152,7 +152,7 @@ def test_simple_model(self): # Tests that the dynamo bridge automatically moves tensors to XLA device, # then back to the original device. 
-  @unittest.skipIf(xr.device_type() != "CUDA",
+  @unittest.skipIf(xr.device_type() != "CUDA" or not torch.cuda.is_available(),
                    f"GPU tests should only run on GPU devices.")
   def test_simple_model_automoves_tensors(self):
     x = torch.tensor(100.0).to(device="cuda")
diff --git a/test/spmd/test_dynamo_spmd.py b/test/spmd/test_dynamo_spmd.py
index 0595f502da0..d1f6cdc3dce 100644
--- a/test/spmd/test_dynamo_spmd.py
+++ b/test/spmd/test_dynamo_spmd.py
@@ -205,6 +205,8 @@ def test_dynamo_spmd_mark_sharding_outside_of_compile(self):
     dynamo_res = dynamo_linear(xla_x)
     self.assertEqual(met.metric_data('CompileTime')[0], compile_count)

+  # https://github.com/pytorch/xla/pull/6921#issuecomment-2062106737
+  @unittest.skip("Failing in CI")
   def test_mark_sharding_inside_compile(self):
     met.clear_counters()
     device = xm.xla_device()
diff --git a/test/stablehlo/test_exports.py b/test/stablehlo/test_exports.py
index a08b65d1ffe..6208ae1ca52 100644
--- a/test/stablehlo/test_exports.py
+++ b/test/stablehlo/test_exports.py
@@ -45,7 +45,7 @@ def test_interpolate(self):
     exported = torch.export.export(model, arg)
     shlo = exported_program_to_stablehlo(exported)
     ans2 = shlo(*arg).cpu().to(torch.float32)
-    self.assertTrue(torch.allclose(ans, ans2, atol=1e-5))
+    torch.testing.assert_close(ans, ans2, rtol=1e-5, atol=1e-4)

   def test_constant(self):

diff --git a/torch_xla/csrc/runtime/pjrt_registry.cc b/torch_xla/csrc/runtime/pjrt_registry.cc
index 648076757be..99e23f4b555 100644
--- a/torch_xla/csrc/runtime/pjrt_registry.cc
+++ b/torch_xla/csrc/runtime/pjrt_registry.cc
@@ -60,7 +60,8 @@ InitializePjRt(const std::string& device_type) {
   std::unique_ptr<xla::PjRtClient> client;
   std::unique_ptr<XlaCoordinator> coordinator;

-  if (sys_util::GetEnvBool(env::kEnvPjrtDynamicPlugins, false)) {
+  if (sys_util::GetEnvBool(env::kEnvPjrtDynamicPlugins, false) &&
+      device_type != "CPU") {
     std::shared_ptr<const PjRtPlugin> plugin = GetPjRtPlugin(device_type);
     if (plugin) {
       TF_VLOG(1) << "Initializing client for PjRt plugin " << device_type;
diff --git a/torch_xla/experimental/plugins.py b/torch_xla/experimental/plugins.py
index 77c2a572de3..620dff7e45c 100644
--- a/torch_xla/experimental/plugins.py
+++ b/torch_xla/experimental/plugins.py
@@ -76,7 +76,9 @@ def use_dynamic_plugins():

 def using_dynamic_plugins():
-  return xu.getenv_as(xenv.PJRT_DYNAMIC_PLUGINS, bool, False)
+  # TODO: dummy plugin for CPU
+  return xu.getenv_as(xenv.PJRT_DYNAMIC_PLUGINS, bool,
+                      False) and xr.device_type() != "CPU"

 def default() -> DevicePlugin:

From 0417d4d5fe4b28a9fb3a18fa8ee563335d8e7508 Mon Sep 17 00:00:00 2001
From: Manfei <41607353+ManfeiBai@users.noreply.github.com>
Date: Fri, 19 Apr 2024 14:10:23 -0700
Subject: [PATCH 05/53] [Fori_loop|While_loop] Create fori_loop.md (#6942)

Add doc for fori_loop/while_loop and add simple user guide for simple
test case
---
 docs/fori_loop.md | 114 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 docs/fori_loop.md

diff --git a/docs/fori_loop.md b/docs/fori_loop.md
new file mode 100644
index 00000000000..0c9f85af399
--- /dev/null
+++ b/docs/fori_loop.md
@@ -0,0 +1,114 @@
+# Fori_loop
+`fori_loop` is a replacement for a pure Python for loop: PyTorch/XLA enables `torch_xla.experimental.fori_loop` to keep the loop's computation graph rolled during compilation,
+like [`jax.lax.fori_loop`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.fori_loop.html), instead of the current behavior of repeating the computation by enumerating every execution step
+of each iteration. `fori_loop` can improve memory utilization and may speed up compilation.
+
+`fori_loop` can be used like this:
+```python
+from torch_xla.experimental.fori_loop import fori_loop
+res = fori_loop(upper, lower, body_fun, init)  # body_fun is user-defined
+```
+
+Currently `fori_loop` only supports simple cases like [this test](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/test/test_fori_loop_with_while_loop_simple_add_dispatch_in_torch.py), and you can try the [simple user guide](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/docs/fori_loop.md#simple-example-with-fori_loop) with `fori_loop` on TPU too.
+
+Implementation details:
+- When the loop range is dynamic, [`fori_loop`](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/docs/fori_loop.md#fori_loop) is implemented with [`while_loop`](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/docs/fori_loop.md#while_loop).
+Like [`jax.lax.while_loop`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.while_loop.html), PyTorch/XLA supports `while_loop` with
+native PyTorch and the XLA backend: XLA::While. Because `while_loop` does not support autograd, it is intended for inference only.
+
+- When the loop range is not dynamic, [`fori_loop`](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/docs/fori_loop.md#fori_loop) is implemented with [`scan`](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/docs/fori_loop.md#wipscan).
+Like [`jax.lax.scan`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.scan.html), PyTorch/XLA enables `scan` using the XLA::While operator.
+This implementation is very similar to `while_loop`. `scan` supports autograd, so it can be used in both training and inference.
+
+# while_loop
+`while_loop` is a replacement for a pure Python while loop. PyTorch already supports `while_loop` in
+[code](https://github.com/pytorch/pytorch/blob/ca6a0e1348ba7dcade1833d983b1b4ca12a5c1e1/torch/_higher_order_ops/while_loop.py#L69).
+PyTorch/XLA supports `while_loop` with native PyTorch and the XLA backend: XLA::While.
+
+`while_loop` can be used like this:
+```python
+import torch_xla.experimental.fori_loop
+from torch._higher_order_ops.while_loop import while_loop
+res = while_loop(cond_fn, body_fn, init)  # cond_fn and body_fn are user-defined; init is a tuple or list
+```
+Currently `while_loop` only supports simple cases like [this test](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/test/test_fori_loop_with_while_loop_simple_add_dispatch_in_torch.py), and you can try the [simple user guide](https://github.com/pytorch/xla/blob/ManfeiBai-patch-81/docs/fori_loop.md#simple-example-with-while_loop) with `while_loop` on TPU too.
+
+
+# [WIP]scan
+Like [`jax.lax.scan`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.scan.html), PyTorch/XLA will enable `scan` for both training and inference, since it supports autograd.
+`scan` is still a work in progress.
+
+
+# Simple user guide
+The three simple test cases below compare a pure Python for loop, `fori_loop`, and `while_loop`. All three implement the same logic: cumulatively add 1 ten times:
+
+### Simple example with pure python for loop
+```bash
+# python
+>>> import torch
+>>> init = torch.tensor([0], dtype=torch.int32)
+>>> one_value = torch.ones(1, dtype=torch.int32)
+>>>
+>>> for i in range(10):
+...     init = init + one_value
+...
+
+>>> init
+tensor([10], dtype=torch.int32)
+```
+
+### Simple example with `while_loop`:
+```bash
+# PJRT_DEVICE=TPU python
+>>> import torch
+>>> import torch_xla
+>>> import torch_xla.experimental.fori_loop
+>>> from torch_xla.experimental.fori_loop import fori_loop
+>>> from torch._higher_order_ops.while_loop import while_loop
+>>> import torch_xla.core.xla_model as xm
+>>> import torch_xla.core.xla_builder as xb
+>>>
+>>> device = xm.xla_device()
+>>>
+>>> def cond_fn(init, limit_value):
+...   return limit_value[0] >= init[0]
+...
+>>> def body_fn(init, limit_value):
+...   one_value = torch.ones(1, dtype=torch.int32, device=device)
+...   return (torch.add(init, one_value), limit_value.clone())
+...
+>>> init = torch.tensor([0], dtype=torch.int32, device=device)
+>>> limit_value = torch.tensor([10], dtype=torch.int32, device=device)
+>>> res_, limit_value_ = while_loop(cond_fn, body_fn, (init, limit_value))
+>>> res_
+FunctionalTensor(lvl=0, value=\
+tensor([11], device='xla:0', dtype=torch.int32))
+```
+
+### Simple example with `fori_loop`:
+```bash
+# PJRT_DEVICE=TPU python
+>>> import torch
+>>> import torch_xla
+>>> import torch_xla.experimental.fori_loop
+>>> from torch_xla.experimental.fori_loop import fori_loop
+>>> from torch._higher_order_ops.while_loop import while_loop
+>>> import torch_xla.core.xla_model as xm
+>>> import torch_xla.core.xla_builder as xb
+>>>
+>>> device = xm.xla_device()
+>>>
+>>> lower = torch.tensor([2], dtype=torch.int32, device=device)
+>>> upper = torch.tensor([52], dtype=torch.int32, device=device)
+>>> plus_value = torch.tensor([1], dtype=torch.int32, device=device)
+>>> init_val = torch.tensor([1], dtype=torch.int32, device=device)
+>>>
+>>> def body_fun(*argus):
+...   plus_value, init_val = argus
+...   return plus_value, torch.add(plus_value, init_val)
+...
+>>> _, _, _, res_ = fori_loop(upper, lower, body_fun, plus_value, init_val)
+>>> res_
+tensor([51], device='xla:0', dtype=torch.int32)
+```
+
+For more examples and a detailed user guide, please read [this test file](https://github.com/pytorch/xla/blob/master/test/test_fori_loop_with_while_loop_simple_add_dispatch_in_torch.py). PyTorch/XLA will include `while_loop` support for simple cases in 2.3; support for complex cases, as well as for `fori_loop` and `scan`, will be added after 2.3.
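[Editor's note: for comparison, the `jax.lax.fori_loop` that the doc above repeatedly cites has the following shape. This is a minimal JAX sketch of the same cumulative-add logic, included only as an aside; it is not part of patch 05.]

```python
import jax

# jax.lax.fori_loop(lower, upper, body_fun, init_val) keeps the loop rolled
# as a single XLA While op; body_fun takes (loop index, carry) and returns
# the updated carry.
def body_fun(i, carry):
  return carry + 1

res = jax.lax.fori_loop(0, 10, body_fun, 0)
print(res)  # 10
```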

From 2ec770629685830c2959c64cb0a70228be9c63fc Mon Sep 17 00:00:00 2001
From: Yeounoh Chung
Date: Fri, 19 Apr 2024 15:14:29 -0700
Subject: [PATCH 06/53] Update XLA pin, 04/19/2024 (#6944)

---
 WORKSPACE | 4 +--
 setup.py | 2 +-
 torch_xla/csrc/runtime/BUILD | 2 +-
 .../csrc/runtime/ifrt_computation_client.cc | 28 +++++++++++--------
 .../csrc/runtime/pjrt_computation_client.cc | 8 +++---
 torch_xla/csrc/runtime/xla_coordinator.h | 2 +-
 6 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index e4d8a73fdc0..9c6963dae65 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -50,9 +50,9 @@ http_archive(
         "//openxla_patches:gpu_race_condition.diff",
         "//openxla_patches:f16_abi_clang.diff",
     ],
-    strip_prefix = "xla-1acf05ef0d41181caaf0cd691aa9d453ffc41a73",
+    strip_prefix = "xla-54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f",
     urls = [
-        "https://github.com/openxla/xla/archive/1acf05ef0d41181caaf0cd691aa9d453ffc41a73.tar.gz",
+        "https://github.com/openxla/xla/archive/54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f.tar.gz",
     ],
 )

diff --git a/setup.py b/setup.py
index d45b0b7fc3c..dbe47007aff 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@
 base_dir = os.path.dirname(os.path.abspath(__file__))

-_date = '20240409'
+_date = '20240418'
 _libtpu_version = f'0.1.dev{_date}'
 _libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl'
 _jax_version = f'0.4.27.dev{_date}'

diff --git a/torch_xla/csrc/runtime/BUILD b/torch_xla/csrc/runtime/BUILD
index 6f746972355..582b69d8a50 100644
--- a/torch_xla/csrc/runtime/BUILD
+++ b/torch_xla/csrc/runtime/BUILD
@@ -237,7 +237,7 @@ cc_library(
     deps = [
         ":debug_macros",
         ":sys_util",
-        "@tsl//tsl/distributed_runtime/preemption:preemption_sync_manager",
+        "@xla//xla/tsl/distributed_runtime/preemption:preemption_sync_manager",
         "@xla//xla/pjrt/distributed",
     ],
 )

diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.cc b/torch_xla/csrc/runtime/ifrt_computation_client.cc
index 029f9268342..20ee9b0bfa6 100644
--- a/torch_xla/csrc/runtime/ifrt_computation_client.cc
+++ b/torch_xla/csrc/runtime/ifrt_computation_client.cc
@@ -58,18 +58,6 @@ torch::lazy::hash_t hash_comp_env(
     xla::ifrt::Client* client, std::vector<xla::ifrt::Device*>& ordered_devices) {
   torch::lazy::hash_t hash = hash::HashXlaEnvVars();
-  auto topology_desc = client->GetTopologyForDevices(ordered_devices);
-  if (topology_desc.ok()) {
-    // Some backends support a topology description which provides a better
-    // view of the specific compilation environment.
-    auto serialized = topology_desc.value()->Serialize();
-    if (serialized.ok()) {
-      return torch::lazy::HashCombine(
-          hash,
-          torch::lazy::DataHash(serialized->data(), serialized->length()));
-    }
-    // If serialization fails, fallthrough to the manual approach.
-  }
   std::string platform_name(client->platform_name());
   std::string platform_version(client->platform_version());
   hash = torch::lazy::HashCombine(
@@ -78,10 +66,26 @@ torch::lazy::hash_t hash_comp_env(
   hash = torch::lazy::HashCombine(
       hash, torch::lazy::StringHash(platform_version.c_str()));
   // Include global devices in the hash, ensuring order is consistent.
+ xla::ifrt::DeviceList::Devices ifrt_devices; for (auto& device : ordered_devices) { std::string device_str(device->ToString()); hash = torch::lazy::HashCombine( hash, torch::lazy::StringHash(device_str.c_str())); + ifrt_devices.push_back(device); + } + + xla::ifrt::DeviceList device_list(std::move(ifrt_devices)); + auto topology_desc = client->GetTopologyForDevices(device_list); + if (topology_desc.ok()) { + // Some backends support a topology description which provides a better + // view of the specific compilation environment. + auto serialized = topology_desc.value()->Serialize(); + if (serialized.ok()) { + return torch::lazy::HashCombine( + hash, + torch::lazy::DataHash(serialized->data(), serialized->length())); + } + // If serialization fails, fallthrough to the manual approach. } return hash; } diff --git a/torch_xla/csrc/runtime/pjrt_computation_client.cc b/torch_xla/csrc/runtime/pjrt_computation_client.cc index 188e26f8ac2..a129a476a2e 100644 --- a/torch_xla/csrc/runtime/pjrt_computation_client.cc +++ b/torch_xla/csrc/runtime/pjrt_computation_client.cc @@ -463,7 +463,7 @@ std::vector PjRtComputationClient::TransferFromDevice( metrics::TimedSection timed(TransferFromDeviceMetric()); tsl::profiler::TraceMe activity("PjRtComputationClient::TransferFromDevice", tsl::profiler::TraceMeLevel::kInfo); - std::vector> futures; + std::vector> futures; futures.reserve(handles.size()); std::vector literals; literals.reserve(handles.size()); @@ -679,7 +679,7 @@ PjRtComputationClient::ExecuteComputation( TF_VLOG(5) << "ExecuteComputation acquiring PJRT device lock for " << device << " Done"; - std::optional> returned_future; + std::optional> returned_future; std::vector> results = pjrt_computation.executable ->ExecuteSharded(buffers, pjrt_device, execute_options, @@ -779,8 +779,8 @@ PjRtComputationClient::ExecuteReplicated( TF_VLOG(5) << "ExecuteReplicated acquiring PJRT device lock for " << spmd_device_str << " Done"; - std::optional>> returned_futures = - std::vector>(); + std::optional>> returned_futures = + std::vector>(); std::vector>> results; { tsl::profiler::TraceMe activity( diff --git a/torch_xla/csrc/runtime/xla_coordinator.h b/torch_xla/csrc/runtime/xla_coordinator.h index ae85c79a941..fb2cfaf99f5 100644 --- a/torch_xla/csrc/runtime/xla_coordinator.h +++ b/torch_xla/csrc/runtime/xla_coordinator.h @@ -3,8 +3,8 @@ #include -#include "tsl/distributed_runtime/preemption/preemption_sync_manager.h" #include "xla/pjrt/distributed/distributed.h" +#include "xla/tsl/distributed_runtime/preemption/preemption_sync_manager.h" namespace torch_xla { namespace runtime { From 9ba844a4b2fa0ca30eaeeeaf8aa3b742fffc3bcc Mon Sep 17 00:00:00 2001 From: Yeounoh Chung Date: Fri, 19 Apr 2024 15:21:31 -0700 Subject: [PATCH 07/53] Update jinja and sphinx versions to address the vulnearbility (#6946) --- docs/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 26f491f6c15..411e6642ff7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ mistune==0.8.4 -sphinx==2.4.4 +sphinx==5.0.0 docutils==0.16 -Jinja2<3.1 +Jinja2==3.1.3 m2r -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme From b06c9c7700e13b7731a2b2f3b9ddbbfef2d0793c Mon Sep 17 00:00:00 2001 From: Yukio Siraichi Date: Sat, 20 Apr 2024 11:19:56 -0300 Subject: [PATCH 08/53] Make `nms` fallback by default. 
(#6933) --- test/cpp/run_tests.sh | 2 +- test/run_tests.sh | 2 +- test/test_operations.py | 7 +++++++ test/tpu/run_tests.sh | 4 ++-- torch_xla/csrc/xla_manual_registration.cpp | 14 ++++++++++++++ 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/test/cpp/run_tests.sh b/test/cpp/run_tests.sh index 74244322840..d6b492dc694 100755 --- a/test/cpp/run_tests.sh +++ b/test/cpp/run_tests.sh @@ -5,7 +5,7 @@ BUILDTYPE="opt" VERB= FILTER= LOGFILE=/tmp/pytorch_cpp_test.log -XLA_EXPERIMENTAL="nonzero:masked_select" +XLA_EXPERIMENTAL="nonzero:masked_select:nms" BAZEL_REMOTE_CACHE="0" BAZEL_VERB="test" diff --git a/test/run_tests.sh b/test/run_tests.sh index 4d4bd530e27..8926318dc38 100755 --- a/test/run_tests.sh +++ b/test/run_tests.sh @@ -104,7 +104,7 @@ function run_xla_hlo_debug { function run_dynamic { echo "Running in DynamicShape mode: $@" - XLA_EXPERIMENTAL="nonzero:masked_select:masked_scatter" run_test "$@" + XLA_EXPERIMENTAL="nonzero:masked_select:masked_scatter:nms" run_test "$@" } function run_eager_debug { diff --git a/test/test_operations.py b/test/test_operations.py index 7fb9f5bc3e3..ff32c268927 100644 --- a/test/test_operations.py +++ b/test/test_operations.py @@ -88,6 +88,12 @@ def onlyOnCUDA(fn): return unittest.skipIf(accelerator != "cuda", "PJRT_DEVICE=CUDA required")(fn) +def onlyIfXLAExperimentalContains(feat): + experimental = os.environ.get("XLA_EXPERIMENTAL", "").split(":") + return unittest.skipIf(feat not in experimental, + f"XLA_EXPERIMENTAL={feat} required") + + def _gen_tensor(*args, **kwargs): return torch.randn(*args, **kwargs) @@ -2454,6 +2460,7 @@ def test_dropout(self): # These tests were extracted and adapted from torchvision. # Source: vision/test/test_ops.py +@onlyIfXLAExperimentalContains("nms") class TestNMS(test_utils.XlaTestCase): def _reference_nms(self, boxes, scores, iou_threshold): diff --git a/test/tpu/run_tests.sh b/test/tpu/run_tests.sh index 413951854d6..dc2f4e96dba 100755 --- a/test/tpu/run_tests.sh +++ b/test/tpu/run_tests.sh @@ -11,8 +11,8 @@ python3 test/spmd/test_xla_distributed_checkpoint.py python3 test/spmd/test_train_spmd_linear_model.py python3 test/spmd/test_xla_spmd_python_api_interaction.py python3 test/spmd/test_xla_auto_sharding.py -XLA_EXPERIMENTAL=nonzero:masked_select python3 test/ds/test_dynamic_shape_models.py -v -XLA_EXPERIMENTAL=nonzero:masked_select python3 test/ds/test_dynamic_shapes.py -v +XLA_EXPERIMENTAL=nonzero:masked_select:nms python3 test/ds/test_dynamic_shape_models.py -v +XLA_EXPERIMENTAL=nonzero:masked_select:nms python3 test/ds/test_dynamic_shapes.py -v python3 test/test_autocast.py python3 test/dynamo/test_dynamo.py python3 test/spmd/test_spmd_debugging.py diff --git a/torch_xla/csrc/xla_manual_registration.cpp b/torch_xla/csrc/xla_manual_registration.cpp index dc7df436ec7..6020ef6bc04 100644 --- a/torch_xla/csrc/xla_manual_registration.cpp +++ b/torch_xla/csrc/xla_manual_registration.cpp @@ -1,7 +1,9 @@ #include #include +#include "torch_xla/csrc/aten_cpu_fallback.h" #include "torch_xla/csrc/aten_xla_bridge.h" +#include "torch_xla/csrc/debug_util.h" #include "torch_xla/csrc/ops/nms.h" #include "torch_xla/csrc/ops/ops.h" #include "torch_xla/csrc/tensor_methods.h" @@ -11,10 +13,22 @@ namespace torch_xla { namespace manual { namespace { +struct NmsOp { + using schema = at::Tensor(const at::Tensor&, const at::Tensor&, double); + using ptr_schema = schema*; + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "torchvision::nms") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") +}; + at::Tensor 
nms_kernel(const at::Tensor& boxes, const at::Tensor& scores, double iou_threshold) { TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::"); + if (!DebugUtil::ExperimentEnabled("nms")) { + return at::native::call_fallback_fn<&xla_cpu_fallback, NmsOp>::call( + boxes, scores, iou_threshold); + } + XLA_CHECK_EQ(boxes.dim(), 2) << "nms(): boxes should be a 2D tensor."; XLA_CHECK_EQ(boxes.size(1), 4) << "nms(): boxes should be a 2D tensor of shape [N, 4]."; From 62a2b11c8ae00bab8740b8b15f88c8596305d2e1 Mon Sep 17 00:00:00 2001 From: Brian Hirsh Date: Mon, 22 Apr 2024 14:06:33 -0400 Subject: [PATCH 09/53] revert expand test with dynamo (#6950) --- test/dynamo/test_dynamo.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/dynamo/test_dynamo.py b/test/dynamo/test_dynamo.py index 01d77e1f78f..c3dfe6bbed1 100644 --- a/test/dynamo/test_dynamo.py +++ b/test/dynamo/test_dynamo.py @@ -671,21 +671,6 @@ def foo(x): self.assertEqual(expected.dtype, actual.dtype) self.assertEqual(expected.device, actual.device) - def test_return_expand(self): - - def foo(x): - return x.expand(2, -1) - - optfoo = torch.compile(backend="openxla")(foo) - - t = torch.arange(10) - Xt = t.to(xm.xla_device()) - - expected = foo(t) - actual = optfoo(Xt) - - self.assertEqual(expected, actual.cpu()) - if __name__ == '__main__': test = unittest.main() From 46919a478fa6d4ba50ddbe9aa6e74343d1d650e0 Mon Sep 17 00:00:00 2001 From: Bhavya Bahl Date: Mon, 22 Apr 2024 15:28:19 -0700 Subject: [PATCH 10/53] Lower embedding bag forward only (#6951) --- codegen/xla_native_functions.yaml | 1 + test/cpp/test_aten_xla_tensor_5.cpp | 21 +++ test/pytorch_test_base.py | 1 + torch_xla/csrc/aten_xla_type.cpp | 33 +++++ torch_xla/csrc/ops/embedding_bag.cpp | 192 +++++++++++++++++++++++++++ torch_xla/csrc/ops/embedding_bag.h | 31 +++++ torch_xla/csrc/tensor_methods.cpp | 15 +++ torch_xla/csrc/tensor_methods.h | 5 + 8 files changed, 299 insertions(+) create mode 100644 torch_xla/csrc/ops/embedding_bag.cpp create mode 100644 torch_xla/csrc/ops/embedding_bag.h diff --git a/codegen/xla_native_functions.yaml b/codegen/xla_native_functions.yaml index 199025dc7e1..de5500a0c5b 100644 --- a/codegen/xla_native_functions.yaml +++ b/codegen/xla_native_functions.yaml @@ -361,6 +361,7 @@ supported: - zero_ - _native_batch_norm_legit - _native_batch_norm_legit.no_stats + - _embedding_bag_forward_only # Note: [functionalization and CompositeExplicitAutograd] # Below are all operators that are "composite" in core, # but require us to explicitly re-enable functionalization in order to use them. 
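[Editor's note: a minimal usage sketch for the op registered above, not part of the patch. It assumes a working TPU/XLA environment with torch_xla installed. With no gradients required, `torch.nn.functional.embedding_bag` should reach the forward-only variant; per the guard added in `aten_xla_type.cpp` below, only non-mean, non-sparse, non-padded cases take the new XLA lowering, while the rest still fall back to CPU.]

```python
import torch
import torch.nn.functional as F
import torch_xla.core.xla_model as xm

device = xm.xla_device()
weight = torch.rand(32, 4, device=device)
indices = torch.randint(0, 31, (10,), device=device)
offsets = torch.arange(0, 10, 3, device=device)  # bags start at 0, 3, 6, 9

# mode='sum' (or 'max') can use the new XLA lowering on the inference path;
# mode='mean', sparse=True, scale_grad_by_freq, or padding_idx fall back.
out = F.embedding_bag(indices, weight, offsets, mode='sum')
print(out.shape)  # torch.Size([4, 4])
```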
diff --git a/test/cpp/test_aten_xla_tensor_5.cpp b/test/cpp/test_aten_xla_tensor_5.cpp index 4070779529f..07e4c2dae86 100644 --- a/test/cpp/test_aten_xla_tensor_5.cpp +++ b/test/cpp/test_aten_xla_tensor_5.cpp @@ -267,6 +267,27 @@ TEST_F(AtenXlaTensorTest, TestEmbedding) { }); } +TEST_F(AtenXlaTensorTest, TestEmbeddingBag) { + torch::Tensor weight = + torch::rand({32, 4}, torch::TensorOptions(torch::kFloat)); + torch::Tensor indices = + torch::randint(0, 31, {10}, torch::TensorOptions(torch::kLong)); + torch::Tensor offsets = torch::arange(0, 10, 3); + auto out = torch::embedding_bag(weight, indices, offsets); + torch::Tensor result = std::get<0>(out); + ForEachDevice([&](const torch::Device& device) { + torch::Tensor xla_weight = CopyToDevice(weight, device); + torch::Tensor xla_indices = CopyToDevice(indices, device); + torch::Tensor xla_offsets = CopyToDevice(offsets, device); + auto xla_out = torch::embedding_bag(xla_weight, xla_indices, xla_offsets); + torch::Tensor xla_result = std::get<0>(xla_out); + AllClose(result, xla_result); + ExpectCounterNotChanged("aten::.*", cpp_test::GetIgnoredCounters()); + ExpectCounterChanged("xla::_embedding_bag_forward_only", + cpp_test::GetIgnoredCounters()); + }); +} + TEST_F(AtenXlaTensorTest, TestOneHot) { int num_classes = 5; torch::Tensor input = diff --git a/test/pytorch_test_base.py b/test/pytorch_test_base.py index 88ad0f6bc3d..3a6dcdd96c6 100644 --- a/test/pytorch_test_base.py +++ b/test/pytorch_test_base.py @@ -70,6 +70,7 @@ 'test_pdist_norm_backward_xla', # pdist_single 'test_pdist_norm_forward_xla', # pdist_single 'test_nuclear_norm_axes_small_brute_force', + 'test_nondeterministic_alert_EmbeddingBag_max_xla', # FIXME: implement embedding_bag_backward 'test_mul_intertype_scalar', 'test_masked_select_discontiguous', # FIXME: wrong result 'test_memory_format_type', diff --git a/torch_xla/csrc/aten_xla_type.cpp b/torch_xla/csrc/aten_xla_type.cpp index a7ae1c47964..56a69ca1e05 100644 --- a/torch_xla/csrc/aten_xla_type.cpp +++ b/torch_xla/csrc/aten_xla_type.cpp @@ -1290,6 +1290,38 @@ at::Tensor XLANativeFunctions::embedding_dense_backward( num_weights, padding_idx, scale_grad_by_freq)); } +std::tuple +XLANativeFunctions::_embedding_bag_forward_only( + const at::Tensor& weight, const at::Tensor& indices, + const at::Tensor& offsets, bool scale_grad_by_freq, int64_t mode, + bool sparse, const c10::optional& per_sample_weights, + bool include_last_offset, int64_t padding_idx) { + TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::"); + if (mode == 1 || scale_grad_by_freq || sparse || padding_idx != -1) { + return at::native::call_fallback_fn< + &xla_cpu_fallback, + ATEN_OP(_embedding_bag_forward_only)>::call(weight, indices, offsets, + scale_grad_by_freq, mode, + sparse, per_sample_weights, + include_last_offset, + padding_idx); + } + auto indices_tensor = bridge::GetXlaTensor(indices); + auto sample_weights = + per_sample_weights.has_value() && per_sample_weights.value().defined() + ? 
bridge::GetXlaTensor(per_sample_weights.value()) + : tensor_methods::full_like(indices_tensor, 1.0, + *torch_xla::bridge::GetXlaDevice(weight), + at::ScalarType::Float); + auto result = tensor_methods::embedding_bag( + bridge::GetXlaTensor(weight), indices_tensor, + bridge::GetXlaTensor(offsets), mode, sample_weights, include_last_offset); + return std::make_tuple(bridge::AtenFromXlaTensor(std::get<0>(result)), + bridge::AtenFromXlaTensor(std::get<1>(result)), + bridge::AtenFromXlaTensor(std::get<2>(result)), + bridge::AtenFromXlaTensor(std::get<3>(result))); +} + at::Tensor XLANativeFunctions::empty_symint( at::SymIntArrayRef sym_size, c10::optional dtype, c10::optional layout, c10::optional device, @@ -3709,6 +3741,7 @@ at::Tensor XLANativeFunctions::embedding_symint(const at::Tensor& weight, scale_grad_by_freq, sparse); } + // TODO: We need to make use of the TPU embedding core here eventually. TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::"); return bridge::AtenFromXlaTensor(tensor_methods::embedding( bridge::GetXlaTensor(weight), bridge::GetXlaTensor(indices))); diff --git a/torch_xla/csrc/ops/embedding_bag.cpp b/torch_xla/csrc/ops/embedding_bag.cpp new file mode 100644 index 00000000000..d2bb034a005 --- /dev/null +++ b/torch_xla/csrc/ops/embedding_bag.cpp @@ -0,0 +1,192 @@ +#include "torch_xla/csrc/ops/embedding_bag.h" + +#include "torch_xla/csrc/helpers.h" +#include "torch_xla/csrc/lowering_context.h" +#include "torch_xla/csrc/ops/infer_output_shape.h" +#include "torch_xla/csrc/ops/xla_ops.h" +#include "torch_xla/csrc/shape_helper.h" +#include "torch_xla/csrc/xla_lower_util.h" +#include "tsl/platform/stacktrace.h" +#include "xla/client/lib/constants.h" +#include "xla/client/lib/loops.h" +#include "xla/client/lib/slicing.h" +#include "xla/shape_util.h" + +namespace torch_xla { +namespace { +const int MODE_SUM = 0; +const int MODE_MEAN = 1; +const int MODE_MAX = 2; +std::vector BuildEmbeddingBag(xla::XlaOp weight, xla::XlaOp indices, + xla::XlaOp offsets, + xla::XlaOp per_sample_weights, + bool include_last_offset, int mode) { + xla::Shape offset_shape = ShapeHelper::ShapeOfXlaOp(offsets); + int64_t n = offset_shape.dimensions(0); + xla::Shape weight_shape = ShapeHelper::ShapeOfXlaOp(weight); + int64_t weight_dim = weight_shape.dimensions(1); + xla::Shape indices_shape = ShapeHelper::ShapeOfXlaOp(indices); + int64_t num_embeddings = indices_shape.dimensions(0); + XLA_CHECK(indices_shape.rank() == 1 || indices_shape.rank() == 2) + << "input has to be a 1D or 2D Tensor, but got Tensor of dimension " + << indices_shape.rank(); + if (indices_shape.rank() == 1) { + XLA_CHECK(offset_shape.rank() == 1) + << "offsets has to be a 1D Tensor, but got Tensor of dimension " + << offset_shape.rank(); + } + XLA_CHECK(weight_shape.rank() == 2) + << "weight has to be a 2D Tensor, but got Tensor of dimension " + << weight_shape.rank(); + + xla::XlaOp output2 = xla::ZerosLike(indices); + xla::XlaOp output3 = xla::ZerosLike(offsets); + std::vector sizes = {n, weight_dim}; + xla::XlaOp output4 = + xla::Zeros(offsets.builder(), + xla::ShapeUtil::MakeShape(offset_shape.element_type(), sizes)); + + xla::XlaOp embeddings = xla::TorchIndexSelect(weight, indices, 0); + xla::XlaOp embeddings_weighted = xla::Mul( + embeddings, xla::ConvertElementType( + xla::BroadcastInDim(per_sample_weights, + {num_embeddings, weight_dim}, {0}), + weight_shape.element_type())); + + std::vector shape_elements = { + xla::ShapeUtil::MakeShape(offset_shape.element_type(), {}), + xla::ShapeUtil::MakeShape(offset_shape.element_type(), 
{}), + xla::ShapeUtil::MakeShape(weight_shape.element_type(), + {num_embeddings, weight_dim}), + xla::ShapeUtil::MakeShape(weight_shape.element_type(), {1, weight_dim})}; + xla::Shape result_shape = xla::ShapeUtil::MakeTupleShape(shape_elements); + + xla::XlaComputation condition; + { + xla::XlaBuilder builder("condition"); + auto prev = xla::Parameter(&builder, 0, result_shape, "prev"); + auto index = xla::GetTupleElement(prev, 0); + auto final_value = xla::GetTupleElement(prev, 1); + xla::Lt(index, final_value); + condition = builder.Build().value(); + } + + xla::XlaComputation body; + { + xla::XlaBuilder builder("body"); + auto prev = xla::Parameter(&builder, 0, result_shape, "prev"); + auto index = xla::GetTupleElement(prev, 0); + auto emb = xla::GetTupleElement(prev, 2); + auto w = xla::GetTupleElement(prev, 3); + + xla::XlaOp slice = xla::DynamicSlice( + emb, + {index, xla::ConvertElementType(xla::ConstantR0(&builder, 0), + offset_shape.element_type())}, + {1, weight_dim}); + xla::XlaOp result = + mode == MODE_SUM ? xla::Add(w, slice) : xla::Max(w, slice); + + xla::Tuple(&builder, + { + xla::Add(index, xla::ConvertElementType( + xla::ConstantR0(&builder, 1), + offset_shape.element_type())), + xla::GetTupleElement(prev, 1), + xla::GetTupleElement(prev, 2), + result, + }); + body = builder.Build().value(); + } + + xla::Array initial_vector({1, weight_dim}, 0.f); + std::vector results; + for (int64_t i = 0; i < n; i++) { + xla::XlaOp start = xla::DynamicSlice( + offsets, {xla::ConstantR0(offsets.builder(), i)}, {1}); + if (i == n - 1 && include_last_offset) continue; + xla::XlaOp end = + i == n - 1 && !include_last_offset + ? xla::ConvertElementType(xla::ConstantR1( + offsets.builder(), 1, num_embeddings), + offset_shape.element_type()) + : xla::DynamicSlice( + offsets, {xla::ConstantR0(offsets.builder(), i + 1)}, + {1}); + // Create a While node with computations for the condition and the body. 
+ auto init_tuple = xla::Tuple( + offsets.builder(), + {xla::Reshape(start, {0}, {}), xla::Reshape(end, {0}, {}), + embeddings_weighted, + xla::ConvertElementType( + xla::ConstantFromArray(offsets.builder(), initial_vector), + weight_shape.element_type())}); + auto result = xla::While(condition, body, init_tuple); + results.push_back(xla::GetTupleElement(result, 3)); + }; + xla::XlaOp output1 = xla::ConcatInDim(offsets.builder(), results, 0); + return {output1, output2, output3, output4}; +} + +xla::Shape NodeOutputShapes(const torch::lazy::Value& weight, + const torch::lazy::Value& indices, + const torch::lazy::Value& offsets, + const torch::lazy::Value& per_sample_weights, + bool include_last_offset, bool mode) { + auto lower_for_shapes_fn = + [&](absl::Span operands) -> xla::XlaOp { + return xla::Tuple( + operands[0].builder(), + BuildEmbeddingBag(operands[0], operands[1], operands[2], operands[3], + include_last_offset, mode)); + }; + + std::vector input_shapes = { + GetXlaShape(weight), GetXlaShape(indices), GetXlaShape(offsets), + GetXlaShape(per_sample_weights)}; + + return InferOutputShape(absl::MakeSpan(input_shapes), lower_for_shapes_fn); +} +} // namespace + +std::string EmbeddingBag::ToString() const { + std::stringstream ss; + ss << XlaNode::ToString(); + return ss.str(); +} + +EmbeddingBag::EmbeddingBag(const torch::lazy::Value& weight, + const torch::lazy::Value& indices, + const torch::lazy::Value& offsets, int64_t mode, + const torch::lazy::Value& per_sample_weights, + bool include_last_offset) + : XlaNode( + torch::lazy::OpKind(at::aten::embedding_bag), + {weight, indices, offsets, per_sample_weights}, + [&]() { + return NodeOutputShapes(weight, indices, offsets, + per_sample_weights, include_last_offset, + mode); + }, + /*num_outputs=*/4, torch::lazy::MHash(mode, include_last_offset)), + mode_(mode), + include_last_offset_(include_last_offset) {} + +torch::lazy::NodePtr EmbeddingBag::Clone(torch::lazy::OpList operands) const { + return torch::lazy::MakeNode(operands.at(0), operands.at(1), + operands.at(2), mode_, + operands.at(3), false); +} + +XlaOpVector EmbeddingBag::Lower(LoweringContext* loctx) const { + xla::XlaOp weight = loctx->GetOutputOp(operand(0)); + xla::XlaOp indices = loctx->GetOutputOp(operand(1)); + xla::XlaOp offsets = loctx->GetOutputOp(operand(2)); + xla::XlaOp per_sample_weights = loctx->GetOutputOp(operand(3)); + std::vector ops = + BuildEmbeddingBag(weight, indices, offsets, per_sample_weights, + include_last_offset_, mode_); + return ReturnOps(absl::MakeSpan(ops), loctx); +} + +} // namespace torch_xla \ No newline at end of file diff --git a/torch_xla/csrc/ops/embedding_bag.h b/torch_xla/csrc/ops/embedding_bag.h new file mode 100644 index 00000000000..4d9b0a6eecb --- /dev/null +++ b/torch_xla/csrc/ops/embedding_bag.h @@ -0,0 +1,31 @@ +#ifndef XLA_TORCH_XLA_CSRC_OPS_EMBEDDING_BAG_H_ +#define XLA_TORCH_XLA_CSRC_OPS_EMBEDDING_BAG_H_ + +#include + +#include "torch_xla/csrc/ir.h" + +namespace torch_xla { + +class EmbeddingBag : public XlaNode { + public: + EmbeddingBag(const torch::lazy::Value& weight, + const torch::lazy::Value& indices, + const torch::lazy::Value& offsets, int64_t mode, + const torch::lazy::Value& per_sample_weights, + bool include_last_offset); + + std::string ToString() const override; + + torch::lazy::NodePtr Clone(torch::lazy::OpList operands) const override; + + XlaOpVector Lower(LoweringContext* loctx) const override; + + private: + int64_t mode_; + bool include_last_offset_; +}; + +} // namespace torch_xla + +#endif // 
XLA_TORCH_XLA_CSRC_OPS_EMBEDDING_BAG_H_ \ No newline at end of file diff --git a/torch_xla/csrc/tensor_methods.cpp b/torch_xla/csrc/tensor_methods.cpp index f27dc786fb5..fbb240f31d3 100644 --- a/torch_xla/csrc/tensor_methods.cpp +++ b/torch_xla/csrc/tensor_methods.cpp @@ -48,6 +48,7 @@ #include "torch_xla/csrc/ops/dynamic_view.h" #include "torch_xla/csrc/ops/einsum.h" #include "torch_xla/csrc/ops/einsum_backward.h" +#include "torch_xla/csrc/ops/embedding_bag.h" #include "torch_xla/csrc/ops/expand.h" #include "torch_xla/csrc/ops/expand_symint.h" #include "torch_xla/csrc/ops/exponential.h" @@ -1292,6 +1293,20 @@ XLATensorPtr embedding(const XLATensorPtr& weight, return tensor_ops::Embedding(weight, indices); } +std::tuple +embedding_bag(const XLATensorPtr& weight, const XLATensorPtr& indices, + const XLATensorPtr& offsets, int64_t mode, + const XLATensorPtr& per_sample_weights, + bool include_last_offset) { + torch::lazy::NodePtr node = torch::lazy::MakeNode( + weight->GetIrValue(), indices->GetIrValue(), offsets->GetIrValue(), mode, + per_sample_weights->GetIrValue(), include_last_offset); + return std::make_tuple(weight->CreateFrom(torch::lazy::Value(node, 0)), + weight->CreateFrom(torch::lazy::Value(node, 1)), + weight->CreateFrom(torch::lazy::Value(node, 2)), + weight->CreateFrom(torch::lazy::Value(node, 3))); +} + XLATensorPtr exp(const XLATensorPtr& input) { return input->CreateFrom(Exp(input->GetIrValue())); } diff --git a/torch_xla/csrc/tensor_methods.h b/torch_xla/csrc/tensor_methods.h index f27465fd67d..6a7005a5f0f 100644 --- a/torch_xla/csrc/tensor_methods.h +++ b/torch_xla/csrc/tensor_methods.h @@ -381,6 +381,11 @@ XLATensorPtr embedding_dense_backward(const XLATensorPtr& grad_output, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); +std::tuple +embedding_bag(const XLATensorPtr& weight, const XLATensorPtr& indices, + const XLATensorPtr& offsets, int64_t mode, + const XLATensorPtr& per_sample_weights, bool include_last_offset); + XLATensorPtr embedding(const XLATensorPtr& weight, const XLATensorPtr& indices); XLATensorPtr eq(const XLATensorPtr& input, const at::Scalar& other); From 6fd448dcf34b53ca4d3f406c0caa7c1fff522b65 Mon Sep 17 00:00:00 2001 From: DeWitt Clinton Date: Tue, 23 Apr 2024 17:35:41 -0700 Subject: [PATCH 11/53] Cleanup the code example in the torch_xla2 README. (#6939) --- experimental/torch_xla2/README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/experimental/torch_xla2/README.md b/experimental/torch_xla2/README.md index f30be7ff1da..0dccde701d6 100644 --- a/experimental/torch_xla2/README.md +++ b/experimental/torch_xla2/README.md @@ -80,9 +80,10 @@ Now let's execute a model under torch_xla2. We'll start with a simple 2-layer mo it can be in theory any instance of `torch.nn.Module`. ```python +import torch +import torch.nn as nn +import torch.nn.functional as F -import torch_xla2 -from torch import nn class MyModel(nn.Module): def __init__(self): @@ -101,8 +102,8 @@ class MyModel(nn.Module): m = MyModel() # Execute this model using torch -inputs = (torch.randn(3, 3, 28, 28), ) -print(m(*inputs)) +inputs = torch.randn(3, 3, 28, 28) +print(m(inputs)) ``` This model `m` contains 2 parts: the weights that is stored inside of the model @@ -114,6 +115,7 @@ to `XLA` devices. 
This can be accomplished with `torch_xla2.tensor.move_to_devic We need move both the weights and the input to xla devices: ```python +import torch_xla2 from torch.utils import _pytree as pytree from torch_xla2.tensor import move_to_device @@ -121,7 +123,7 @@ inputs = move_to_device(inputs) new_state_dict = pytree.tree_map_only(torch.Tensor, move_to_device, m.state_dict()) m.load_state_dict(new_state_dict, assign=True) -res = m(*inputs) +res = m(inputs) print(type(res)) # outputs XLATensor2 ``` From 89efd17830aa4e81c9394f5a6224e2dbda8330c6 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Wed, 24 Apr 2024 08:34:33 -0700 Subject: [PATCH 12/53] [torch_xla2] Simplify developer setup steps (#6905) Co-authored-by: qihqi --- .github/workflows/torch_xla2.yml | 6 +- experimental/torch_xla2/README.md | 156 +++--------------- experimental/torch_xla2/dev-requirements.txt | 10 +- experimental/torch_xla2/pyproject.toml | 29 ++-- experimental/torch_xla2/test-requirements.txt | 5 + experimental/torch_xla2/test_requirements.txt | 5 - 6 files changed, 48 insertions(+), 163 deletions(-) create mode 100644 experimental/torch_xla2/test-requirements.txt delete mode 100644 experimental/torch_xla2/test_requirements.txt diff --git a/.github/workflows/torch_xla2.yml b/.github/workflows/torch_xla2.yml index 7c5a88bf430..441addad422 100644 --- a/.github/workflows/torch_xla2.yml +++ b/.github/workflows/torch_xla2.yml @@ -34,10 +34,8 @@ jobs: shell: bash working-directory: experimental/torch_xla2 run: | - pip install pytest absl-py jax[cpu] flatbuffers tensorflow - pip install torch --index-url https://download.pytorch.org/whl/cpu - pip install -r test_requirements.txt - pip install -e . + pip install -r test-requirements.txt + pip install -e .[cpu] - name: Run tests working-directory: experimental/torch_xla2 shell: bash diff --git a/experimental/torch_xla2/README.md b/experimental/torch_xla2/README.md index 0dccde701d6..fba08f40498 100644 --- a/experimental/torch_xla2/README.md +++ b/experimental/torch_xla2/README.md @@ -4,7 +4,8 @@ Currently this is only source-installable. Requires Python version >= 3.10. -### NOTE: +### NOTE: + Please don't install torch-xla from instructions in https://github.com/pytorch/xla/blob/master/CONTRIBUTING.md . In particular, the following are not needed: @@ -18,153 +19,44 @@ TorchXLA2 and torch-xla have different installation instructions, please follow the instructions below from scratch (fresh venv / conda environment.) -### 1. Install dependencies - -#### 1.0 (optional) Make a virtualenv / conda env, and activate it. - -```bash -conda create --name python=3.10 -conda activate -``` -Or, -```bash -python -m venv create my_venv -source my_venv/bin/activate -``` - -#### 1.1 Install torch CPU, even if your device has GPU or TPU: +### 1. Installing `torch_xla2` -```bash -pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu -``` +#### 1.0 (recommended) Make a virtualenv / conda env -Or, follow official instructions in [pytorch.org](https://pytorch.org/get-started/locally/) to install for your OS. +If you are using VSCode, then [you can create a new environment from +UI](https://code.visualstudio.com/docs/python/environments). Select the +`dev-requirements.txt` when asked to install project dependencies. -#### 1.2 Install Jax for either GPU or TPU +Otherwise create a new environment from the command line. 
-If you are using Google Cloud TPU, then ```bash -pip install jax[tpu] -f https://storage.googleapis.com/jax-releases/libtpu_releases.html -``` +# Option 1: venv +python -m venv create my_venv +source my_venv/bin/activate -If you are using a machine with NVidia GPU: +# Option 2: conda +conda create --name python=3.10 +conda activate -```bash -pip install --upgrade "jax[cuda12_pip]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +# Either way, install the dev requirements. +pip install -r dev-requirements.txt ``` -If you are using a CPU-only machine: -```bash -pip install --upgrade "jax[cpu]" -``` +Note: `dev-requirements.txt` will install the CPU-only version of PyTorch. -Or, follow the official instructions in https://jax.readthedocs.io/en/latest/installation.html to install for your OS or Device. +#### 1.1 Install this package -#### 1.3 Install this package +Install `torch_xla2` from source for your platform: ```bash -pip install -e . +pip install -e .[cpu] +pip install -e .[cuda] +pip install -e .[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html ``` -#### 1.4 (optional) verify installation by running tests +#### 1.2 (optional) verify installation by running tests ```bash -pip install -r test_requirements.txt +pip install -r test-requirements.txt pytest test ``` - - -## Run a model - -Now let's execute a model under torch_xla2. We'll start with a simple 2-layer model -it can be in theory any instance of `torch.nn.Module`. - -```python -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class MyModel(nn.Module): - def __init__(self): - super().__init__() - self.fc1 = nn.Linear(28 * 28, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - x = x.view(-1, 28 * 28) - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - x = self.fc3(x) - return x - -m = MyModel() - -# Execute this model using torch -inputs = torch.randn(3, 3, 28, 28) -print(m(inputs)) -``` - -This model `m` contains 2 parts: the weights that is stored inside of the model -and it's submodules (`nn.Linear`). - -To execute this model with `torch_xla2`; we need to move the tensors involved in compute -to `XLA` devices. This can be accomplished with `torch_xla2.tensor.move_to_device`. - -We need move both the weights and the input to xla devices: - -```python -import torch_xla2 -from torch.utils import _pytree as pytree -from torch_xla2.tensor import move_to_device - -inputs = move_to_device(inputs) -new_state_dict = pytree.tree_map_only(torch.Tensor, move_to_device, m.state_dict()) -m.load_state_dict(new_state_dict, assign=True) - -res = m(inputs) - -print(type(res)) # outputs XLATensor2 -``` - -### Executing with jax.jit - -The above script will execute the model using eager mode Jax as backend. This -does allow executing torch models on TPU, but is often slower than what we can -achieve with `jax.jit`. - -`jax.jit` is a function that takes a Jax function (i.e. a function that takes jax array -and returns jax array) into the same function, but faster. - -We have made the `jax_jit` decorator that would accomplish the same with functions -that takes and returns `torch.Tensor`. 
To use this, the first step is to create -a functional version of this model: this means the parameters should be passed in -as input instead of being attributes on class: - - -```python - -def model_func(param, inputs): - return torch.func.functional_call(m, param, inputs) - -``` -Here we use [torch.func.functional_call](https://pytorch.org/docs/stable/generated/torch.func.functional_call.html) -from PyTorch to replace the model -weights with `param`, then call the model. This is equivalent to: - -```python -def model_func(param, inputs): - m.load_state_dict(param) - return m(*inputs) -``` - -Now, we can apply `jax_jit` - -```python -from torch_xla2.extra import jax_jit -model_func_jitted = jax_jit(model_func) -print(model_func_jitted(new_state_dict, inputs)) -``` - - diff --git a/experimental/torch_xla2/dev-requirements.txt b/experimental/torch_xla2/dev-requirements.txt index 4a32310fbda..004a1d71ad7 100644 --- a/experimental/torch_xla2/dev-requirements.txt +++ b/experimental/torch_xla2/dev-requirements.txt @@ -1,9 +1,3 @@ -absl-py==2.0.0 -flatbuffers==23.5.26 -jax==0.4.23 -jaxlib==0.4.23 -pytest -tensorflow +-f https://download.pytorch.org/whl/torch torch==2.2.1+cpu -immutabledict -sentencepiece \ No newline at end of file +ruff~=0.3.5 diff --git a/experimental/torch_xla2/pyproject.toml b/experimental/torch_xla2/pyproject.toml index d0d2a42dec8..0c2101dbcb9 100644 --- a/experimental/torch_xla2/pyproject.toml +++ b/experimental/torch_xla2/pyproject.toml @@ -2,29 +2,30 @@ requires = ["hatchling"] build-backend = "hatchling.build" - [project] version = "0.0.1" name = "torch_xla2" dependencies = [ "absl-py", - "flatbuffers", + "immutabledict", + "jax>=0.4.24", "pytest", - "tensorflow", - - # Note: Exclude these because otherwise on pip install . - # pip will install libs from pypi which is the GPU version - # of these libs. - # We most likely need CPU version of torch and TPU version of - # jax. 
So it's best for users to install them by hand - # See more at README.md - # "jax>=0.4.24", - # "jaxlib>=0.4.24", - # "torch", + "tensorflow-cpu", + # Developers should install `dev-requirements.txt` first + "torch>=2.2.1", ] - requires-python = ">=3.10" license = {file = "LICENSE"} +[project.optional-dependencies] +cpu = ["jax[cpu]"] +# Add libtpu index `-f https://storage.googleapis.com/libtpu-releases/index.html` +tpu = ["jax[tpu]"] +cuda = ["jax[cuda12]"] + [tool.pytest.ini_options] addopts="-n auto" + +[tool.ruff] +line-length = 80 +indent-width = 2 diff --git a/experimental/torch_xla2/test-requirements.txt b/experimental/torch_xla2/test-requirements.txt new file mode 100644 index 00000000000..1deead455a1 --- /dev/null +++ b/experimental/torch_xla2/test-requirements.txt @@ -0,0 +1,5 @@ +-r dev-requirements.txt +pytest +pytest-xdist +sentencepiece +expecttest diff --git a/experimental/torch_xla2/test_requirements.txt b/experimental/torch_xla2/test_requirements.txt deleted file mode 100644 index c8596327236..00000000000 --- a/experimental/torch_xla2/test_requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pytest -immutabledict -sentencepiece -pytest-xdist -expecttest \ No newline at end of file From fa090a24a185ec039c03f707c43773e5ec2beac5 Mon Sep 17 00:00:00 2001 From: Siyuan Liu Date: Wed, 24 Apr 2024 09:11:46 -0700 Subject: [PATCH 13/53] Create rc13 trigger (#6956) --- .../artifacts.auto.tfvars | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/infra/tpu-pytorch-releases/artifacts.auto.tfvars b/infra/tpu-pytorch-releases/artifacts.auto.tfvars index 0229a79c190..7b5701db14e 100644 --- a/infra/tpu-pytorch-releases/artifacts.auto.tfvars +++ b/infra/tpu-pytorch-releases/artifacts.auto.tfvars @@ -35,32 +35,32 @@ nightly_builds = [ versioned_builds = [ # Remove libtpu from PyPI builds { - git_tag = "v2.3.0-rc12" - package_version = "2.3.0-rc12" + git_tag = "v2.3.0-rc13" + package_version = "2.3.0-rc13" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.8" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc12" - package_version = "2.3.0-rc12" + git_tag = "v2.3.0-rc13" + package_version = "2.3.0-rc13" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.9" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc12" - package_version = "2.3.0-rc12" + git_tag = "v2.3.0-rc13" + package_version = "2.3.0-rc13" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.10" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc12" - package_version = "2.3.0-rc12" + git_tag = "v2.3.0-rc13" + package_version = "2.3.0-rc13" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.11" @@ -68,25 +68,25 @@ versioned_builds = [ }, # Bundle libtpu for Kaggle { - git_tag = "v2.3.0-rc12" - package_version = "2.3.0-rc12+libtpu" + git_tag = "v2.3.0-rc13" + package_version = "2.3.0-rc13+libtpu" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.10" bundle_libtpu = "1" }, { - git_tag = "v2.3.0-rc12" + git_tag = "v2.3.0-rc13" pytorch_git_rev = "v2.3.0-rc12" - package_version = "2.3.0-rc12" + package_version = "2.3.0-rc13" accelerator = "cuda" cuda_version = "12.1" python_version = "3.8" }, { - git_tag = "v2.3.0-rc12" + git_tag = "v2.3.0-rc13" pytorch_git_rev = "v2.3.0-rc12" - package_version = "2.3.0-rc12" + package_version = "2.3.0-rc13" accelerator = "cuda" cuda_version = "12.1" python_version = "3.10" From b5574d835196c0d12c6dd25b41c3e69b7bd00736 Mon Sep 17 00:00:00 2001 From: qihqi Date: Wed, 24 Apr 2024 
10:28:00 -0700 Subject: [PATCH 14/53] update torch deps to 2.3 (#6959) --- experimental/torch_xla2/dev-requirements.txt | 2 +- experimental/torch_xla2/test/test_ops.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/experimental/torch_xla2/dev-requirements.txt b/experimental/torch_xla2/dev-requirements.txt index 004a1d71ad7..208f70d5fef 100644 --- a/experimental/torch_xla2/dev-requirements.txt +++ b/experimental/torch_xla2/dev-requirements.txt @@ -1,3 +1,3 @@ -f https://download.pytorch.org/whl/torch -torch==2.2.1+cpu +torch==2.3.0+cpu ruff~=0.3.5 diff --git a/experimental/torch_xla2/test/test_ops.py b/experimental/torch_xla2/test/test_ops.py index ed14e636e5c..72a39ae8582 100644 --- a/experimental/torch_xla2/test/test_ops.py +++ b/experimental/torch_xla2/test/test_ops.py @@ -570,6 +570,7 @@ "special.xlog1py", "split", "split_with_sizes", + "split_with_sizes_copy", "sqrt", "square", "stack", From 69eeace357f1e606c3444a11fc4003e2a386c3a2 Mon Sep 17 00:00:00 2001 From: Siyuan Liu Date: Wed, 24 Apr 2024 12:42:06 -0700 Subject: [PATCH 15/53] update rc14 (#6962) --- .../artifacts.auto.tfvars | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/infra/tpu-pytorch-releases/artifacts.auto.tfvars b/infra/tpu-pytorch-releases/artifacts.auto.tfvars index 7b5701db14e..e47e93b27bd 100644 --- a/infra/tpu-pytorch-releases/artifacts.auto.tfvars +++ b/infra/tpu-pytorch-releases/artifacts.auto.tfvars @@ -35,32 +35,32 @@ nightly_builds = [ versioned_builds = [ # Remove libtpu from PyPI builds { - git_tag = "v2.3.0-rc13" - package_version = "2.3.0-rc13" + git_tag = "v2.3.0-rc14" + package_version = "2.3.0-rc14" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.8" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc13" - package_version = "2.3.0-rc13" + git_tag = "v2.3.0-rc14" + package_version = "2.3.0-rc14" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.9" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc13" - package_version = "2.3.0-rc13" + git_tag = "v2.3.0-rc14" + package_version = "2.3.0-rc14" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.10" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc13" - package_version = "2.3.0-rc13" + git_tag = "v2.3.0-rc14" + package_version = "2.3.0-rc14" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.11" @@ -68,25 +68,25 @@ versioned_builds = [ }, # Bundle libtpu for Kaggle { - git_tag = "v2.3.0-rc13" - package_version = "2.3.0-rc13+libtpu" + git_tag = "v2.3.0-rc14" + package_version = "2.3.0-rc14+libtpu" pytorch_git_rev = "v2.3.0-rc12" accelerator = "tpu" python_version = "3.10" bundle_libtpu = "1" }, { - git_tag = "v2.3.0-rc13" + git_tag = "v2.3.0-rc14" pytorch_git_rev = "v2.3.0-rc12" - package_version = "2.3.0-rc13" + package_version = "2.3.0-rc14" accelerator = "cuda" cuda_version = "12.1" python_version = "3.8" }, { - git_tag = "v2.3.0-rc13" + git_tag = "v2.3.0-rc14" pytorch_git_rev = "v2.3.0-rc12" - package_version = "2.3.0-rc13" + package_version = "2.3.0-rc14" accelerator = "cuda" cuda_version = "12.1" python_version = "3.10" From a7749fa10e4cb08626ae4570ee2027b5a240d4ba Mon Sep 17 00:00:00 2001 From: Jiewen Tan Date: Wed, 24 Apr 2024 14:06:47 -0700 Subject: [PATCH 16/53] [Doc] Update Pallas user guide (#6961) Summary: This PR adds a preliminary user guide for Pallas. Test Plan: Skip CI. 
--- docs/pallas.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 docs/pallas.md diff --git a/docs/pallas.md b/docs/pallas.md new file mode 100644 index 00000000000..46c80b79f2e --- /dev/null +++ b/docs/pallas.md @@ -0,0 +1,57 @@ +# Custom Kernels via Pallas + +With the rise of OpenAI [triton](https://openai.com/research/triton), custom kernels become more and more popular in the GPU community, for instance, the introduction of [FlashAttention](https://github.com/Dao-AILab/flash-attention) and [PagedAttention](https://blog.vllm.ai/2023/06/20/vllm.html). In order to provide the feature parity in the TPU world, Google has introduced [Pallas](http://go/jax-pallas) and [Mosaic](http://go/mosaic-tpu). For PyTorch/XLA to continue pushing the performance in TPU, we have to support custom kernels, and the best way is through Pallas and Mosaic. The design doc is [TBA](). + +Let's assume you have a Pallas kernel defined as follow: +```python3 +import jax +from jax.experimental import pallas as pl +import jax.numpy as jnp + +def add_vectors_kernel(x_ref, y_ref, o_ref): + x, y = x_ref[...], y_ref[...] + o_ref[...] = x + y + +@jax.jit +def add_vectors(x: jax.Array, y: jax.Array) -> jax.Array: + return pl.pallas_call(add_vectors_kernel, + out_shape=jax.ShapeDtypeStruct(x.shape, x.dtype) + )(x, y) +``` + +## Adopt the above kernel to be compatible with PyTorch/XLA + +Example usage: +```python3 +q = torch.randn(3, 2, 128, 4).to("xla") +k = torch.randn(3, 2, 128, 4).to("xla") +v = torch.randn(3, 2, 128, 4).to("xla") + +# Adopts any Pallas kernel +from torch_xla.experimental.custom_kernel import make_kernel_from_pallas +pt_kernel = make_kernel_from_pallas(add_vectors, lambda x, y: [(x.shape, x.dtype)]) +output = pt_kernel(q, k) +``` +For simple kernels, the adoption is just as simple as one liner. For more complicated kernels, you can refer to our Flash Attention implementation for details. + +## Use built-in kernels + +Besides manually wrapping external Pallas kernels, there are built-in kernels where the adoptions are done by PyTorch/XLA already. + +Example usage: +```python3 +# Use built-in kernels +from torch_xla.experimental.custom_kernel import flash_attention +output = flash_attention(q, k, v) +``` + +You can just use it like any other torch.ops. + +## HuggingFace Llama 3 Example +We have a fork of HF Llama 3 to demonstrate a potential integration [here](https://github.com/pytorch-tpu/transformers/tree/alanwaketan/flash_attention). + +## Dependencies +The Pallas integration depends on JAX to function. However, not every JAX version is compatible with your installed PyTorch/XLA. 
To install the proper JAX: +```bash +pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html +``` From 5369e7d6014cbc1382b767bab027c2e0e2a71867 Mon Sep 17 00:00:00 2001 From: Siyuan Liu Date: Wed, 24 Apr 2024 14:17:05 -0700 Subject: [PATCH 17/53] Add final 2.3 trigger (#6963) --- .../artifacts.auto.tfvars | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/infra/tpu-pytorch-releases/artifacts.auto.tfvars b/infra/tpu-pytorch-releases/artifacts.auto.tfvars index e47e93b27bd..16902f663fd 100644 --- a/infra/tpu-pytorch-releases/artifacts.auto.tfvars +++ b/infra/tpu-pytorch-releases/artifacts.auto.tfvars @@ -35,58 +35,58 @@ nightly_builds = [ versioned_builds = [ # Remove libtpu from PyPI builds { - git_tag = "v2.3.0-rc14" - package_version = "2.3.0-rc14" - pytorch_git_rev = "v2.3.0-rc12" + git_tag = "v2.3.0" + package_version = "2.3.0" + pytorch_git_rev = "v2.3.0" accelerator = "tpu" python_version = "3.8" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc14" - package_version = "2.3.0-rc14" - pytorch_git_rev = "v2.3.0-rc12" + git_tag = "v2.3.0" + package_version = "2.3.0" + pytorch_git_rev = "v2.3.0" accelerator = "tpu" python_version = "3.9" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc14" - package_version = "2.3.0-rc14" - pytorch_git_rev = "v2.3.0-rc12" + git_tag = "v2.3.0" + package_version = "2.3.0" + pytorch_git_rev = "v2.3.0" accelerator = "tpu" python_version = "3.10" bundle_libtpu = "0" }, { - git_tag = "v2.3.0-rc14" - package_version = "2.3.0-rc14" - pytorch_git_rev = "v2.3.0-rc12" + git_tag = "v2.3.0" + package_version = "2.3.0" + pytorch_git_rev = "v2.3.0" accelerator = "tpu" python_version = "3.11" bundle_libtpu = "0" }, # Bundle libtpu for Kaggle { - git_tag = "v2.3.0-rc14" - package_version = "2.3.0-rc14+libtpu" - pytorch_git_rev = "v2.3.0-rc12" + git_tag = "v2.3.0" + package_version = "2.3.0+libtpu" + pytorch_git_rev = "v2.3.0" accelerator = "tpu" python_version = "3.10" bundle_libtpu = "1" }, { - git_tag = "v2.3.0-rc14" - pytorch_git_rev = "v2.3.0-rc12" - package_version = "2.3.0-rc14" + git_tag = "v2.3.0" + pytorch_git_rev = "v2.3.0" + package_version = "2.3.0" accelerator = "cuda" cuda_version = "12.1" python_version = "3.8" }, { - git_tag = "v2.3.0-rc14" - pytorch_git_rev = "v2.3.0-rc12" - package_version = "2.3.0-rc14" + git_tag = "v2.3.0" + pytorch_git_rev = "v2.3.0" + package_version = "2.3.0" accelerator = "cuda" cuda_version = "12.1" python_version = "3.10" From 76f7dd061ac5bf8251e1090185887bd159d54958 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Wed, 24 Apr 2024 14:42:43 -0700 Subject: [PATCH 18/53] Temporarily ignore torch commit in CI test (#6964) --- .github/workflows/_test_python.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_test_python.yml b/.github/workflows/_test_python.yml index bd260cdb2d1..11c0677feea 100644 --- a/.github/workflows/_test_python.yml +++ b/.github/workflows/_test_python.yml @@ -101,14 +101,18 @@ jobs: # TODO: Add these in setup.py pip install fsspec pip install rich - - name: Record PyTorch commit - run: echo "PYTORCH_COMMIT=$(python -c 'import torch_xla.version; print(torch_xla.version.__torch_gitrev__)')" >> $GITHUB_ENV + + echo "Import check..." 
+ python -c "import torch_xla" + # TODO(wcromar): re-enable this because it's important sometimes + # - name: Record PyTorch commit + # run: echo "PYTORCH_COMMIT=$(python -c 'import torch_xla.version; print(torch_xla.version.__torch_gitrev__)')" >> $GITHUB_ENV - name: Checkout PyTorch Repo uses: actions/checkout@v4 with: repository: pytorch/pytorch path: pytorch - ref: ${{ env.PYTORCH_COMMIT }} + # ref: ${{ env.PYTORCH_COMMIT }} - name: Checkout PyTorch/XLA Repo uses: actions/checkout@v4 with: From af74c349ce56ebb3eb4c9af05e070f2dd14bb74b Mon Sep 17 00:00:00 2001 From: Jiewen Tan Date: Wed, 24 Apr 2024 17:34:33 -0700 Subject: [PATCH 19/53] [Doc] Improve docker instructions (#6969) Summary: Clarify some caveats for using the TPU docker images in the landing page. Test Plan: Skip CI. --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index d1653eb7b53..8ebd5a2ac29 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,11 @@ replace the `torch_xla` with `torch` or `torchvision` on above wheel links. | 1.13 | `gcr.io/tpu-pytorch/xla:r1.13_3.8_tpuvm` | | nightly python | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm` | +To use the above dockers, please pass `--privileged --net host --shm-size=16G` along. Here is an example: +```bash +docker run --privileged --net host --shm-size=16G -it us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm /bin/bash +``` +
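+A minimal smoke test once inside the container could look like the following
+sketch (it assumes the container can see the host TPU runtime):
+```bash
+# Allocating a tensor on the XLA device forces the runtime to initialize.
+python -c "import torch; import torch_xla.core.xla_model as xm; print(torch.ones(2, 2, device=xm.xla_device()))"
+```
+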
| Version | GPU CUDA 12.1 Docker | From 6ed202608814cab34512d4bbad491c6c6eab0980 Mon Sep 17 00:00:00 2001 From: Wonjoo Lee Date: Wed, 24 Apr 2024 20:14:08 -0700 Subject: [PATCH 20/53] Enable PagedAttention through Pallas (#6912) --- test/test_pallas.py | 165 ++++++++++++++++++++++++ torch_xla/experimental/custom_kernel.py | 93 +++++++++++-- 2 files changed, 248 insertions(+), 10 deletions(-) diff --git a/test/test_pallas.py b/test/test_pallas.py index f8480782094..089394b71d3 100644 --- a/test/test_pallas.py +++ b/test/test_pallas.py @@ -10,6 +10,8 @@ from torch_xla import runtime as xr from torch_xla._internal import tpu +import numpy as np + if xr.device_type() == 'TPU': from torch_xla.experimental.custom_kernel import jax_import_guard jax_import_guard() @@ -26,6 +28,32 @@ def _attention(self, q, k, v): attn_output = attn_weight @ v return attn_output + # The following helper functions prefixed with _pagedattention are used for PagedAttention unit tests + # Reference: https://github.com/google/jax/blob/main/tests/pallas/paged_attention_kernel_test.py + def _pagedattention_generate_qkv( + self, + seq_lens, + page_size, + max_seq_len, + num_kv_heads, + num_heads, + head_dim, + dtype=torch.float32, + ): + assert max_seq_len % page_size == 0 + pages_per_sequence = max_seq_len // page_size + batch_size = len(seq_lens) + total_pages = batch_size * pages_per_sequence + k_pages = torch.randn( + num_kv_heads, total_pages, page_size, head_dim, dtype=dtype) + v_pages = torch.randn( + num_kv_heads, total_pages, page_size, head_dim, dtype=dtype) + page_indices = torch.randperm( + batch_size * pages_per_sequence, dtype=torch.int32) + page_indices = page_indices.reshape(batch_size, pages_per_sequence) + q = torch.randn(batch_size, num_heads, head_dim, dtype=dtype) + return q, k_pages, v_pages, page_indices + @unittest.skipIf(xr.device_type() != 'TPU', "This test only works on TPU.") def test_tpu_custom_call_pallas_add(self): # This payload is generated by the following Pallas code: @@ -454,6 +482,143 @@ def test_flash_attention_backward(self): self.assertTrue(torch.allclose(i[0].grad.cpu(), i[1].cpu(), atol=1e-05)) jax.config.update('jax_default_matmul_precision', jax.lax.Precision.DEFAULT) + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 4, + "This test only works on TPUv4+.") + def test_paged_attention_wrapper(self): + from torch_xla.experimental.custom_kernel import paged_attention + from jax.experimental.pallas.ops.tpu.paged_attention.paged_attention_kernel import paged_attention as jax_paged_attention + + max_kv_len = 2048 + block_size = 512 + page_size = 64 + num_kv_heads = 8 + q_kv_head_ratio = 8 + head_dim = 256 + dtype = torch.float32 + seq_lens = torch.tensor([0, 3, 256, 513, 1023, 2048], dtype=torch.int32) + + q, k_pages, v_pages, page_indices = self._pagedattention_generate_qkv( + seq_lens, + page_size, + max_kv_len, + num_kv_heads, + num_kv_heads * q_kv_head_ratio, + head_dim, + ) + + q_xla = q.to("xla") + k_pages_xla = k_pages.to("xla") + v_pages_xla = v_pages.to("xla") + seq_lens_xla = seq_lens.to("xla") + page_indices_xla = page_indices.to("xla") + + output = paged_attention( + q_xla, + k_pages_xla, + v_pages_xla, + seq_lens_xla, + page_indices_xla, + pages_per_compute_block=block_size // page_size, + ) + + q_jax = jnp.array(q.numpy(), dtype=jnp.float32) + k_pages_jax = jnp.array(k_pages.numpy(), dtype=jnp.float32) + v_pages_jax = jnp.array(v_pages.numpy(), dtype=jnp.float32) + seq_lens_jax = jnp.array(seq_lens.numpy(), dtype=jnp.int32) + page_indices_jax = 
jnp.array(page_indices.numpy(), dtype=jnp.int32) + expected_output = torch.from_numpy( + np.array( + jax_paged_attention( + q_jax, + k_pages_jax, + v_pages_jax, + seq_lens_jax, + page_indices_jax, + pages_per_compute_block=block_size // page_size, + ))) + + self.assertTrue( + torch.allclose( + output.cpu()[seq_lens > 0], + expected_output.cpu()[seq_lens > 0], + atol=1e-5, + rtol=1e-5)) + + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 4, + "This test only works on TPUv4+.") + def test_paged_attention_wrapper_with_dynamo(self): + from torch_xla.experimental.custom_kernel import paged_attention + from jax.experimental.pallas.ops.tpu.paged_attention.paged_attention_kernel import paged_attention as jax_paged_attention + + max_kv_len = 2048 + block_size = 512 + page_size = 64 + num_kv_heads = 8 + q_kv_head_ratio = 8 + head_dim = 256 + dtype = torch.float32 + seq_lens = torch.tensor([0, 3, 256, 513, 1023, 2048], dtype=torch.int32) + + q, k_pages, v_pages, page_indices = self._pagedattention_generate_qkv( + seq_lens, + page_size, + max_kv_len, + num_kv_heads, + num_kv_heads * q_kv_head_ratio, + head_dim, + ) + + q_xla = q.to("xla") + k_pages_xla = k_pages.to("xla") + v_pages_xla = v_pages.to("xla") + seq_lens_xla = seq_lens.to("xla") + page_indices_xla = page_indices.to("xla") + + def paged_attention_wrapper(q, k, v, seq_lens, page_indices, + pages_per_compute_block): + return paged_attention( + q_xla, + k_pages_xla, + v_pages_xla, + seq_lens_xla, + page_indices_xla, + pages_per_compute_block=block_size // page_size, + ) + + compiled_paged_attention = torch.compile( + paged_attention_wrapper, backend="openxla") + output = paged_attention_wrapper( + q_xla, + k_pages_xla, + v_pages_xla, + seq_lens_xla, + page_indices_xla, + pages_per_compute_block=block_size // page_size, + ) + + q_jax = jnp.array(q.numpy(), dtype=jnp.float32) + k_pages_jax = jnp.array(k_pages.numpy(), dtype=jnp.float32) + v_pages_jax = jnp.array(v_pages.numpy(), dtype=jnp.float32) + seq_lens_jax = jnp.array(seq_lens.numpy(), dtype=jnp.int32) + page_indices_jax = jnp.array(page_indices.numpy(), dtype=jnp.int32) + expected_output = torch.from_numpy( + np.array( + jax_paged_attention( + q_jax, + k_pages_jax, + v_pages_jax, + seq_lens_jax, + page_indices_jax, + pages_per_compute_block=block_size // page_size, + ))) + + self.assertTrue( + torch.allclose( + output.cpu()[seq_lens > 0], + expected_output.cpu()[seq_lens > 0], + atol=1e-5, + rtol=1e-5)) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) diff --git a/torch_xla/experimental/custom_kernel.py b/torch_xla/experimental/custom_kernel.py index bb4ce0c4e23..42b11d3ea9b 100644 --- a/torch_xla/experimental/custom_kernel.py +++ b/torch_xla/experimental/custom_kernel.py @@ -371,6 +371,67 @@ def flash_attention( return FlashAttention.apply(q, k, v, causal, partition_spec, mesh) +def paged_attention(q, k_pages, v_pages, lengths, page_indices, + pages_per_compute_block): + # Import JAX within the function such that we don't need to call the jax_import_guard() + # in the global scope which could cause problems for xmp.spawn. 
+ jax_import_guard() + from jax.experimental.pallas.ops.tpu.paged_attention.paged_attention_kernel import paged_attention + + payload, tensor_args = trace_pallas( + paged_attention, + q, + k_pages, + v_pages, + lengths, + page_indices, + pages_per_compute_block=pages_per_compute_block, + static_argnames=["pages_per_compute_block"], + ) + + batch_size, num_heads, head_dim = q.shape + num_kv_heads, _, page_size, head_dim_k = k_pages.shape + batch_size_paged_indices, pages_per_sequence = page_indices.shape + q_output_dtype = torch.float32 + if (num_heads // num_kv_heads) % 8 != 0: + q = q.reshape(batch_size, num_heads, 1, head_dim) + q_output_dtype = q.dtype + + page_indices_reshaped = page_indices.reshape(-1) + buffer_index = torch.zeros((1,), dtype=torch.int32).to("xla") + step = torch.zeros((1,), dtype=torch.int32).to("xla") + output_shape = torch.Size(list(q.shape[:-1]) + [1]) + + output, _, _ = torch_xla._XLAC._xla_tpu_custom_call( + [ + lengths, + page_indices_reshaped, + buffer_index, + step, + q, + k_pages, + v_pages, + ], payload, [q.shape, output_shape, output_shape], + [q_output_dtype, torch.float32, torch.float32]) + + return output.reshape(batch_size, num_heads, head_dim).to(q.dtype) + + +def non_xla_attetion(q, k, v, attention_type): + # This will be called when dynamo use fake tensor to construct the fake output. + # We need to make sure output tensor's shape is correct. + if k.device != torch.device("meta"): + warnings.warn( + f'XLA {attention_type} attention should only be applied to tensors on XLA device' + ) + + # perform a regular attention if input tensors are not on XLA device. + attn_weight = q @ k.transpose(-2, -1) + attn_weight = torch.nn.functional.softmax(attn_weight, dim=-1) + attn_output = attn_weight @ v + return attn_output + + XLA_LIB.define( "flash_attention(Tensor q, Tensor k, Tensor v, bool casual=False) -> Tensor", ) @@ -389,14 +450,26 @@ def flash_attention_non_xla(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, causal: bool = False): - # This will be called when dynamo use fake tensor to construct the fake output. - # We need to make sure output tensor's shape is correct. - if k.device != torch.device("meta"): - warnings.warn( - 'XLA flash attention should only be applied to tensors on XLA device') + return non_xla_attetion(q, k, v, "flash") - # perform a regular attention if input tensors are not on XLA device. 
- attn_weight = q @ k.transpose(-2, -1) - attn_weight = torch.nn.functional.softmax(attn_weight, dim=-1) - attn_output = attn_weight @ v - return attn_output + +XLA_LIB.define( + "paged_attention(Tensor q, Tensor k_pages, Tensor v_pages, Tensor lengths, Tensor page_indices, int pages_per_compute_block) -> Tensor", +) + + +@impl(XLA_LIB, "paged_attention", "XLA") +def paged_attention_xla(q: torch.Tensor, k_pages: torch.Tensor, + v_pages: torch.Tensor, lengths: torch.Tensor, + page_indices: torch.Tensor, + pages_per_compute_block: int): + return paged_attention(q, k_pages, v_pages, lengths, page_indices, + pages_per_compute_block) + + +@impl(XLA_LIB, "paged_attention", "CompositeExplicitAutograd") +def paged_attention_non_xla(q: torch.Tensor, k_pages: torch.Tensor, + v_pages: torch.Tensor, lengths: torch.Tensor, + page_indices: torch.Tensor, + pages_per_compute_block: int): + return non_xla_attetion(q, k, v, "paged") From 0a204a6b0cf0984d1f6214f73d8ef533c0dcf1b3 Mon Sep 17 00:00:00 2001 From: Siyuan Liu Date: Wed, 24 Apr 2024 21:45:50 -0700 Subject: [PATCH 21/53] Update readme for 2.3 releae (#6967) --- README.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8ebd5a2ac29..6bd1bb844d7 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ started: To install PyTorch/XLA a new TPU VM: ``` -pip install torch~=2.2.0 torch_xla[tpu]~=2.2.0 -f https://storage.googleapis.com/libtpu-releases/index.html +pip install torch~=2.3.0 torch_xla[tpu]~=2.3.0 -f https://storage.googleapis.com/libtpu-releases/index.html ``` To update your existing training loop, make the following changes: @@ -143,10 +143,10 @@ bucket. | Version | Cloud TPU/GPU VMs Wheel | | --- | ----------- | -| 2.2 (Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.2.0-cp38-cp38-manylinux_2_28_x86_64.whl` | -| 2.2 (Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.2.0-cp310-cp310-manylinux_2_28_x86_64.whl` | -| 2.2 (CUDA 12.1 + Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.2.0-cp38-cp38-manylinux_2_28_x86_64.whl` | -| 2.2 (CUDA 12.1 + Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.2.0-cp310-cp310-manylinux_2_28_x86_64.whl` | +| 2.3 (Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.3.0-cp38-cp38-manylinux_2_28_x86_64.whl` | +| 2.3 (Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.3.0-cp310-cp310-manylinux_2_28_x86_64.whl` | +| 2.3 (CUDA 12.1 + Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.3.0-cp38-cp38-manylinux_2_28_x86_64.whl` | +| 2.3 (CUDA 12.1 + Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.3.0-cp310-cp310-manylinux_2_28_x86_64.whl` | | nightly (Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp38-cp38-linux_x86_64.whl` | | nightly (Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl` | | nightly (CUDA 12.1 + Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-nightly-cp38-cp38-linux_x86_64.whl` | @@ -157,6 +157,10 @@ bucket. 
| Version | Cloud TPU VMs Wheel | |---------|-------------------| +| 2.2 (Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.2.0-cp38-cp38-manylinux_2_28_x86_64.whl` | +| 2.2 (Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.2.0-cp310-cp310-manylinux_2_28_x86_64.whl` | +| 2.2 (CUDA 12.1 + Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.2.0-cp38-cp38-manylinux_2_28_x86_64.whl` | +| 2.2 (CUDA 12.1 + Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.2.0-cp310-cp310-manylinux_2_28_x86_64.whl` | | 2.1 (XRT + Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/xrt/tpuvm/torch_xla-2.1.0%2Bxrt-cp310-cp310-manylinux_2_28_x86_64.whl` | | 2.1 (Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.1.0-cp38-cp38-linux_x86_64.whl` | | 2.0 (Python 3.8) | `https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch_xla-2.0-cp38-cp38-linux_x86_64.whl` | @@ -211,6 +215,7 @@ replace the `torch_xla` with `torch` or `torchvision` on above wheel links. | Version | Cloud TPU VMs Docker | | --- | ----------- | +| 2.3 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.3.0_3.10_tpuvm` | | 2.2 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.2.0_3.10_tpuvm` | | 2.1 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.1.0_3.10_tpuvm` | | 2.0 | `gcr.io/tpu-pytorch/xla:r2.0_3.8_tpuvm` | @@ -226,6 +231,7 @@ docker run --privileged --net host --shm-size=16G -it us-central1-docker.pkg.dev | Version | GPU CUDA 12.1 Docker | | --- | ----------- | +| 2.3 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.3.0_3.10_cuda_12.1` | | 2.2 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.2.0_3.10_cuda_12.1` | | 2.1 | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.1.0_3.10_cuda_12.1` | | nightly | `us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.8_cuda_12.1` | From abe090addd847099ff8aab680c98798f5e660e33 Mon Sep 17 00:00:00 2001 From: Siyuan Liu Date: Wed, 24 Apr 2024 21:46:05 -0700 Subject: [PATCH 22/53] Update GPU readme (#6968) --- docs/gpu.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/gpu.md b/docs/gpu.md index c2678164f4e..de1cf807361 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -71,9 +71,12 @@ source ~/.bashrc ### Wheel ``` -pip3 install torch==2.2.0 -pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.2.0-cp38-cp38-manylinux_2_28_x86_64.whl +pip3 install torch==2.3.0 +# GPU whl for python 3.10 + cuda 12.1 +pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-2.3.0-cp310-cp310-manylinux_2_28_x86_64.whl ``` +Wheels for other Python version and CUDA version can be found [here](https://github.com/pytorch/xla?tab=readme-ov-file#available-docker-images-and-wheels). + ## Run a simple model In order to run below examples, you need to clone the pytorch/xla repo to access the imagenet example(We already clone it in our docker). 
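As an aside on the `docs/gpu.md` instructions above: a one-line sanity check of a freshly installed CUDA wheel could look like the following sketch (it assumes the `PJRT_DEVICE`/`GPU_NUM_DEVICES` runtime-selection variables that `docs/gpu.md` uses in its run examples):

```bash
# Ask torch_xla to enumerate the local GPU through the PJRT CUDA plugin.
PJRT_DEVICE=CUDA GPU_NUM_DEVICES=1 python -c "import torch_xla.core.xla_model as xm; print(xm.xla_device())"
```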
From 0054ec0897506684ee29db8e277eacf41fcb8d4b Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Thu, 25 Apr 2024 09:05:41 -0700 Subject: [PATCH 23/53] Write `torch_xla.version.__torch_gitrev__` to file directly (#6966) --- .github/workflows/_test_python.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_test_python.yml b/.github/workflows/_test_python.yml index 11c0677feea..960b326450b 100644 --- a/.github/workflows/_test_python.yml +++ b/.github/workflows/_test_python.yml @@ -104,15 +104,20 @@ jobs: echo "Import check..." python -c "import torch_xla" - # TODO(wcromar): re-enable this because it's important sometimes - # - name: Record PyTorch commit - # run: echo "PYTORCH_COMMIT=$(python -c 'import torch_xla.version; print(torch_xla.version.__torch_gitrev__)')" >> $GITHUB_ENV + - name: Record PyTorch commit + run: | + # Don't just pipe output in shell because imports may do extra logging + python -c " + import torch_xla.version + with open('$GITHUB_ENV', 'a') as f: + f.write(f'PYTORCH_COMMIT={torch_xla.version.__torch_gitrev__}\n') + " - name: Checkout PyTorch Repo uses: actions/checkout@v4 with: repository: pytorch/pytorch path: pytorch - # ref: ${{ env.PYTORCH_COMMIT }} + ref: ${{ env.PYTORCH_COMMIT }} - name: Checkout PyTorch/XLA Repo uses: actions/checkout@v4 with: From 2a204e9b473831776def499c8106bafe2c418d24 Mon Sep 17 00:00:00 2001 From: Brian Hirsh Date: Thu, 25 Apr 2024 12:42:47 -0400 Subject: [PATCH 24/53] fix pytorch CI after pin update, change test to use assertLessEqual (#6973) --- test/dynamo/test_dynamo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/dynamo/test_dynamo.py b/test/dynamo/test_dynamo.py index c3dfe6bbed1..2a7e99dd4d3 100644 --- a/test/dynamo/test_dynamo.py +++ b/test/dynamo/test_dynamo.py @@ -489,13 +489,13 @@ def test_resnet18(self): # Graph 1: forward # Graph 2: backward # Graph 3: sync input for backward - self.assertEqual(met.metric_data('CompileTime')[0], 3) + self.assertLessEqual(met.metric_data('CompileTime')[0], 3) # We execute 3 graphs per step. 
- self.assertEqual(met.metric_data('ExecuteTime')[0], sample_count * 3) + self.assertLessEqual(met.metric_data('ExecuteTime')[0], sample_count * 3) # one for each forward and one for each backward - self.assertEqual( + self.assertLessEqual( met.metric_data('RunCachedGraphInputData')[0], sample_count * 2) - self.assertEqual( + self.assertLessEqual( met.metric_data('RunCachedGraphOutputData')[0], sample_count * 2) From 4b481349d3b7f5487e8c68749415543de1a590a5 Mon Sep 17 00:00:00 2001 From: Manfei <41607353+ManfeiBai@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:57:29 -0700 Subject: [PATCH 25/53] Update Openxla-pin to 04/24 (#6975) --- WORKSPACE | 4 ++-- setup.py | 2 +- torch_xla/csrc/runtime/ifrt_computation_client.cc | 13 +++++++------ torch_xla/csrc/runtime/ifrt_computation_client.h | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 9c6963dae65..9fe770bedff 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -50,9 +50,9 @@ http_archive( "//openxla_patches:gpu_race_condition.diff", "//openxla_patches:f16_abi_clang.diff", ], - strip_prefix = "xla-54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f", + strip_prefix = "xla-fe08041b23d8baa0d00967913a1d6e8a0c348df3", urls = [ - "https://github.com/openxla/xla/archive/54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f.tar.gz", + "https://github.com/openxla/xla/archive/fe08041b23d8baa0d00967913a1d6e8a0c348df3.tar.gz", ], ) diff --git a/setup.py b/setup.py index dbe47007aff..31f4eaf679c 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ base_dir = os.path.dirname(os.path.abspath(__file__)) -_date = '20240418' +_date = '20240425' _libtpu_version = f'0.1.dev{_date}' _libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl' _jax_version = f'0.4.27.dev{_date}' diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.cc b/torch_xla/csrc/runtime/ifrt_computation_client.cc index 20ee9b0bfa6..c48cf1555ff 100644 --- a/torch_xla/csrc/runtime/ifrt_computation_client.cc +++ b/torch_xla/csrc/runtime/ifrt_computation_client.cc @@ -96,7 +96,7 @@ std::string IfrtComputationClient::IfrtDeviceToString( xla::ifrt::Device* const device) const { std::string platform = absl::AsciiStrToUpper(device->client()->platform_name()); - int ordinal = global_ordinals_.at(device->id()); + int ordinal = global_ordinals_.at(device->Id().value()); std::string str = absl::StrFormat("%s:%d", platform, ordinal); return str; } @@ -124,11 +124,12 @@ IfrtComputationClient::IfrtComputationClient() { // a device's global ordinal separately from its device ID. Order the // devices by increasing ID to assign global ordinals. 
std::vector ordered_devices(client_->device_count()); - std::partial_sort_copy(client_->devices().begin(), client_->devices().end(), - ordered_devices.begin(), ordered_devices.end(), - [](auto& a, auto& b) { return a->id() < b->id(); }); + std::partial_sort_copy( + client_->devices().begin(), client_->devices().end(), + ordered_devices.begin(), ordered_devices.end(), + [](auto& a, auto& b) { return a->Id().value() < b->Id().value(); }); for (auto* device : ordered_devices) { - global_ordinals_[device->id()] = global_ordinals_.size(); + global_ordinals_[device->Id().value()] = global_ordinals_.size(); std::string device_str = IfrtDeviceToString(device); string_to_device_.emplace(device_str, device); } @@ -615,7 +616,7 @@ std::vector IfrtComputationClient::GetAllDevices() const { int IfrtComputationClient::GetNumProcesses() const { int max_process_index = client_->process_index(); for (auto* device : client_->devices()) { - max_process_index = std::max(max_process_index, device->process_index()); + max_process_index = std::max(max_process_index, device->ProcessIndex()); } return max_process_index + 1; diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.h b/torch_xla/csrc/runtime/ifrt_computation_client.h index d6d914ad8da..38d0de97204 100644 --- a/torch_xla/csrc/runtime/ifrt_computation_client.h +++ b/torch_xla/csrc/runtime/ifrt_computation_client.h @@ -134,7 +134,7 @@ class IfrtComputationClient : public ComputationClient { // global_ordinals_ tracks a map from PjRtDeviceId to the device's // dense global ordinal. std::unordered_map global_ordinals_; - std::unordered_map string_to_device_; + std::unordered_map string_to_device_; std::shared_ptr> replication_devices_; OperationManager operation_manager_; tsl::thread::ThreadPool pool_ = tsl::thread::ThreadPool( From 2bf59e0ce42a8ca68693a5307a40b9899da27ed4 Mon Sep 17 00:00:00 2001 From: JackCaoG <59073027+JackCaoG@users.noreply.github.com> Date: Fri, 26 Apr 2024 09:01:43 -0700 Subject: [PATCH 26/53] Move test_grad_checkpoint.py to tpu test list (#6976) --- test/run_tests.sh | 1 - test/tpu/run_tests.sh | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/test/run_tests.sh b/test/run_tests.sh index 8926318dc38..e263b64daa7 100755 --- a/test/run_tests.sh +++ b/test/run_tests.sh @@ -162,7 +162,6 @@ function run_xla_op_tests1 { run_dynamic "$CDIR/ds/test_dynamic_shapes.py" run_dynamic "$CDIR/ds/test_dynamic_shape_models.py" "$@" --verbosity=$VERBOSITY run_eager_debug "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY - run_test "$CDIR/test_grad_checkpoint.py" run_test "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY run_test_without_functionalization "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY run_pt_xla_debug "$CDIR/debug_tool/test_pt_xla_debug.py" diff --git a/test/tpu/run_tests.sh b/test/tpu/run_tests.sh index dc2f4e96dba..b2a8fff33dc 100755 --- a/test/tpu/run_tests.sh +++ b/test/tpu/run_tests.sh @@ -14,6 +14,7 @@ python3 test/spmd/test_xla_auto_sharding.py XLA_EXPERIMENTAL=nonzero:masked_select:nms python3 test/ds/test_dynamic_shape_models.py -v XLA_EXPERIMENTAL=nonzero:masked_select:nms python3 test/ds/test_dynamic_shapes.py -v python3 test/test_autocast.py +python3 test/test_grad_checkpoint.py python3 test/dynamo/test_dynamo.py python3 test/spmd/test_spmd_debugging.py python3 test/pjrt/test_dtypes.py From 023e2c83dcf20b973ad4ec60b27469fcada02af5 Mon Sep 17 00:00:00 2001 From: Manfei <41607353+ManfeiBai@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:28:21 -0700 Subject: [PATCH 27/53] Revert 
"Update Openxla-pin to 04/24" (#6980) --- WORKSPACE | 4 ++-- setup.py | 2 +- torch_xla/csrc/runtime/ifrt_computation_client.cc | 13 ++++++------- torch_xla/csrc/runtime/ifrt_computation_client.h | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 9fe770bedff..9c6963dae65 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -50,9 +50,9 @@ http_archive( "//openxla_patches:gpu_race_condition.diff", "//openxla_patches:f16_abi_clang.diff", ], - strip_prefix = "xla-fe08041b23d8baa0d00967913a1d6e8a0c348df3", + strip_prefix = "xla-54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f", urls = [ - "https://github.com/openxla/xla/archive/fe08041b23d8baa0d00967913a1d6e8a0c348df3.tar.gz", + "https://github.com/openxla/xla/archive/54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f.tar.gz", ], ) diff --git a/setup.py b/setup.py index 31f4eaf679c..dbe47007aff 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ base_dir = os.path.dirname(os.path.abspath(__file__)) -_date = '20240425' +_date = '20240418' _libtpu_version = f'0.1.dev{_date}' _libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl' _jax_version = f'0.4.27.dev{_date}' diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.cc b/torch_xla/csrc/runtime/ifrt_computation_client.cc index c48cf1555ff..20ee9b0bfa6 100644 --- a/torch_xla/csrc/runtime/ifrt_computation_client.cc +++ b/torch_xla/csrc/runtime/ifrt_computation_client.cc @@ -96,7 +96,7 @@ std::string IfrtComputationClient::IfrtDeviceToString( xla::ifrt::Device* const device) const { std::string platform = absl::AsciiStrToUpper(device->client()->platform_name()); - int ordinal = global_ordinals_.at(device->Id().value()); + int ordinal = global_ordinals_.at(device->id()); std::string str = absl::StrFormat("%s:%d", platform, ordinal); return str; } @@ -124,12 +124,11 @@ IfrtComputationClient::IfrtComputationClient() { // a device's global ordinal separately from its device ID. Order the // devices by increasing ID to assign global ordinals. 
std::vector ordered_devices(client_->device_count()); - std::partial_sort_copy( - client_->devices().begin(), client_->devices().end(), - ordered_devices.begin(), ordered_devices.end(), - [](auto& a, auto& b) { return a->Id().value() < b->Id().value(); }); + std::partial_sort_copy(client_->devices().begin(), client_->devices().end(), + ordered_devices.begin(), ordered_devices.end(), + [](auto& a, auto& b) { return a->id() < b->id(); }); for (auto* device : ordered_devices) { - global_ordinals_[device->Id().value()] = global_ordinals_.size(); + global_ordinals_[device->id()] = global_ordinals_.size(); std::string device_str = IfrtDeviceToString(device); string_to_device_.emplace(device_str, device); } @@ -616,7 +615,7 @@ std::vector IfrtComputationClient::GetAllDevices() const { int IfrtComputationClient::GetNumProcesses() const { int max_process_index = client_->process_index(); for (auto* device : client_->devices()) { - max_process_index = std::max(max_process_index, device->ProcessIndex()); + max_process_index = std::max(max_process_index, device->process_index()); } return max_process_index + 1; diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.h b/torch_xla/csrc/runtime/ifrt_computation_client.h index 38d0de97204..d6d914ad8da 100644 --- a/torch_xla/csrc/runtime/ifrt_computation_client.h +++ b/torch_xla/csrc/runtime/ifrt_computation_client.h @@ -134,7 +134,7 @@ class IfrtComputationClient : public ComputationClient { // global_ordinals_ tracks a map from PjRtDeviceId to the device's // dense global ordinal. std::unordered_map global_ordinals_; - std::unordered_map string_to_device_; + std::unordered_map string_to_device_; std::shared_ptr> replication_devices_; OperationManager operation_manager_; tsl::thread::ThreadPool pool_ = tsl::thread::ThreadPool( From 3f5ff0f590b861b5048cd319c6ba3a177098e28e Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Fri, 26 Apr 2024 12:26:58 -0700 Subject: [PATCH 28/53] Update CODEOWNERS for build infrastructure (#6953) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 87072a65bce..bfff4ef8422 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -/infra @will-cromar @JackCaoG @yeounoh @mateuszlewko @stgpetrovic +/infra @will-cromar @JackCaoG @lsy323 From b3be775a9775324f90b37259cecf067b359b1ce5 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Fri, 26 Apr 2024 12:56:10 -0700 Subject: [PATCH 29/53] Move `.torch_pin` and handle in ansible (#6920) --- .circleci/README.md | 19 ----------- .circleci/setup_ci_environment.sh | 2 +- .github/README.md | 19 +++++++++++ .github/workflows/_build_torch_xla.yml | 1 - .github/workflows/lintercheck.yml | 2 +- OP_LOWERING_GUIDE.md | 2 +- benchmarks/run_benchmark.sh | 2 +- docs/README.md | 6 ++-- .../ansible/roles/build_srcs/tasks/main.yaml | 15 +++++++++ scripts/apply_patches.sh | 2 +- test/benchmarks/run_tests.sh | 2 +- test/run_tests.sh | 2 +- torch_patches/README.md | 32 ------------------- 13 files changed, 44 insertions(+), 62 deletions(-) delete mode 100644 .circleci/README.md create mode 100644 .github/README.md delete mode 100644 torch_patches/README.md diff --git a/.circleci/README.md b/.circleci/README.md deleted file mode 100644 index d01e6138317..00000000000 --- a/.circleci/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CircleCI Overview -PyTorch and PyTorch/XLA use CircleCI to lint, build, and test each PR that is submitted. All CircleCI tests should succeed before the PR is merged into master. 
PyTorch CircleCI pins PyTorch/XLA to a specific commit. On the other hand, PyTorch/XLA CircleCI pulls PyTorch from master unless a pin is manually provided. This README will go through the reasons for these pins, how to pin a PyTorch/XLA PR to an upstream PyTorch PR, and how to coordinate a merge for breaking PyTorch changes.
-
-## Why does PyTorch CircleCI pin PyTorch/XLA?
-As mentioned above, [PyTorch CircleCI pins PyTorch/XLA](https://github.com/pytorch/pytorch/blob/master/.jenkins/pytorch/common_utils.sh#L119) to a "known good" commit to prevent accidental changes from PyTorch/XLA to break PyTorch CircleCI without warning. PyTorch has hundreds of commits each week, and this pin ensures that PyTorch/XLA as a downstream package does not cause failures in PyTorch CircleCI.
-
-## Why does PyTorch/XLA CircleCI pull from PyTorch master?
-[PyTorch/XLA CircleCI pulls PyTorch from master](https://github.com/pytorch/xla/blob/f3415929683880192b63b285921c72439af55bf0/.circleci/common.sh#L15) unless a PyTorch pin is manually provided. PyTorch/XLA is a downstream package to PyTorch, and pulling from master ensures that PyTorch/XLA will stay up-to-date and works with the latest PyTorch changes.
-
-## Pinning PyTorch PR in PyTorch/XLA PR
-Sometimes a PyTorch/XLA PR needs to be pinned to a specific PyTorch PR to test new features, fix breaking changes, etc. Since PyTorch/XLA CircleCI pulls from PyTorch master by default, we need to manually provide a PyTorch pin. In a PyTorch/XLA PR, PyTorch can be manually pinned by creating a `.torch_pin` under `/torch_patches`. The `.torch_pin` should have the corresponding PyTorch PR number prefixed by "#". Take a look at [example here](https://github.com/pytorch/xla/pull/3792/commits/40f41fb98b0f2386d287eeac0bae86e873d4a9d8). Before the PyTorch/XLA PR gets merged, the `.torch_pin` must be deleted.
-
-## Coordinating merges for breaking PyTorch PRs
-When a PyTorch PR introduces a breaking change, its PyTorch/XLA CircleCI tests will fail. Steps for fixing and merging such a breaking PyTorch change are as follows:
-1. Create a PyTorch/XLA PR to fix this issue with `.torch_pin` and rebase with master to ensure the PR is up-to-date with the latest commit on PyTorch/XLA. Once this PR is created, it'll create a commit hash that will be used in step 2. If you have multiple commits in the PR, use the last one's hash. **Important note: When you rebase this PR, it'll create a new commit hash and make the old hash obsolete. Be cautious about rebasing, and if you rebase, make sure you inform the PyTorch PR's author.**
-2. Rebase (or ask the PR owner to rebase) the PyTorch PR with master. Update the PyTorch PR to pin the PyTorch/XLA to the commit hash created in step 1 by updating `pytorch/.github/ci_commit_pins/xla.txt`.
-3. Once CircleCI tests are green on both ends, merge PyTorch PR.
-4. Remove the `.torch_pin` in PyTorch/XLA PR and merge. To be noted, `git commit --amend` should be avoided in this step as PyTorch CI will keep using the commit hash created in step 1 until other PRs update that manually or the nightly buildbot updates that automatically.
-5. Finally, don't delete your branch until 2 days later. See step 4 for explanations.
diff --git a/.circleci/setup_ci_environment.sh b/.circleci/setup_ci_environment.sh
index eba2c373b8a..87a61524e7e 100755
--- a/.circleci/setup_ci_environment.sh
+++ b/.circleci/setup_ci_environment.sh
@@ -58,7 +58,7 @@ sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic d
 # How to figure out what the correct versions of these packages are?
 # My preferred method is to start a Docker instance of the correct
 # Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask
-# apt what the packages you need are. Note that the CircleCI image
+# apt what the packages you need are. Note that the CI image
 # comes with Docker.
 #
 # Using 'retry' here as belt-and-suspenders even though we are
diff --git a/.github/README.md b/.github/README.md
new file mode 100644
index 00000000000..c2f4d37426c
--- /dev/null
+++ b/.github/README.md
@@ -0,0 +1,19 @@
+# CI Overview
+PyTorch and PyTorch/XLA use CI to lint, build, and test each PR that is submitted. All CI tests should succeed before the PR is merged into master. PyTorch CI pins PyTorch/XLA to a specific commit. On the other hand, PyTorch/XLA CI pulls PyTorch from master unless a pin is manually provided. This README will go through the reasons for these pins, how to pin a PyTorch/XLA PR to an upstream PyTorch PR, and how to coordinate a merge for breaking PyTorch changes.
+
+## Why does PyTorch CI pin PyTorch/XLA?
+As mentioned above, [PyTorch CI pins PyTorch/XLA](https://github.com/pytorch/pytorch/blob/master/.jenkins/pytorch/common_utils.sh#L119) to a "known good" commit to prevent accidental changes from PyTorch/XLA to break PyTorch CI without warning. PyTorch has hundreds of commits each week, and this pin ensures that PyTorch/XLA as a downstream package does not cause failures in PyTorch CI.
+
+## Why does PyTorch/XLA CI pull from PyTorch master?
+[PyTorch/XLA CI pulls PyTorch from master](https://github.com/pytorch/xla/blob/f3415929683880192b63b285921c72439af55bf0/.circleci/common.sh#L15) unless a PyTorch pin is manually provided. PyTorch/XLA is a downstream package to PyTorch, and pulling from master ensures that PyTorch/XLA will stay up-to-date and works with the latest PyTorch changes.
+
+## Pinning PyTorch PR in PyTorch/XLA PR
+Sometimes a PyTorch/XLA PR needs to be pinned to a specific PyTorch PR to test new features, fix breaking changes, etc. Since PyTorch/XLA CI pulls from PyTorch master by default, we need to manually provide a PyTorch pin. In a PyTorch/XLA PR, PyTorch can be manually pinned by creating a `.torch_pin` file at the root of the repository. The `.torch_pin` should have the corresponding PyTorch PR number prefixed by "#". Take a look at [example here](https://github.com/pytorch/xla/pull/3792/commits/40f41fb98b0f2386d287eeac0bae86e873d4a9d8). Before the PyTorch/XLA PR gets merged, the `.torch_pin` must be deleted.
+
+## Coordinating merges for breaking PyTorch PRs
+When a PyTorch PR introduces a breaking change, its PyTorch/XLA CI tests will fail. Steps for fixing and merging such a breaking PyTorch change are as follows:
+1. Create a PyTorch/XLA PR to fix this issue with `.torch_pin` and rebase with master to ensure the PR is up-to-date with the latest commit on PyTorch/XLA. Once this PR is created, it'll create a commit hash that will be used in step 2. If you have multiple commits in the PR, use the last one's hash. **Important note: When you rebase this PR, it'll create a new commit hash and make the old hash obsolete.
Be cautious about rebasing, and if you rebase, make sure you inform the PyTorch PR's author.** +2. Rebase (or ask the PR owner to rebase) the PyTorch PR with master. Update the PyTorch PR to pin the PyTorch/XLA to the commit hash created in step 1 by updating `pytorch/.github/ci_commit_pins/xla.txt`. +3. Once CI tests are green on both ends, merge PyTorch PR. +4. Remove the `.torch_pin` in PyTorch/XLA PR and merge. To be noted, `git commit --amend` should be avoided in this step as PyTorch CI will keep using the commit hash created in step 1 until other PRs update that manually or the nightly buildbot updates that automatically. +5. Finally, don't delete your branch until 2 days later. See step 4 for explanations. diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml index 3e85b7c4c98..c3200b76ef1 100644 --- a/.github/workflows/_build_torch_xla.yml +++ b/.github/workflows/_build_torch_xla.yml @@ -38,7 +38,6 @@ jobs: repository: pytorch/pytorch path: pytorch submodules: recursive - # TODO: correct pin - name: Checkout PyTorch/XLA Repo uses: actions/checkout@v4 with: diff --git a/.github/workflows/lintercheck.yml b/.github/workflows/lintercheck.yml index 6598b98da32..b17c608f883 100644 --- a/.github/workflows/lintercheck.yml +++ b/.github/workflows/lintercheck.yml @@ -24,7 +24,7 @@ jobs: if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' shell: bash run: | - TORCH_PIN=./torch_patches/.torch_pin + TORCH_PIN=./.torch_pin if [[ -f "${TORCH_PIN}" ]]; then echo "Please remove ${TORCH_PIN} before landing." exit 1 diff --git a/OP_LOWERING_GUIDE.md b/OP_LOWERING_GUIDE.md index b445a1d8998..535d7cf596c 100644 --- a/OP_LOWERING_GUIDE.md +++ b/OP_LOWERING_GUIDE.md @@ -25,7 +25,7 @@ All file mentioned below lives under the `xla/torch_xla/csrc` folder, with the e 7. `ops/` directory contains all `ir::ops` declaration and definition. Smaller nodes can be put in `ops/ops.h/.cpp`. More complicated nodes can be put into a separate file. All ops inherit from `ir::ops::Node` and provide a way to lower input `ir::Value` to a sequence of `XlaOp`. ## Unit Test -Our CircleCI runs PyTorch native python tests for every change and every day. Those tests will use XLA implementation if we provide a lowering. We usually don’t need to add additional python tests for PyTorch/XLA unless we want to verify some xla behaviors(like dynamic shape) or we skipped the pytorch native test for some reason. The python test should be added to `xla/test/test_operations.py` if it is required. We also need to add CPP tests in `xla/test/cpp/test_aten_xla_tensor.cpp`. This test should call PyTorch c++ API and verify our implementation yields the same result as PyTorch native implementation. We also need to verify if the xla implementation is called when the tensor is a XLA tensor by checking the `aten::op` and `xla::op` counters. +Our CI runs PyTorch native python tests for every change and every day. Those tests will use XLA implementation if we provide a lowering. We usually don’t need to add additional python tests for PyTorch/XLA unless we want to verify some xla behaviors(like dynamic shape) or we skipped the pytorch native test for some reason. The python test should be added to `xla/test/test_operations.py` if it is required. We also need to add CPP tests in `xla/test/cpp/test_aten_xla_tensor.cpp`. This test should call PyTorch c++ API and verify our implementation yields the same result as PyTorch native implementation. 
We also need to verify that the xla implementation is called when the tensor is an XLA tensor by checking the `aten::op` and `xla::op` counters. ## Tips The process of lowering is breaking down the PyTorch operations into a sequence of XlaOp. To provide a good lowering of the PyTorch operation, one needs to have a good grasp of what XLA is capable of. Reading the XlaOp document and looking into how similar ops is lowered is the best way to achieve that. You can find a minimal Op lowering example in [this pr](https://github.com/pytorch/xla/pull/2969). You can also find a slightly more complicated example with backward lowering in [this pr](https://github.com/pytorch/xla/pull/2972).
diff --git a/benchmarks/run_benchmark.sh b/benchmarks/run_benchmark.sh index fd8a055bccc..e4e483947d9 100644 --- a/benchmarks/run_benchmark.sh +++ b/benchmarks/run_benchmark.sh @@ -5,7 +5,7 @@ LOGFILE=/tmp/benchmark_test.log # Note [Keep Going] # -# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CircleCI tests continue on error. +# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CI tests continue on error. # This will allow you to see all the failures on your PR, not stopping with the first # test failure like the default behavior. CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}"
diff --git a/docs/README.md b/docs/README.md index 33a0ce5bc36..a405597c798 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,12 +1,12 @@ ## Publish documentation for a new release. -CircleCI job `pytorch_xla_linux_debian11_and_push_doc` is specified to run on `release/*` branches, but it was not +CI job `pytorch_xla_linux_debian11_and_push_doc` is specified to run on `release/*` branches, but it was not run on release branches due to "Only build pull requests" setting. Turning off "Only build pull requests" will result in much larger volumes in jobs which is often unnecessary. We're waiting for [this feature request](https://ideas.circleci.com/ideas/CCI-I-215) to be implemented so that we could override this setting on some branches. Before the feature is available on CircleCi side, we'll use a manual process to publish documentation for release. -[Documentation for master branch](http://pytorch.org/xla/master/) is still updated automatically by the CircleCI job. +[Documentation for master branch](http://pytorch.org/xla/master/) is still updated automatically by the CI job. But we'll need to manually commit the new versioned doc and point http://pytorch.org/xla to the documentation of new stable release. @@ -22,4 +22,4 @@ cd /tmp/xla git add . git commit -m "Publish 1.5 documentation."
git push origin gh-pages -``` \ No newline at end of file +``` diff --git a/infra/ansible/roles/build_srcs/tasks/main.yaml b/infra/ansible/roles/build_srcs/tasks/main.yaml index d945f150d38..87adde1ed21 100644 --- a/infra/ansible/roles/build_srcs/tasks/main.yaml +++ b/infra/ansible/roles/build_srcs/tasks/main.yaml @@ -1,3 +1,18 @@ +- name: Read PyTorch pin + ansible.builtin.command: cat {{ (src_root, 'pytorch/xla/.torch_pin') | path_join }} + register: torch_pin + # Pin may not exist + ignore_errors: true + +- name: Checkout PyTorch pin + # ansible.builtin.git wants to fetch the entire history, so check out the pin manually + ansible.builtin.shell: + cmd: | + git fetch origin {{ torch_pin.stdout }} + git checkout --recurse-submodules {{ torch_pin.stdout }} + chdir: "{{ (src_root, 'pytorch') | path_join }}" + when: torch_pin is succeeded + - name: Build PyTorch ansible.builtin.command: cmd: python setup.py bdist_wheel diff --git a/scripts/apply_patches.sh b/scripts/apply_patches.sh index 923b68c79d4..7ba0a3ef8e3 100755 --- a/scripts/apply_patches.sh +++ b/scripts/apply_patches.sh @@ -7,7 +7,7 @@ XDIR=$CDIR/.. PTDIR=$XDIR/.. OPENXLADIR=$XDIR/third_party/xla -TORCH_PIN="$XDIR/torch_patches/.torch_pin" +TORCH_PIN="$XDIR/.torch_pin" if [ -f "$TORCH_PIN" ]; then CID=$(cat "$TORCH_PIN") # If starts with # and it's not merged into master, fetch from origin diff --git a/test/benchmarks/run_tests.sh b/test/benchmarks/run_tests.sh index 3832b21ed22..fce6140a4fe 100755 --- a/test/benchmarks/run_tests.sh +++ b/test/benchmarks/run_tests.sh @@ -9,7 +9,7 @@ export PYTHONPATH=$PYTHONPATH:$CDIR/../../benchmarks/ # Note [Keep Going] # -# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CircleCI tests continue on error. +# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CI tests continue on error. # This will allow you to see all the failures on your PR, not stopping with the first # test failure like the default behavior. CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}" diff --git a/test/run_tests.sh b/test/run_tests.sh index e263b64daa7..1c5095baa5a 100755 --- a/test/run_tests.sh +++ b/test/run_tests.sh @@ -8,7 +8,7 @@ VERBOSITY=2 # Note [Keep Going] # -# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CircleCI tests continue on error. +# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CI tests continue on error. # This will allow you to see all the failures on your PR, not stopping with the first # test failure like the default behavior. CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}" diff --git a/torch_patches/README.md b/torch_patches/README.md deleted file mode 100644 index f6476f64ca5..00000000000 --- a/torch_patches/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Guidelines For Patch File Names - -Files with extension '.diff' are consider as git patches by apply script. - -A file for PyTorch PR _N_ needs to be named 'N.diff'. - -Patch files which are not related to PyTorch PRs, should begin with an 'X' character, -followed by a two digit number, followed by a dash ('-'), a name, and '.diff'. -Example: - -``` -X10-optimizer.diff -``` - -Patch file are alphabetically ordered, so PyTorch PR patches are always applied -before the non PyTorch ones. - - -There's a special file `torch_patches/.torch_pin`, which is used to coordinate landing PRs in -`pytorch/pytorch` and `pytorch/xla`. - -To test a `pytorch/xla` PR against a `pytorch/pytorch` PR or branch, -put the PR number or branch name in this file. 
-Example: - -``` -#32451 -# or -my_awesome_branch # (must live in `pytorch/pytorch`) -``` - -In the case where the pytorch/pytorch PR also depends on the pytorch/xla PR, you will also need to update the https://github.com/pytorch/pytorch/blob/main/.github/ci_commit_pins/xla.txt to match the latest hash of your pytorch/xla PR. To be noted, the hash from a PR produced by a fork won't work in this case. Then you need to find someone from the pytorch/xla team to produe a branch PR for you. From b9a9449f205d10769660c428cc68755a6ffe183a Mon Sep 17 00:00:00 2001 From: JackCaoG <59073027+JackCaoG@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:00:30 -0700 Subject: [PATCH 30/53] Update dynamo test to be less constrain (#6981) --- test/dynamo/test_dynamo.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/dynamo/test_dynamo.py b/test/dynamo/test_dynamo.py index 2a7e99dd4d3..946ae914b04 100644 --- a/test/dynamo/test_dynamo.py +++ b/test/dynamo/test_dynamo.py @@ -641,10 +641,7 @@ def test_all_cpu_tensor(self): # there should be 18 paramters + 1 input self.assertGreater(len(w), 15) self.assertIn('Found tensor with shape torch.Size', str(w[0].message)) - # no XLA operation should happens except a empty mark_step. Partitioner should offload all CPU - # ops to CPU. - self.assertEqual(len(met.counter_names()), 1) - self.assertIn('MarkStep', met.counter_names()) + self.assertLessEqual(len(met.counter_names()), 1) class DynamoOperationsTests(test_utils.XlaTestCase): From b834e49907aab60e096620664ca8ba328ef92141 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Fri, 26 Apr 2024 15:12:10 -0700 Subject: [PATCH 31/53] Build CPP tests in new CI workflow (#6947) --- .github/scripts/run_tests.sh | 108 +++++++++++++ .github/workflows/_build_torch_xla.yml | 9 +- .../workflows/{_test_python.yml => _test.yml} | 24 ++- .github/workflows/_test_cpp.yml | 150 ------------------ .github/workflows/build_and_test.yml | 33 +--- BUILD | 20 +++ build_util.py | 4 - infra/ansible/config/env.yaml | 2 +- infra/ansible/config/vars.yaml | 2 + .../ansible/roles/build_srcs/tasks/main.yaml | 16 ++ setup.py | 4 + torch_xla/csrc/runtime/pjrt_registry.cc | 18 ++- 12 files changed, 200 insertions(+), 190 deletions(-) create mode 100755 .github/scripts/run_tests.sh rename .github/workflows/{_test_python.yml => _test.yml} (90%) delete mode 100644 .github/workflows/_test_cpp.yml diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh new file mode 100755 index 00000000000..ae59a51490d --- /dev/null +++ b/.github/scripts/run_tests.sh @@ -0,0 +1,108 @@ +set -ex + +function run_torch_xla_python_tests() { + PYTORCH_DIR=$1 + XLA_DIR=$2 + USE_COVERAGE="${3:-0}" + + pushd $XLA_DIR + echo "Running Python Tests" + if [ "$USE_COVERAGE" != "0" ]; then + pip install coverage==6.5.0 --upgrade + pip install coverage-lcov + pip install toml + ./test/run_tests.sh + coverage combine + mkdir lcov && cp .coverage lcov/ + coverage-lcov --data_file_path lcov/.coverage + coverage html + cp lcov.info htmlcov/ + mv htmlcov ~/ + chmod -R 755 ~/htmlcov + else + ./test/run_tests.sh + fi + popd +} + +function run_torch_xla_cpp_tests() { + PYTORCH_DIR=$1 + XLA_DIR=$2 + USE_COVERAGE="${3:-0}" + + TORCH_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch').get_filename()))") + export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TORCH_DIR}/lib + if [ -x "$(command -v nvidia-smi)" ]; then + CUDA_PLUGIN_DIR=$(python -c "import pkgutil; import os; 
print(os.path.dirname(pkgutil.get_loader('torch_xla_cuda_plugin').get_filename()))") + export PJRT_LIBRARY_PATH=$CUDA_PLUGIN_DIR/lib/pjrt_c_api_gpu_plugin.so + export PJRT_DEVICE=LIBRARY + export PJRT_DYNAMIC_PLUGINS=1 + else + export PJRT_DEVICE=CPU + fi + export XLA_EXPERIMENTAL="nonzero:masked_select:nms" + + test_names1=("test_aten_xla_tensor_1" + "test_aten_xla_tensor_2" + "test_aten_xla_tensor_3" + "test_aten_xla_tensor_4" + "pjrt_computation_client_test" + "ifrt_computation_client_test") + test_names2=("test_aten_xla_tensor_5" + "test_aten_xla_tensor_6" + "test_ir" + "test_lazy" + "test_replication" + "test_tensor" + # disable test_xla_backend_intf since it is flaky on upstream + #"test_xla_backend_intf" + "test_xla_sharding") + if [[ "$RUN_CPP_TESTS1" == "cpp_tests1" ]]; then + test_names=("${test_names1[@]}") + elif [[ "$RUN_CPP_TESTS2" == "cpp_tests2" ]]; then + test_names=("${test_names2[@]}") + else + test_names=("${test_names1[@]}" "${test_names2[@]}") + fi + + for name in "${test_names[@]}"; do + echo "Running $name cpp test..." + /tmp/test/bin/${name} + done +} + +function run_torch_xla_benchmark_tests() { + XLA_DIR=$1 + pushd $XLA_DIR + echo "Running Benchmark Tests" + test/benchmarks/run_tests.sh -L"" +} + +PYTORCH_DIR=$1 +XLA_DIR=$2 +USE_COVERAGE="${3:-0}" +RUN_CPP="${RUN_CPP_TESTS:0}" +RUN_PYTHON="${RUN_PYTHON_TESTS:0}" + +if [ -x "$(command -v nvidia-smi)" ]; then + num_devices=$(nvidia-smi --list-gpus | wc -l) + echo "Found $num_devices GPU devices..." + export GPU_NUM_DEVICES=$num_devices +fi +export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla" +export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") + +if [[ -z "$RUN_BENCHMARK_TESTS" && -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" ]]; then + run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + run_torch_xla_benchmark_tests $XLA_DIR +else + # run tests separately. 
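+  # For example (hypothetical direct invocations; the variable values below are
+  # the ones the _test.yml matrix passes in, not new flags):
+  #   RUN_PYTHON_TESTS=python_tests .github/scripts/run_tests.sh pytorch/ pytorch/xla/
+  #   RUN_CPP_TESTS1=cpp_tests1 .github/scripts/run_tests.sh pytorch/ pytorch/xla/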
+ if [[ "$RUN_PYTHON_TESTS" == "python_tests" ]]; then + run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + elif [[ "$RUN_BENCHMARK_TESTS" == "benchmark_tests" ]]; then + run_torch_xla_benchmark_tests $XLA_DIR + else + run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE + fi +fi diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml index c3200b76ef1..7614242fd7a 100644 --- a/.github/workflows/_build_torch_xla.yml +++ b/.github/workflows/_build_torch_xla.yml @@ -26,7 +26,7 @@ jobs: GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json BAZEL_JOBS: 16 BAZEL_REMOTE_CACHE: 1 - # BUILD_CPP_TESTS: 1 + BUILD_CPP_TESTS: 1 steps: - name: Setup gcloud shell: bash @@ -46,9 +46,14 @@ jobs: shell: bash run: | cd pytorch/xla/infra/ansible - ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps + ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 build_cpp_tests=1 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps - name: Upload wheel uses: actions/upload-artifact@v4 with: name: torch-xla-wheels path: /dist/*.whl + - name: Upload CPP test binaries + uses: actions/upload-artifact@v4 + with: + name: cpp-test-bin + path: /tmp/test/bin diff --git a/.github/workflows/_test_python.yml b/.github/workflows/_test.yml similarity index 90% rename from .github/workflows/_test_python.yml rename to .github/workflows/_test.yml index 960b326450b..ffb73a156fa 100644 --- a/.github/workflows/_test_python.yml +++ b/.github/workflows/_test.yml @@ -53,6 +53,10 @@ jobs: run_xla_op_tests3: 'xla_op3' - run_python_tests: 'python_tests' run_torch_mp_op_tests: 'torch_mp_op' + - run_cpp_tests: 'cpp_tests' + run_cpp_tests1: 'cpp_tests1' + - run_cpp_tests: 'cpp_tests' + run_cpp_tests2: 'cpp_tests2' timeout-minutes: ${{ inputs.timeout-minutes }} env: GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} @@ -64,6 +68,8 @@ jobs: RUN_XLA_OP_TESTS2: ${{ matrix.run_xla_op_tests2 }} RUN_XLA_OP_TESTS3: ${{ matrix.run_xla_op_tests3 }} RUN_TORCH_MP_OP_TESTS: ${{ matrix.run_torch_mp_op_tests }} + RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }} + RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }} BAZEL_JOBS: 16 BAZEL_REMOTE_CACHE: 1 steps: @@ -76,6 +82,19 @@ jobs: with: name: torch-xla-wheels path: /tmp/wheels/ + - name: Fetch CPP test binaries + uses: actions/download-artifact@v4 + with: + name: cpp-test-bin + path: /tmp/test/bin + if: ${{ matrix.run_cpp_tests }} + # GitHub Actions doesn't preserve executable permissions + # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss + - name: Set CPP test permissions + run: | + chmod +x /tmp/test/bin/* + ls -l /tmp/test/bin + if: ${{ matrix.run_cpp_tests }} - name: Fetch CUDA plugin uses: actions/download-artifact@v4 with: @@ -134,10 +153,7 @@ jobs: fi - name: Test shell: bash - run: | - source pytorch/xla/.circleci/common.sh - - run_torch_xla_tests pytorch/ pytorch/xla/ $USE_COVERAGE + run: pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE - name: Upload coverage results if: ${{ inputs.collect-coverage }} shell: bash diff --git a/.github/workflows/_test_cpp.yml b/.github/workflows/_test_cpp.yml deleted file mode 100644 index d0056d34963..00000000000 --- a/.github/workflows/_test_cpp.yml +++ /dev/null @@ -1,150 +0,0 @@ -name: xla-test -on: - workflow_call: - inputs: - docker-image: - required: 
true - type: string - description: Image to test on - runner: - required: false - type: string - description: Runner type for the test - default: linux.12xlarge - collect-coverage: - required: false - type: boolean - description: Set to true to collect coverage information - default: false - timeout-minutes: - required: false - type: number - default: 270 - description: | - Set the maximum (in minutes) how long the workflow should take to finish - disable-pjrt: - required: false - type: string - default: 0 - description: Whether to disable PJRT tests - test-script: - required: false - type: string - default: test.sh - description: Which test script to run - - secrets: - gcloud-service-key: - required: true - description: Secret to access Bazel build cache -jobs: - test: - runs-on: ${{ inputs.runner }} - strategy: - fail-fast: false - matrix: - include: - # Use readable strings as they define the workflow titles. - - run_cpp_tests1: 'cpp_tests1' - - run_cpp_tests2: 'cpp_tests2' - timeout-minutes: ${{ inputs.timeout-minutes }} - env: - DOCKER_IMAGE: ${{ inputs.docker-image }} - WORKDIR: /var/lib/jenkins/workspace - GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} - USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }} - XLA_SKIP_TORCH_OP_TESTS: ${{ inputs.disable-pjrt }} - XLA_SKIP_MP_OP_TESTS: ${{ inputs.disable-pjrt }} - RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }} - RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }} - steps: - - name: Setup Linux - uses: pytorch/test-infra/.github/actions/setup-linux@main - - name: Setup SSH (Click me for login details) - uses: pytorch/test-infra/.github/actions/setup-ssh@main - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - instructions: | - Tests are done inside the container, to start an interactive session run: - docker exec -it $(docker container ps --format '{{.ID}}') bash - - name: Install gcloud CLI - if: ${{ inputs.collect-coverage }} - shell: bash - run: | - sudo tee -a /etc/yum.repos.d/google-cloud-sdk.repo << EOM - [google-cloud-cli] - name=Google Cloud CLI - baseurl=https://packages.cloud.google.com/yum/repos/cloud-sdk-el8-x86_64 - enabled=1 - gpgcheck=1 - repo_gpgcheck=0 - gpgkey=https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg - EOM - sudo yum install -y google-cloud-cli - - name: Auth to GCR - if: ${{ inputs.collect-coverage }} - shell: bash - run: | - echo "${GCLOUD_SERVICE_KEY}" | gcloud auth activate-service-account --key-file=- - - name: Download and run docker image from GCR - shell: bash - run: | - echo "DOCKER_IMAGE: ${DOCKER_IMAGE}" - docker pull "${DOCKER_IMAGE}" - pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") - echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json" - echo "pid=${pid}" >> "${GITHUB_ENV}" - - name: Test - shell: bash - run: | - docker exec --privileged -u jenkins "${pid}" bash -c '.circleci/${{ inputs.test-script }}' - - name: Upload coverage results - if: ${{ inputs.collect-coverage }} - shell: bash - env: - CIRCLE_WORKFLOW_ID: ${{ github.run_id }} - CIRCLE_BUILD_NUM: ${{ github.run_number }} - BENCHMARK_TEST_NAME: ${{ env.RUN_BENCHMARK_TESTS }} - PYTHON_TEST_NAME: ${{ env.RUN_PYTHON_TESTS }}${{ env.RUN_XLA_OP_TESTS1 }}${{ env.RUN_XLA_OP_TESTS2 }}${{ 
env.RUN_XLA_OP_TESTS3 }}${{ env.RUN_TORCH_MP_OP_TESTS }} - CPP_TEST_NAME: ${{ env.RUN_CPP_TESTS1 }}${{ env.RUN_CPP_TESTS2 }} - run: | - # TODO(yeounoh) collect coverage report as needed. - if [ -n "${BENCHMARK_TEST_NAME}" ]; then - exit 0 - fi - docker cp "${pid}":/home/jenkins/htmlcov "${GITHUB_WORKSPACE}" - if [ -n "${GPU_FLAG:-}" ]; then - if [ -n "${PYTHON_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out - fi - if [ -n "${CPP_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out - fi - else - if [ -n "${PYTHON_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out - fi - - if [ -n "${CPP_TEST_NAME}" ]; then - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out - gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out - fi - - if [ "${CPP_TEST_NAME}" == "cpp_tests1" ]; then - ABS_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "commit_id": '\"${GITHUB_SHA}\"', "ref": "HEAD", "source": "https://github.com/pytorch/xla", "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}' - echo $ABS_METADATA > abs_metadata.json - gsutil cp abs_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json - - INC_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "patchset_num": 1, "change_id": '${CIRCLE_BUILD_NUM}', "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}' - echo $INC_METADATA > inc_metadata.json - gsutil cp inc_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json - fi - fi - - - name: Teardown Linux - uses: pytorch/test-infra/.github/actions/teardown-linux@main - if: always() - diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e5738b5a6af..ce90448738a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -30,29 +30,6 @@ jobs: secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - test-cpp-cpu: - name: "CPU C++ tests" - uses: ./.github/workflows/_test_cpp.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - timeout-minutes: 120 - collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - - test-cpp-cuda: - name: "GPU C++ tests" 
- uses: ./.github/workflows/_test_cpp.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - runner: linux.8xlarge.nvidia.gpu - timeout-minutes: 300 - collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - push-docs: name: "Build & publish docs" if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r')) @@ -81,8 +58,8 @@ jobs: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} test-python-cpu: - name: "CPU Python tests" - uses: ./.github/workflows/_test_python.yml + name: "CPU tests" + uses: ./.github/workflows/_test.yml needs: build-torch-xla with: dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm @@ -91,9 +68,9 @@ jobs: secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - test-python-cuda: - name: "GPU Python tests" - uses: ./.github/workflows/_test_python.yml + test-cuda: + name: "GPU tests" + uses: ./.github/workflows/_test.yml needs: [build-torch-xla, build-cuda-plugin] with: dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1 diff --git a/BUILD b/BUILD index 6949f6dc748..60b601240fc 100644 --- a/BUILD +++ b/BUILD @@ -30,3 +30,23 @@ cc_binary( "@xla//xla/stream_executor:cuda_platform", ]), ) + +test_suite( + name = "cpp_tests", + # testonly = True, + tests = [ + "//test/cpp:test_aten_xla_tensor_1", + "//test/cpp:test_aten_xla_tensor_2", + "//test/cpp:test_aten_xla_tensor_3", + "//test/cpp:test_aten_xla_tensor_4", + "//test/cpp:test_aten_xla_tensor_5", + "//test/cpp:test_aten_xla_tensor_6", + "//test/cpp:test_ir", + "//test/cpp:test_lazy", + "//test/cpp:test_replication", + "//test/cpp:test_tensor", + "//test/cpp:test_xla_sharding", + "//torch_xla/csrc/runtime:pjrt_computation_client_test", + "//torch_xla/csrc/runtime:ifrt_computation_client_test", + ], +) diff --git a/build_util.py b/build_util.py index 78e4bd5e453..487f5116323 100644 --- a/build_util.py +++ b/build_util.py @@ -36,10 +36,6 @@ def bazel_options_from_env() -> Iterable[str]: bazel_flags.append('--remote_default_exec_properties=cache-silo-key=%s' % cache_silo_name) - if check_env_flag('BUILD_CPP_TESTS', default='0'): - bazel_flags.append('//test/cpp:all') - bazel_flags.append('//torch_xla/csrc/runtime:all') - bazel_jobs = os.getenv('BAZEL_JOBS', default='') if bazel_jobs: bazel_flags.append('--jobs=%s' % bazel_jobs) diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml index d324729ce11..ea785519bae 100644 --- a/infra/ansible/config/env.yaml +++ b/infra/ansible/config/env.yaml @@ -22,7 +22,7 @@ build_env: common: LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" # Set explicitly to 0 as setup.py defaults this flag to true if unset. - BUILD_CPP_TESTS: 0 + BUILD_CPP_TESTS: "{{ build_cpp_tests }}" # Force GCC because clang/bazel has issues. 
CC: gcc-10 CXX: g++-10 diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml index c1ca7a93d27..1ab00087b60 100644 --- a/infra/ansible/config/vars.yaml +++ b/infra/ansible/config/vars.yaml @@ -14,3 +14,5 @@ nightly_release: false bundle_libtpu: 1 # Suffix for bazel remote cache key cache_suffix: "" +# Whether to build C++ tests with `torch_xla` wheel +build_cpp_tests: 0 diff --git a/infra/ansible/roles/build_srcs/tasks/main.yaml b/infra/ansible/roles/build_srcs/tasks/main.yaml index 87adde1ed21..d69e9012718 100644 --- a/infra/ansible/roles/build_srcs/tasks/main.yaml +++ b/infra/ansible/roles/build_srcs/tasks/main.yaml @@ -92,6 +92,22 @@ state: absent mode: '0755' +- name: Create temp directory for C++ tests + ansible.builtin.file: + path: /tmp/test/bin + state: directory + mode: '0755' + when: build_cpp_tests + +- name: Collect C++ test files + ansible.builtin.shell: | + cd pytorch/xla/build/temp* + bazel query 'kind(".*_test", tests(//:cpp_tests))' --output=label | xargs -n 1 bazel cquery --output=files | xargs cp -t /tmp/test/bin + args: + executable: bash + chdir: "{{ src_root }}" + when: build_cpp_tests + - name: Read Torchvision pin ansible.builtin.command: cat {{ (src_root, 'pytorch') | path_join }}/.github/ci_commit_pins/vision.txt register: torchvision_pin diff --git a/setup.py b/setup.py index dbe47007aff..a1db046e679 100644 --- a/setup.py +++ b/setup.py @@ -223,6 +223,10 @@ def bazel_build(self, ext): f"--symlink_prefix={os.path.join(self.build_temp, 'bazel-')}" ] + build_cpp_tests = build_util.check_env_flag('BUILD_CPP_TESTS', default='0') + if build_cpp_tests: + bazel_argv.append('//:cpp_tests') + import torch cxx_abi = os.getenv('CXX_ABI') or getattr(torch._C, '_GLIBCXX_USE_CXX11_ABI', None) diff --git a/torch_xla/csrc/runtime/pjrt_registry.cc b/torch_xla/csrc/runtime/pjrt_registry.cc index 99e23f4b555..52b06d89cb4 100644 --- a/torch_xla/csrc/runtime/pjrt_registry.cc +++ b/torch_xla/csrc/runtime/pjrt_registry.cc @@ -21,8 +21,24 @@ namespace runtime { namespace { +// Placeholder plugin for testing only. Does not implement multiprocessing or +// configuration. Very likely will not work from Python code. 
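+// As a usage sketch (mirroring the env vars that .github/scripts/run_tests.sh
+// sets, not a new API): select this plugin with PJRT_DEVICE=LIBRARY and point
+// PJRT_LIBRARY_PATH at a PJRT C API plugin binary such as
+// pjrt_c_api_gpu_plugin.so.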
+class LibraryPlugin : public PjRtPlugin { + public: + std::string library_path() const override { + return sys_util::GetEnvString("PJRT_LIBRARY_PATH", ""); + } + + const std::unordered_map<std::string, xla::PjRtValueType> + client_create_options() const override { + return {}; + } + + bool requires_xla_coordinator() const override { return false; } +}; + std::unordered_map<std::string, std::shared_ptr<const PjRtPlugin>> - pjrt_plugins_; + pjrt_plugins_ = {{"LIBRARY", std::make_shared<LibraryPlugin>()}}; xla::GpuAllocatorConfig GetGpuAllocatorConfig() { auto allocator_config = xla::GpuAllocatorConfig{};
From 174f4077b8bad4cdaec4d537b89167b9ad77ec56 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Fri, 26 Apr 2024 16:16:55 -0700 Subject: [PATCH 32/53] Run TPU CI when label is on PR (#6984) --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ce90448738a..ffb75c3ef8d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -42,7 +42,7 @@ jobs: # New CI workflow build-torch-xla: - name: "Build PyTorch/XLA (TPU)" + name: "Build PyTorch/XLA" uses: ./.github/workflows/_build_torch_xla.yml with: dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm @@ -86,5 +86,5 @@ jobs: uses: ./.github/workflows/_tpu_ci.yml needs: build-torch-xla # Only run this for HEAD and releases - if: github.event_name == 'push' + if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'tpuci')
From 6443e593ad10940836d950fc9e2d41f9cf5c345d Mon Sep 17 00:00:00 2001 From: qihqi Date: Mon, 29 Apr 2024 09:15:54 -0700 Subject: [PATCH 33/53] Add readme to call a model (lost due to merge conflicts) (#6986) --- experimental/torch_xla2/README.md | 93 +++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+)
diff --git a/experimental/torch_xla2/README.md b/experimental/torch_xla2/README.md index fba08f40498..594d5380882 100644 --- a/experimental/torch_xla2/README.md +++ b/experimental/torch_xla2/README.md @@ -60,3 +60,96 @@ pip install -e .[tpu] -f https://storage.googleapis.com/libtpu-releases/index.ht pip install -r test-requirements.txt pytest test ```
+
+## Run a model
+
+Now let's execute a model under torch_xla2. We'll start with a simple 2-layer model;
+in theory, it can be any instance of `torch.nn.Module`.
+
+```python +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class MyModel(nn.Module): + def __init__(self): + super().__init__() + self.fc1 = nn.Linear(28 * 28, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = x.view(-1, 28 * 28) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + +m = MyModel() + +# Execute this model using torch +inputs = torch.randn(3, 3, 28, 28) +print(m(inputs)) +```
+
+This model `m` contains 2 parts: the weights that are stored inside the model
+and its submodules (`nn.Linear`).
+
+To execute this model with `torch_xla2`, we need to move the tensors involved in the computation
+to `XLA` devices. This can be accomplished with `torch_xla2.tensor.move_to_device`.
We need to move both the weights and the input to XLA devices:
+
+```python +import torch_xla2 +from torch.utils import _pytree as pytree +from torch_xla2.tensor import move_to_device + +inputs = move_to_device(inputs) +new_state_dict = pytree.tree_map_only(torch.Tensor, move_to_device, m.state_dict()) +m.load_state_dict(new_state_dict, assign=True) + +res = m(inputs) + +print(type(res)) # outputs XLATensor2 +```
+
+### Executing with jax.jit
+
+The above script will execute the model using eager mode Jax as the backend. This
+does allow executing torch models on TPU, but is often slower than what we can
+achieve with `jax.jit`.
+
+`jax.jit` is a function that turns a Jax function (i.e. a function that takes jax arrays
+and returns jax arrays) into the same function, but faster.
+
+We have made the `jax_jit` decorator, which accomplishes the same for functions
+that take and return `torch.Tensor`. To use this, the first step is to create
+a functional version of this model: this means the parameters should be passed in
+as input instead of being attributes on the class:
+
+
+```python + +def model_func(param, inputs): + return torch.func.functional_call(m, param, inputs) + +```
+Here we use [torch.func.functional_call](https://pytorch.org/docs/stable/generated/torch.func.functional_call.html)
+from PyTorch to replace the model
+weights with `param`, then call the model. This is equivalent to:
+
+```python +def model_func(param, inputs): + m.load_state_dict(param) + return m(*inputs) +```
+
+Now, we can apply `jax_jit`:
+
+```python +from torch_xla2.extra import jax_jit +model_func_jitted = jax_jit(model_func) +print(model_func_jitted(new_state_dict, inputs)) +```
From 6d01bb6e331ad898d54abe555d2f9de502a2b9d4 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Mon, 29 Apr 2024 10:07:58 -0700 Subject: [PATCH 34/53] Fix permission issues during CI checkout (#6985) --- .github/workflows/_build.yml | 5 +++++ .github/workflows/build_and_test.yml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index 789d0579272..c5420e15dfc 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -45,6 +45,11 @@ jobs: XLA_CUDA: ${{ inputs.cuda }} BAZEL_JOBS: 16 steps: + # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802 + - name: Fix workspace permissions + run: | + ls -la + rm -rvf ${GITHUB_WORKSPACE}/* - name: Setup Linux uses: pytorch/test-infra/.github/actions/setup-linux@main - name: Setup SSH (Click me for login details)
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ffb75c3ef8d..8b285b06696 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -21,7 +21,7 @@ concurrency: jobs: # Old CI workflow build: - name: "Build PyTorch/XLA (GPU)" + name: "Build upstream CI image" uses: ./.github/workflows/_build.yml with: ecr-docker-image-base: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base
From 971ebe1ff0a9446eed60457d5a711f8d57cf33a4 Mon Sep 17 00:00:00 2001 From: Manfei <41607353+ManfeiBai@users.noreply.github.com> Date: Mon, 29 Apr 2024 10:09:13 -0700 Subject: [PATCH 35/53] [Revert Revert] Update OpenXLA-pin update to Apr25 (#6982) --- WORKSPACE | 4 ++-- setup.py | 2 +- test/pjrt/test_runtime_tpu.py | 5 +++-- torch_xla/csrc/runtime/ifrt_computation_client.cc | 13 +++++++------ torch_xla/csrc/runtime/ifrt_computation_client.h | 2 +- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git
a/WORKSPACE b/WORKSPACE index 9c6963dae65..9fe770bedff 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -50,9 +50,9 @@ http_archive( "//openxla_patches:gpu_race_condition.diff", "//openxla_patches:f16_abi_clang.diff", ], - strip_prefix = "xla-54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f", + strip_prefix = "xla-fe08041b23d8baa0d00967913a1d6e8a0c348df3", urls = [ - "https://github.com/openxla/xla/archive/54ca388f9ad9e8bbcb0ef823752d6b47a99d0b5f.tar.gz", + "https://github.com/openxla/xla/archive/fe08041b23d8baa0d00967913a1d6e8a0c348df3.tar.gz", ], ) diff --git a/setup.py b/setup.py index a1db046e679..92ccd1004d3 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ base_dir = os.path.dirname(os.path.abspath(__file__)) -_date = '20240418' +_date = '20240425' _libtpu_version = f'0.1.dev{_date}' _libtpu_storage_path = f'https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}-py3-none-any.whl' _jax_version = f'0.4.27.dev{_date}' diff --git a/test/pjrt/test_runtime_tpu.py b/test/pjrt/test_runtime_tpu.py index 0def33ae275..744039a4f58 100644 --- a/test/pjrt/test_runtime_tpu.py +++ b/test/pjrt/test_runtime_tpu.py @@ -206,7 +206,8 @@ def _runtime_device_attributes(): def test_runtime_device_attributes(self): result = pjrt.run_multiprocess(self._runtime_device_attributes) for device in result.values(): - self.assertCountEqual(['coords', 'core_on_chip'], list(device.keys())) + self.assertCountEqual(['coords', 'core_on_chip', 'num_cores'], + list(device.keys())) self.assertIsInstance(device['coords'], list) self.assertIsInstance(device['core_on_chip'], int) @@ -218,7 +219,7 @@ def test_global_runtime_device_attributes(self): results = pjrt.run_multiprocess(self._global_runtime_device_attributes) for result in results.values(): for device in result: - self.assertCountEqual(['coords', 'core_on_chip', 'name'], + self.assertCountEqual(['coords', 'core_on_chip', 'name', 'num_cores'], list(device.keys())) self.assertIsInstance(device['coords'], list) self.assertIsInstance(device['core_on_chip'], int) diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.cc b/torch_xla/csrc/runtime/ifrt_computation_client.cc index 20ee9b0bfa6..c48cf1555ff 100644 --- a/torch_xla/csrc/runtime/ifrt_computation_client.cc +++ b/torch_xla/csrc/runtime/ifrt_computation_client.cc @@ -96,7 +96,7 @@ std::string IfrtComputationClient::IfrtDeviceToString( xla::ifrt::Device* const device) const { std::string platform = absl::AsciiStrToUpper(device->client()->platform_name()); - int ordinal = global_ordinals_.at(device->id()); + int ordinal = global_ordinals_.at(device->Id().value()); std::string str = absl::StrFormat("%s:%d", platform, ordinal); return str; } @@ -124,11 +124,12 @@ IfrtComputationClient::IfrtComputationClient() { // a device's global ordinal separately from its device ID. Order the // devices by increasing ID to assign global ordinals. 
std::vector ordered_devices(client_->device_count()); - std::partial_sort_copy(client_->devices().begin(), client_->devices().end(), - ordered_devices.begin(), ordered_devices.end(), - [](auto& a, auto& b) { return a->id() < b->id(); }); + std::partial_sort_copy( + client_->devices().begin(), client_->devices().end(), + ordered_devices.begin(), ordered_devices.end(), + [](auto& a, auto& b) { return a->Id().value() < b->Id().value(); }); for (auto* device : ordered_devices) { - global_ordinals_[device->id()] = global_ordinals_.size(); + global_ordinals_[device->Id().value()] = global_ordinals_.size(); std::string device_str = IfrtDeviceToString(device); string_to_device_.emplace(device_str, device); } @@ -615,7 +616,7 @@ std::vector IfrtComputationClient::GetAllDevices() const { int IfrtComputationClient::GetNumProcesses() const { int max_process_index = client_->process_index(); for (auto* device : client_->devices()) { - max_process_index = std::max(max_process_index, device->process_index()); + max_process_index = std::max(max_process_index, device->ProcessIndex()); } return max_process_index + 1; diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.h b/torch_xla/csrc/runtime/ifrt_computation_client.h index d6d914ad8da..38d0de97204 100644 --- a/torch_xla/csrc/runtime/ifrt_computation_client.h +++ b/torch_xla/csrc/runtime/ifrt_computation_client.h @@ -134,7 +134,7 @@ class IfrtComputationClient : public ComputationClient { // global_ordinals_ tracks a map from PjRtDeviceId to the device's // dense global ordinal. std::unordered_map global_ordinals_; - std::unordered_map string_to_device_; + std::unordered_map string_to_device_; std::shared_ptr> replication_devices_; OperationManager operation_manager_; tsl::thread::ThreadPool pool_ = tsl::thread::ThreadPool( From 75278161c81f9308d412690b3e4d7cb23f35e8ab Mon Sep 17 00:00:00 2001 From: Avik Chaudhuri Date: Mon, 29 Apr 2024 10:58:38 -0700 Subject: [PATCH 36/53] Update test_export_fx_passes.py (#6972) --- test/stablehlo/test_export_fx_passes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/stablehlo/test_export_fx_passes.py b/test/stablehlo/test_export_fx_passes.py index d1e731abd6e..82650997316 100644 --- a/test/stablehlo/test_export_fx_passes.py +++ b/test/stablehlo/test_export_fx_passes.py @@ -18,7 +18,7 @@ class ExportFxPassTest(unittest.TestCase): def test_decompose_dynamic_shape_select(self): args = (torch.rand((10, 197, 768)), 1, 0) - dynamic_shapes = ([{0: Dim("bs")}, None, None],) + dynamic_shapes = (({0: Dim("bs")}, None, None),) m = wrap_func_as_nn_module(torch.ops.aten.select.int) ep = export(m, args, dynamic_shapes=dynamic_shapes) out1 = ep.module()(*args) @@ -55,7 +55,7 @@ def forward(self, x): def test_embedding_indices_flatten(self): args = (torch.rand((20, 768)), torch.randint(0, 15, (3, 10)).to(torch.int64)) - dynamic_shapes = ([None, {0: Dim("bs")}],) + dynamic_shapes = ((None, {0: Dim("bs")}),) m = wrap_func_as_nn_module(torch.ops.aten.embedding.default) ep = export(m, args, dynamic_shapes=dynamic_shapes) print(ep) From 7cc78a6882cc78d22dd73e036908b8ba19567aeb Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Mon, 29 Apr 2024 12:06:47 -0700 Subject: [PATCH 37/53] Change name of CI documentation (#6994) --- .github/{README.md => ci.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{README.md => ci.md} (100%) diff --git a/.github/README.md b/.github/ci.md similarity index 100% rename from .github/README.md rename to .github/ci.md From 
c91171d50fee9bff20d71fa70469801aab849f17 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Mon, 29 Apr 2024 13:12:57 -0700 Subject: [PATCH 38/53] Rework docs push (#6954) --- .github/workflows/_docs.yml | 87 ++++++++++++++++++---------- .github/workflows/build_and_test.yml | 18 +++--- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/.github/workflows/_docs.yml b/.github/workflows/_docs.yml index ed9a4ab0ea9..23be34db2e3 100644 --- a/.github/workflows/_docs.yml +++ b/.github/workflows/_docs.yml @@ -2,10 +2,10 @@ name: xla-docs-build on: workflow_call: inputs: - docker-image: + dev-image: required: true type: string - description: Image to build docs in + description: Base image for builds runner: required: false type: string @@ -15,35 +15,60 @@ on: torchxla-bot-token: required: true jobs: - push-docs: - runs-on: ${{ inputs.runner }} + build-docs: + runs-on: ubuntu-latest timeout-minutes: 45 + container: + image: ${{ inputs.dev-image }} env: - DOCKER_IMAGE: ${{ inputs.docker-image }} - WORKDIR: /var/lib/jenkins/workspace + BRANCH_NAME: ${{ github.ref_name }} steps: - - name: Setup Linux - uses: pytorch/test-infra/.github/actions/setup-linux@main - - name: Setup SSH (Click me for login details) - uses: pytorch/test-infra/.github/actions/setup-ssh@main - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - instructions: | - Doc builds are done inside container. Interactive session can be started by following: - docker exec -it $(docker container ps --format '{{.ID}}') bash - - name: Download and run docker image from GCR - shell: bash - env: - GITHUB_TORCH_XLA_BOT_TOKEN: ${{ secrets. torchxla-bot-token }} - run: | - echo "DOCKER_IMAGE: ${DOCKER_IMAGE}" - docker pull "${DOCKER_IMAGE}" - pid=$(docker run -e GITHUB_TORCH_XLA_BOT_TOKEN -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") - echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json" - echo "pid=${pid}" >> "${GITHUB_ENV}" - - name: Build & publish docs - shell: bash - run: docker exec -u jenkins "${pid}" bash -c '.circleci/doc_push.sh' - - name: Teardown Linux - uses: pytorch/test-infra/.github/actions/teardown-linux@main - if: always() + - name: Fetch wheels + uses: actions/download-artifact@v4 + with: + name: torch-xla-wheels + path: /tmp/wheels/ + - name: Install wheels + shell: bash + run: | + pip install /tmp/wheels/*.whl + - name: Checkout PyTorch/XLA Repo + uses: actions/checkout@v4 + with: + path: pytorch/xla + - name: Build docs + shell: bash + run: | + cd pytorch/xla/docs + pip install -r requirements.txt + sphinx-build -b html source build + - name: Checkout GitHub Pages + uses: actions/checkout@v4 + with: + path: gh-pages + ref: gh-pages + - name: Merge changes + shell: bash + run: | + subdir=${{ env.BRANCH_NAME == 'master' && 'master' || format('{0}/{1}', 'release', env.BRANCH_NAME) }} + mkdir -p gh-pages/$subdir + cp -fR pytorch/xla/docs/build/* gh-pages/$subdir + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: 'gh-pages' + deploy: + needs: build-docs + runs-on: ubuntu-latest + if: github.event_name == 'push' + + # Deploy to the github-pages environment + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 + with: + token: ${{ secrets.torchxla-bot-token }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8b285b06696..f7dc5b35426 100644 --- 
a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -30,16 +30,6 @@ jobs: secrets: gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} - push-docs: - name: "Build & publish docs" - if: github.event_name == 'push' && (github.event.ref == 'refs/heads/master' || startsWith(github.event.ref, 'refs/tags/r')) - uses: ./.github/workflows/_docs.yml - needs: build - with: - docker-image: ${{ needs.build.outputs.docker-image }} - secrets: - torchxla-bot-token: ${{ secrets.TORCH_XLA_BOT_TOKEN }} - # New CI workflow build-torch-xla: name: "Build PyTorch/XLA" @@ -88,3 +78,11 @@ jobs: # Only run this for HEAD and releases if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'tpuci') + push-docs: + name: "Build docs" + uses: ./.github/workflows/_docs.yml + needs: build-torch-xla + with: + dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm + secrets: + torchxla-bot-token: ${{ secrets.TORCH_XLA_BOT_TOKEN }} From 0e032b17f4041fc3cbea6e0e6dac1b55e91a1cee Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Mon, 29 Apr 2024 13:43:25 -0700 Subject: [PATCH 39/53] `sudo rm` leftover files in GHA (#6995) --- .github/workflows/_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index c5420e15dfc..685c63e05e4 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -49,7 +49,7 @@ jobs: - name: Fix workspace permissions run: | ls -la - rm -rvf ${GITHUB_WORKSPACE}/* + sudo rm -rvf ${GITHUB_WORKSPACE}/* - name: Setup Linux uses: pytorch/test-infra/.github/actions/setup-linux@main - name: Setup SSH (Click me for login details) From 73b915b55d96553a0e370b2bab01f47b8c2a9e7c Mon Sep 17 00:00:00 2001 From: Avik Chaudhuri Date: Mon, 29 Apr 2024 16:31:23 -0700 Subject: [PATCH 40/53] Fixes to dynamic_shapes args in test_unbounded_dynamism.py (#6999) --- test/stablehlo/test_unbounded_dynamism.py | 54 +++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/test/stablehlo/test_unbounded_dynamism.py b/test/stablehlo/test_unbounded_dynamism.py index e185a47007e..3cd17a7fe34 100644 --- a/test/stablehlo/test_unbounded_dynamism.py +++ b/test/stablehlo/test_unbounded_dynamism.py @@ -27,7 +27,7 @@ class UnboundedDynamismExportTest(unittest.TestCase): def test_add(self): args = (torch.rand((10, 197, 768)), torch.rand((10, 197, 768))) - dynamic_shapes = ([{0: Dim("dim")}, {0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")}, {0: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.add.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -45,7 +45,7 @@ def test_add(self): def test_add_scalar(self): args = (torch.rand((10, 197, 768)), 0.345) - dynamic_shapes = ([{0: Dim("dim")}, None],) + dynamic_shapes = (({0: Dim("dim")}, None),) m = wrap_func_as_nn_module(torch.ops.aten.add.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -62,7 +62,7 @@ def test_add_scalar(self): def test_addmm(self): args = (torch.rand((5)), torch.rand((10, 5)), torch.rand((5, 5))) - dynamic_shapes = ([None, {0: Dim("dim")}, None],) + dynamic_shapes = ((None, {0: Dim("dim")}, None),) m = wrap_func_as_nn_module(torch.ops.aten.addmm.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -83,7 +83,7 @@ def test_bmm(self): torch.rand((24, 
197, 64)), torch.rand((24, 64, 197)), ) - dynamic_shapes = ([{0: Dim("dim")}, {0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")}, {0: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.bmm.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -104,7 +104,7 @@ def test_bmm_dynamic_out_dim(self): torch.rand((8, 128, 256)), torch.rand((8, 256, 3)), ) - dynamic_shapes = ([None, {2: Dim("dim")}],) + dynamic_shapes = ((None, {2: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.bmm.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -125,7 +125,7 @@ def test_bmm_dynamic_reduction_dim(self): torch.rand((8, 128, 3)), torch.rand((8, 3, 256)), ) - dynamic_shapes = ([{2: Dim("dim")}, {1: Dim("dim")}],) + dynamic_shapes = (({2: Dim("dim")}, {1: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.bmm.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -143,7 +143,7 @@ def test_bmm_dynamic_reduction_dim(self): def test_cat(self): args = (torch.rand((10, 1, 768)), torch.rand((10, 196, 768))) - dynamic_shapes = ([{0: Dim("dim")}, {0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")}, {0: Dim("dim")}),) m = wrap_func_as_nn_module( lambda x, y: torch.ops.aten.cat.default([x, y], 1)) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) @@ -166,7 +166,7 @@ def test_conv(self): torch.rand((5, 3, 16, 16)), torch.rand((5)), ) - dynamic_shapes = ([{0: Dim("dim")}, None, None],) + dynamic_shapes = (({0: Dim("dim")}, None, None),) m = wrap_func_as_nn_module( lambda x, y, z: torch.ops.aten.convolution.default( x, @@ -197,7 +197,7 @@ def test_conv1d(self): torch.rand((3, 1, 800)), torch.rand((512, 1, 10)), ) - dynamic_shapes = ([{0: Dim("dim")}, None],) + dynamic_shapes = (({0: Dim("dim")}, None),) # dynamic_shapes = None m = wrap_func_as_nn_module(lambda x, y: torch.ops.aten.convolution.default( x, @@ -225,7 +225,7 @@ def test_conv1d(self): def test_cumsum(self): args = (torch.rand((10, 5)), 1) - dynamic_shapes = ([{0: Dim("dim")}, None],) + dynamic_shapes = (({0: Dim("dim")}, None),) m = wrap_func_as_nn_module(torch.ops.aten.cumsum.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -242,7 +242,7 @@ def test_cumsum(self): def test_div(self): args = (torch.rand((10, 12, 197)), torch.rand((10, 12, 197))) - dynamic_shapes = ([{0: Dim("dim")}, {0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")}, {0: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.div.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -260,7 +260,7 @@ def test_div(self): def test_div_scalar(self): args = (torch.rand((10, 12, 197)), 8.0) - dynamic_shapes = ([{0: Dim("dim")}, None],) + dynamic_shapes = (({0: Dim("dim")}, None),) m = wrap_func_as_nn_module(torch.ops.aten.div.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -277,7 +277,7 @@ def test_div_scalar(self): def test_gelu(self): args = (torch.rand((3, 5)),) - dynamic_shapes = ([{0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")},),) m = wrap_func_as_nn_module(torch.ops.aten.gelu) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -342,7 +342,7 @@ def forward(self, x): def test_mul(self): args = (torch.rand((10, 2, 
768)), torch.rand((10, 2, 768))) - dynamic_shapes = ([{0: Dim("dim")}, {0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")}, {0: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.mul.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -360,7 +360,7 @@ def test_mul(self): def test_mul_scalar(self): args = (torch.rand((10, 2, 768)), 0.125) - dynamic_shapes = ([{0: Dim("dim")}, None],) + dynamic_shapes = (({0: Dim("dim")}, None),) m = wrap_func_as_nn_module(torch.ops.aten.mul.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -483,7 +483,7 @@ def forward(self, x, weight, bias): def test_permute(self): args = (torch.rand((10, 197, 12, 64)),) - dynamic_shapes = ([{0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")},),) m = wrap_func_as_nn_module( lambda x: torch.ops.aten.permute.default(x, [0, 2, 1, 3])) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) @@ -502,7 +502,7 @@ def test_permute(self): def test_select(self): args = (torch.rand((10, 197, 768)), 1, 0) - dynamic_shapes = ([{0: Dim("dim")}, None, None],) + dynamic_shapes = (({0: Dim("dim")}, None, None),) m = wrap_func_as_nn_module(torch.ops.aten.select.int) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -519,7 +519,7 @@ def test_select(self): def test_slice(self): args = (torch.rand((10, 3, 224, 224)), 0, 0, 9223372036854775807) - dynamic_shapes = ([{0: Dim("dim")}, None, None, None],) + dynamic_shapes = (({0: Dim("dim")}, None, None, None),) m = wrap_func_as_nn_module(torch.ops.aten.slice.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -537,7 +537,7 @@ def test_slice(self): def test_slice_2(self): args = (torch.rand((10, 3, 224, 224)), 1, 0, 2) - dynamic_shapes = ([{0: Dim("dim")}, None, None, None],) + dynamic_shapes = (({0: Dim("dim")}, None, None, None),) m = wrap_func_as_nn_module(torch.ops.aten.slice.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -555,7 +555,7 @@ def test_slice_2(self): def test_softmax(self): args = (torch.rand((10, 12, 197, 197)), -1, False) - dynamic_shapes = ([{0: Dim("dim")}, None, None],) + dynamic_shapes = (({0: Dim("dim")}, None, None),) m = wrap_func_as_nn_module(torch.ops.aten._softmax.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -573,7 +573,7 @@ def test_softmax(self): def test_sub(self): args = (torch.rand((10, 1, 1, 10)), torch.rand((10, 1, 1, 10))) - dynamic_shapes = ([{0: Dim("dim")}, {0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")}, {0: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.sub.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -591,7 +591,7 @@ def test_sub(self): def test_softmax_reduce_on_dynamic_dim(self): args = (torch.rand((1, 8, 128, 3)), -1, False) - dynamic_shapes = ([{3: Dim("dim")}, None, None],) + dynamic_shapes = (({3: Dim("dim")}, None, None),) m = wrap_func_as_nn_module(torch.ops.aten._softmax.default) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -609,7 +609,7 @@ def test_softmax_reduce_on_dynamic_dim(self): @unittest.skip("Converted StableHLO contains i1 dtype, not expected.") def test_index(self): args = (torch.rand((2, 
10)), torch.arange(5)) - dynamic_shapes = ([None, {0: Dim("dim")}],) + dynamic_shapes = ((None, {0: Dim("dim")}),) m = wrap_func_as_nn_module( lambda x, y: torch.ops.aten.index.Tensor(x, [None, y])) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) @@ -628,7 +628,7 @@ def test_index(self): def test_sub_scalar(self): args = (1.0, torch.rand((10, 1, 1, 10))) - dynamic_shapes = ([None, {0: Dim("dim")}],) + dynamic_shapes = ((None, {0: Dim("dim")}),) m = wrap_func_as_nn_module(torch.ops.aten.sub.Tensor) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -670,7 +670,7 @@ def forward(self, x): def test_transpose_on_dynamic_dim(self): args = (torch.rand((1, 8, 3, 256)),) - dynamic_shapes = ([{2: Dim("dim")}],) + dynamic_shapes = (({2: Dim("dim")},),) m = wrap_func_as_nn_module( lambda x: torch.ops.aten.transpose.int(x, -2, -1)) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) @@ -688,7 +688,7 @@ def test_transpose_on_dynamic_dim(self): def test_unsqueeze_1(self): args = (torch.rand((3, 10)),) - dynamic_shapes = ([{0: Dim("dim")}],) + dynamic_shapes = (({0: Dim("dim")},),) m = wrap_func_as_nn_module(lambda x: torch.ops.aten.unsqueeze.default(x, 1)) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) @@ -705,7 +705,7 @@ def test_unsqueeze_1(self): def test_unsqueeze_2(self): args = (torch.rand((1, 1, 3, 256)),) - dynamic_shapes = ([{2: Dim("dim")}],) + dynamic_shapes = (({2: Dim("dim")},),) m = wrap_func_as_nn_module(lambda x: torch.ops.aten.unsqueeze.default(x, 2)) ep = export(m, args=args, dynamic_shapes=dynamic_shapes) shlo_module = exported_program_to_stablehlo(ep) From d25f47523183b92d6e2964db8313aafc249d4d20 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Mon, 29 Apr 2024 16:40:48 -0700 Subject: [PATCH 41/53] Manually push to `gh-pages` branch (#6996) --- .circleci/doc_push.sh | 63 ------------------------------------- .github/workflows/_docs.yml | 33 +++++++++---------- 2 files changed, 15 insertions(+), 81 deletions(-) delete mode 100755 .circleci/doc_push.sh diff --git a/.circleci/doc_push.sh b/.circleci/doc_push.sh deleted file mode 100755 index 72b4a44f6e7..00000000000 --- a/.circleci/doc_push.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -set -ex - -cd /tmp/pytorch/xla - -source ./xla_env -source .circleci/common.sh - -echo "Building docs" -pushd docs -./docs_build.sh -popd - -echo "Pushing to public" -git config --global user.email "pytorchxla@gmail.com" -git config --global user.name "torchxlabot2" -GH_PAGES_BRANCH=gh-pages -GH_PAGES_DIR=gh-pages-tmp -CURRENT_COMMIT=`git rev-parse HEAD` -BRANCH_NAME=`git rev-parse --abbrev-ref HEAD` -if [[ "$BRANCH_NAME" == release/* ]]; then - SUBDIR_NAME=$BRANCH_NAME -else - SUBDIR_NAME="master" -fi -pushd /tmp -git clone --quiet -b "$GH_PAGES_BRANCH" https://github.com/pytorch/xla.git "$GH_PAGES_DIR" -pushd $GH_PAGES_DIR -rm -rf $SUBDIR_NAME -mkdir -p $SUBDIR_NAME -cp -fR /tmp/pytorch/xla/docs/build/* $SUBDIR_NAME -git_status=$(git status --porcelain) -if [[ $git_status ]]; then - echo "Doc is updated... Pushing to public" - echo "${git_status}" - sudo apt-get -qq update - export DEBIAN_FRONTEND=noninteractive - sudo ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime - sudo sh -c "echo Etc/UTC > /etc/timezone" - sudo apt-get -qq -y install tzdata - sudo apt-get -qq install expect - git add . 
- - COMMIT_MSG="Update doc from commit $CURRENT_COMMIT" - git commit -m "$COMMIT_MSG" - set +x -/usr/bin/expect < Date: Tue, 30 Apr 2024 10:39:43 -0300 Subject: [PATCH 42/53] Re-land: dynamo expand test with view-replay. (#6958) --- test/dynamo/test_dynamo.py | 15 +++++++++++++++ torch_xla/__init__.py | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/test/dynamo/test_dynamo.py b/test/dynamo/test_dynamo.py index 946ae914b04..3a3eb3d43f1 100644 --- a/test/dynamo/test_dynamo.py +++ b/test/dynamo/test_dynamo.py @@ -668,6 +668,21 @@ def foo(x): self.assertEqual(expected.dtype, actual.dtype) self.assertEqual(expected.device, actual.device) + def test_return_expand(self): + + def foo(x): + return x.expand(2, -1) + + optfoo = torch.compile(backend="openxla")(foo) + + t = torch.arange(10) + Xt = t.to(xm.xla_device()) + + expected = foo(t) + actual = optfoo(Xt) + + self.assertEqual(expected, actual.cpu()) + if __name__ == '__main__': test = unittest.main() diff --git a/torch_xla/__init__.py b/torch_xla/__init__.py index ebc0af6c7ad..d2c4e1a3aca 100644 --- a/torch_xla/__init__.py +++ b/torch_xla/__init__.py @@ -186,6 +186,12 @@ def _init_xla_lazy_backend(): # TODO @wonjoo come up with a long term fix in Dynamo. torch._dynamo.config.automatic_dynamic_shapes = False +# Activate view-replay on AOTAutograd. +# See: https://github.com/pytorch/pytorch/pull/124488 +import torch._functorch.config + +torch._functorch.config.view_replay_for_aliased_outputs = True + from .stablehlo import save_as_stablehlo, save_torch_model_as_stablehlo from .experimental import plugins From 87329ce0534b309563492aa720e99d47bbb291de Mon Sep 17 00:00:00 2001 From: JackCaoG <59073027+JackCaoG@users.noreply.github.com> Date: Tue, 30 Apr 2024 10:14:26 -0700 Subject: [PATCH 43/53] Move the nightly whl instruction out of the hide area (#7000) --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6bd1bb844d7..70bdcfd57d9 100644 --- a/README.md +++ b/README.md @@ -132,14 +132,13 @@ Our comprehensive user guides are available at: PyTorch/XLA releases starting with version r2.1 will be available on PyPI. You can now install the main build with `pip install torch_xla`. To also install the -Cloud TPU plugin, install the optional `tpu` dependencies: +Cloud TPU plugin, install the optional `tpu` dependencies after installing the main build with ``` pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html ``` -GPU, XRT (legacy runtime), and nightly builds are available in our public GCS -bucket. +GPU and nightly builds are available in our public GCS bucket. | Version | Cloud TPU/GPU VMs Wheel | | --- | ----------- | @@ -151,6 +150,8 @@ bucket. | nightly (Python 3.10) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl` | | nightly (CUDA 12.1 + Python 3.8) | `https://storage.googleapis.com/pytorch-xla-releases/wheels/cuda/12.1/torch_xla-nightly-cp38-cp38-linux_x86_64.whl` | +You can also add `+yyyymmdd` after `torch_xla-nightly` to get the nightly wheel of a specified date. To get the companion pytorch nightly wheel, replace the `torch_xla` with `torch` on above wheel links. +
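For example, following the substitution rules described above, a date-pinned pair of installs would look like this (an illustrative sketch only; the date `20240430` is a placeholder, and not every date is guaranteed to have a build):

```bash
# torch_xla nightly pinned to a specific date (Python 3.10 TPU VM wheel from the table above):
pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly+20240430-cp310-cp310-linux_x86_64.whl
# companion torch nightly for the same date, with torch_xla replaced by torch:
pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly+20240430-cp310-cp310-linux_x86_64.whl
```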
older versions @@ -206,9 +207,6 @@ wheels for `torch` and `torch_xla` at | --- | ----------- | | 2.0 | `https://storage.googleapis.com/tpu-pytorch/wheels/colab/torch_xla-2.0-cp310-cp310-linux_x86_64.whl` | -You can also add `+yyyymmdd` after `torch_xla-nightly` to get the nightly wheel -of a specified date. To get the companion pytorch and torchvision nightly wheel, -replace the `torch_xla` with `torch` or `torchvision` on above wheel links.
### Docker From 5f75290e3710e213289735dd287c2d6ea49a7e3c Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Tue, 30 Apr 2024 10:29:40 -0700 Subject: [PATCH 44/53] Don't fail docs push if there's nothing to commit (#7001) --- .github/workflows/_docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_docs.yml b/.github/workflows/_docs.yml index d68f9e97fe7..378dec9697a 100644 --- a/.github/workflows/_docs.yml +++ b/.github/workflows/_docs.yml @@ -66,6 +66,6 @@ jobs: git config user.email "pytorchxla@gmail.com" git config user.name "torchxlabot2" git add . -v - git commit -m "Update doc from commit ${{ github.sha }}" + git diff --cached --exit-code || git commit -m "Update doc from commit ${{ github.sha }}" git push origin gh-pages if: github.event_name == 'push' From 4a5e238ee74c403e42239540738e4d9bb4eeebf5 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Tue, 30 Apr 2024 13:15:06 -0700 Subject: [PATCH 45/53] Complain when TensorFlow is installed (#7004) --- torch_xla/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/torch_xla/__init__.py b/torch_xla/__init__.py index d2c4e1a3aca..6b83d45e4b4 100644 --- a/torch_xla/__init__.py +++ b/torch_xla/__init__.py @@ -192,6 +192,21 @@ def _init_xla_lazy_backend(): torch._functorch.config.view_replay_for_aliased_outputs = True +import importlib.metadata +import warnings + +try: + # TensorFlow TPU distribution has the same package name as GPU, but not CPU + dist = importlib.metadata.distribution('tensorflow') + warnings.warn( + "`tensorflow` can conflict with `torch-xla`. Prefer `tensorflow-cpu` when" + " using PyTorch/XLA. To silence this warning, `pip uninstall -y " + "tensorflow && pip install tensorflow-cpu`. If you are in a notebook " + "environment such as Colab or Kaggle, restart your notebook runtime " + "afterwards.") +except importlib.metadata.PackageNotFoundError: + pass + from .stablehlo import save_as_stablehlo, save_torch_model_as_stablehlo from .experimental import plugins From b8f8fa9ef5d68433f941edeea504cdfdacfa64d1 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Tue, 30 Apr 2024 13:16:53 -0700 Subject: [PATCH 46/53] Clean up workspace before test (#7005) --- .github/workflows/_build.yml | 2 +- .github/workflows/_test.yml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index 685c63e05e4..0cbd8edaf0c 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -46,7 +46,7 @@ jobs: BAZEL_JOBS: 16 steps: # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802 - - name: Fix workspace permissions + - name: Clean up workspace run: | ls -la sudo rm -rvf ${GITHUB_WORKSPACE}/* diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml index ffb73a156fa..8a454cc075b 100644 --- a/.github/workflows/_test.yml +++ b/.github/workflows/_test.yml @@ -73,6 +73,11 @@ jobs: BAZEL_JOBS: 16 BAZEL_REMOTE_CACHE: 1 steps: + # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802 + - name: Clean up workspace + run: | + ls -la + rm -rvf ${GITHUB_WORKSPACE}/* - name: Setup gcloud shell: bash run: | From 77bbf7f3804dee0d9f7e4cf4a4a1e0ba226dea65 Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Tue, 30 Apr 2024 13:30:50 -0700 Subject: [PATCH 47/53] Tag CI build with git hash (#7003) --- .github/workflows/_build_torch_xla.yml | 2 +- infra/ansible/config/env.yaml | 2 +- infra/ansible/config/vars.yaml | 2 ++ 3 files changed, 4 insertions(+), 2 
deletions(-) diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml index 7614242fd7a..58a783216e4 100644 --- a/.github/workflows/_build_torch_xla.yml +++ b/.github/workflows/_build_torch_xla.yml @@ -46,7 +46,7 @@ jobs: shell: bash run: | cd pytorch/xla/infra/ansible - ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 build_cpp_tests=1 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps + ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 build_cpp_tests=1 git_versioned_xla_build=1 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps - name: Upload wheel uses: actions/upload-artifact@v4 with: diff --git a/infra/ansible/config/env.yaml b/infra/ansible/config/env.yaml index ea785519bae..9e2fe7270cc 100644 --- a/infra/ansible/config/env.yaml +++ b/infra/ansible/config/env.yaml @@ -33,7 +33,7 @@ build_env: BAZEL_REMOTE_CACHE: 1 SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}-{{ clang_version }}{{ cache_suffix }}" _GLIBCXX_USE_CXX11_ABI: 0 - GIT_VERSIONED_XLA_BUILD: "{{ nightly_release }}" + GIT_VERSIONED_XLA_BUILD: "{{ nightly_release or git_versioned_xla_build }}" amd64: ARCH: amd64 diff --git a/infra/ansible/config/vars.yaml b/infra/ansible/config/vars.yaml index 1ab00087b60..e5851d0cc77 100644 --- a/infra/ansible/config/vars.yaml +++ b/infra/ansible/config/vars.yaml @@ -16,3 +16,5 @@ bundle_libtpu: 1 cache_suffix: "" # Whether to build C++ tests with `torch_xla` wheel build_cpp_tests: 0 +# Whether to tag wheels with git hash, e.g. X.Y.Z+git123abc +git_versioned_xla_build: false From 2399e10f5426b7e0e194f2ae612e20774bb9b53c Mon Sep 17 00:00:00 2001 From: qihqi Date: Tue, 30 Apr 2024 16:16:15 -0700 Subject: [PATCH 48/53] fix addbmm opinfo (#6993) --- .../torch_xla2/docs/fixing_op_info_test.md | 211 ++++++++++++++++++ experimental/torch_xla2/test/test_ops.py | 1 - experimental/torch_xla2/torch_xla2/_ops.py | 9 + 3 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 experimental/torch_xla2/docs/fixing_op_info_test.md diff --git a/experimental/torch_xla2/docs/fixing_op_info_test.md b/experimental/torch_xla2/docs/fixing_op_info_test.md new file mode 100644 index 00000000000..03624f9487e --- /dev/null +++ b/experimental/torch_xla2/docs/fixing_op_info_test.md @@ -0,0 +1,211 @@ +# How to fix an op info test. + +## What is an OpInfo test + +PyTorch created a list of Python objects (OpInfo) to keep +track of how to test each op. This is useful to us because it +ensures that the ops we implement produce the same results +PyTorch would produce. + +Context: +* https://dev-discuss.pytorch.org/t/opinfos-in-pytorch-1-10/253 +* https://github.com/pytorch/pytorch/issues/54261 + + +## How to fix one + +### Remove one op from skiplist + +Open [test/test_ops.py](../test/test_ops.py) with your +favorite text editor. +Remove one line from the `skiplist` set. + +e.g.
+ +```bash +(base) hanq-macbookpro:torch_xla2 hanq$ git diff +diff --git a/experimental/torch_xla2/test/test_ops.py b/experimental/torch_xla2/test/test_ops.py +index 72a39ae85..2a156cbce 100644 +--- a/experimental/torch_xla2/test/test_ops.py ++++ b/experimental/torch_xla2/test/test_ops.py +@@ -15,7 +15,6 @@ skiplist = { + "_native_batch_norm_legit", + "_segment_reduce", + "_upsample_bilinear2d_aa", +- "addbmm", + "addmm", + "addmv", + "addr", +``` + +### Run the test to see the failure + +The error we get: + +``` +E RuntimeError: ('No lowering found for\n\nTo execute this test, run the following from the base repo dir:\n python test/test_ops.py -k test_reference_eager_addbmm_cpu_int64\n\nThis message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0', 'aten::addbmm') +``` + +From here we have 2 strategies for fixing this test: + +1. Add an implementation of the `aten::addbmm` operator using JAX ops, or +2. Add an implementation of the `aten::addbmm` operator using torch ops (this is commonly known as a "decomposition"). + +Either way works for torch_xla2. For ops that are not "Core Aten", we sometimes implement them in torch ops with the goal of +upstreaming the decomposition to [pytorch decompositions](https://github.com/pytorch/pytorch/blob/main/torch/_decomp/decompositions.py) +so other projects can benefit from it. + +For illustration purposes, let's implement this op in JAX. + +(NOTE: this doesn't stop us from upstreaming a decomposition later if we want.) + +### First Impl + +To implement this op using JAX ops, we first find its +exact semantics on this page: +https://pytorch.org/docs/stable/generated/torch.addbmm.html + +From its math formula, we can implement it as follows. + +``` ++@op(torch.ops.aten.addbmm.default) ++def _aten_addbmm(input, batch1, batch2, *, beta=1, alpha=1): ++ ++ mm = jnp.einsum('bxy, byz -> xz', batch1, batch2) ++ return beta * input + alpha * mm +``` + +Now run the test again: + +``` +python test/test_ops.py -k test_reference_eager_addbmm_cpu_int64 +``` + +(NOTE: the exact test command is printed out when we run +`pytest test/test_ops.py`, so we can run only the failed test instead of all tests.) + +We now see this error: + +``` +FAIL: test_reference_eager_addbmm_cpu_int64 (__main__.TestOpInfoCPU) [torch_xla2_diff:0.001] +---------------------------------------------------------------------- +Traceback (most recent call last): + File "/Users/hanq/git/qihqi/torch_xla/experimental/torch_xla2/test/test_ops.py", line 654, in run_export_and_compare + diff_output( + File "/Users/hanq/git/qihqi/torch_xla/experimental/torch_xla2/test/test_ops.py", line 617, in diff_output + testcase.assertTrue( +AssertionError: False is not true +``` + +This tells us that our implementation did not produce +the same result as the op in PyTorch. + +To debug this, let's figure out what exact input caused this. +We can achieve this by setting a breakpoint [here](https://github.com/pytorch/xla/blob/master/experimental/torch_xla2/test/test_ops.py#L644), right before the diff. Here we can +inspect the values of `res` and `res2`, as well as the `sample_input`.
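(A minimal sketch of what that looks like, using `pdb` from the standard library; the `#L644` line number linked above may drift as the file changes, so place the breakpoint wherever `diff_output` is called.)

```python
# Inside run_export_and_compare in test/test_ops.py, just before diff_output(...):
import pdb; pdb.set_trace()  # then print res, res2 and sample_input at the prompt
```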
+ +The sample input we get is: +``` +SampleInput(input=tensor([[-3, -3, 9, 8, -8, -3, -4, 2, 2, 2], + [-5, 1, -9, 9, 1, -5, 6, 1, -4, -5], + [-2, -1, 5, -2, -3, 0, 5, -4, 9, -6], + [-1, -7, 6, 3, 8, 3, 8, 9, -5, 7], + [-3, -4, -9, 9, 7, -3, -8, 2, 5, -3]]), args=(tensor([[[-2, 4, -2, 5, 8], + [-6, -2, 5, 7, 7], + [-8, -3, 2, 5, -3], + [-4, 7, 0, -9, 8], + [ 3, 9, -9, -2, 0]], + + [[-7, 1, -3, 7, -4], + [ 3, 5, 4, 6, 5], + [-2, 8, 3, 5, 7], + [ 8, -2, -8, 2, 0], + [ 6, 1, -8, 8, 0]], + + [[ 2, -1, -5, -8, -9], + [ 5, 0, -4, -1, -6], + [-6, 2, -5, -2, -5], + [-5, -3, -5, -4, 9], + [-3, 4, -9, -9, 7]], + + [[ 2, 5, -7, -3, 8], + [-5, -7, -8, -4, 4], + [-4, -6, -3, 0, 6], + [ 8, 0, -3, -8, 2], + [-4, 3, -9, -6, 7]], + + [[ 2, 1, -6, 2, 8], + [ 2, 6, 4, 1, 8], + [-9, 9, -5, 8, 3], + [-5, 0, -2, 4, 0], + [ 5, 8, -4, 9, 7]]]), tensor([[[-1, -8, 3, 5, -8, 2, -5, 0, -9, -5], + [-4, -7, 2, 2, 1, -9, 2, 7, -1, -1], + [ 1, 8, -6, -4, -6, -8, -7, -9, 7, 4], + [-4, 1, -9, 3, 4, 6, 0, -2, -2, -7], + [ 5, 5, 0, 8, -3, 7, -7, 8, 3, 5]], + + [[ 8, -4, -9, 9, 5, 0, 5, 0, -5, 5], + [-5, -3, -2, 8, 1, -2, 4, -7, 5, 3], + [-4, 4, 1, -4, -8, 2, -5, 2, 9, -7], + [ 9, 6, -8, -3, 3, 1, 4, 6, -5, -4], + [-2, 1, 5, 5, 2, 6, 7, -3, -7, 3]], + + [[ 9, -8, 5, -3, -1, 2, -9, -5, -1, -3], + [-3, 3, -9, -7, -9, -8, 1, -3, 7, -2], + [ 8, -1, 8, -8, -7, 4, 8, 8, 5, -7], + [-1, 6, -8, 7, -1, -5, -8, 6, -2, 8], + [-5, -5, 8, 6, 0, 1, 3, -2, -3, -9]], + + [[ 7, -2, 6, -8, -5, 3, 2, -1, -5, 8], + [-6, -4, 3, 9, -9, -8, -7, 3, 9, 0], + [ 1, 3, 4, 4, -5, -2, -4, -2, 3, -7], + [-6, 9, 5, -1, 7, 7, 8, -3, -8, 0], + [-1, -6, -3, 3, 3, -8, -4, 9, -5, 7]], + + [[-5, -3, -9, 6, -1, -7, 9, -8, 1, -8], + [-8, -8, -2, -5, -7, -8, 1, 0, 0, -6], + [ 7, -5, 2, 2, 0, -9, -5, -7, 1, 8], + [-4, 0, 9, 6, -1, -6, 6, -6, -2, -1], + [ 7, 3, 0, 1, 1, -9, 5, -8, -1, -7]]])), kwargs={'beta': 0.6, 'alpha': 0.2}, broadcasts_input=False, name='') +``` + +And the `res` from torch is: + +``` +tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) +``` + +So we make a few observations: +1. The input tensors are of type int64. +2. alpha and beta are both floats. + +So one can suspect that it has to do with rounding. +Reading the doc more carefully, we can find this sentence: + + For inputs of type FloatTensor or DoubleTensor, arguments beta and alpha must be real numbers, otherwise they should be integers. + +So torch likely first cast the float alpha and beta to integers, which yields 0, and then used them in the math to get a matrix of all zeros. + +### Second Impl + +```python ++@op(torch.ops.aten.addbmm.default) ++def _aten_addbmm(input, batch1, batch2, *, beta=1, alpha=1): ++ alpha = jnp.array(alpha).astype(batch1.dtype) ++ beta = jnp.array(beta).astype(batch1.dtype) ++ mm = jnp.einsum('bxy, byz -> xz', batch1, batch2) ++ return jax.lax.cond(beta == 0, ++ lambda: alpha * mm, ++ lambda: beta*input + alpha*mm) ++ +``` + +Adding the type casts makes the test pass.
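To double-check the truncation theory above, a quick eager-mode experiment helps. This is a minimal sketch with arbitrarily chosen all-ones inputs, not part of the original PR:

```python
import torch

# torch.addbmm shapes: input (n, p), batch1 (b, n, m), batch2 (b, m, p)
inp = torch.ones(2, 2, dtype=torch.int64)
b1 = torch.ones(3, 2, 4, dtype=torch.int64)
b2 = torch.ones(3, 4, 2, dtype=torch.int64)

# With integer inputs, beta=0.6 and alpha=0.2 are truncated to 0, so the
# result should be 0 * inp + 0 * sum_b(b1 @ b2), i.e. all zeros, matching
# the `res` tensor printed above.
print(torch.addbmm(inp, b1, b2, beta=0.6, alpha=0.2))
```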
+ +### Submit +Now, let's remove the pdb and prints we added, and submit the fix as a PR: https://github.com/pytorch/xla/pull/6993 + diff --git a/experimental/torch_xla2/test/test_ops.py b/experimental/torch_xla2/test/test_ops.py index 72a39ae8582..2a156cbce6c 100644 --- a/experimental/torch_xla2/test/test_ops.py +++ b/experimental/torch_xla2/test/test_ops.py @@ -15,7 +15,6 @@ "_native_batch_norm_legit", "_segment_reduce", "_upsample_bilinear2d_aa", - "addbmm", "addmm", "addmv", "addr", diff --git a/experimental/torch_xla2/torch_xla2/_ops.py b/experimental/torch_xla2/torch_xla2/_ops.py index fe0f97a0f01..0eacf2d47a3 100644 --- a/experimental/torch_xla2/torch_xla2/_ops.py +++ b/experimental/torch_xla2/torch_xla2/_ops.py @@ -415,6 +415,15 @@ def _aten_addmm(self, mat1, mat2, *, beta=1.0, alpha=1.0): self += alpha * jnp.matmul(mat1, mat2) return self +@op(torch.ops.aten.addbmm.default) +def _aten_addbmm(input, batch1, batch2, *, beta=1, alpha=1): + alpha = jnp.array(alpha).astype(batch1.dtype) + beta = jnp.array(beta).astype(batch1.dtype) + mm = jnp.einsum('bxy, byz -> xz', batch1, batch2) + return jax.lax.cond(beta == 0, + lambda: alpha * mm, + lambda: beta*input + alpha*mm) + @op(torch.ops.aten.gelu) def _aten_gelu(self, *, approximate="none"): From 2907ab3093753d8a3516c8f67337a9464bf4ed8a Mon Sep 17 00:00:00 2001 From: qihqi Date: Tue, 30 Apr 2024 18:08:38 -0700 Subject: [PATCH 49/53] Fix more opinfo tests (#7008) --- experimental/torch_xla2/test/test_ops.py | 17 ++----- experimental/torch_xla2/torch_xla2/_ops.py | 45 +++++++++++++++---- .../torch_xla2/torch_xla2/functions.py | 26 +++++++++++ .../torch_xla2/torch_xla2/ops/jtorch.py | 7 +++ 4 files changed, 73 insertions(+), 22 deletions(-) diff --git a/experimental/torch_xla2/test/test_ops.py b/experimental/torch_xla2/test/test_ops.py index 2a156cbce6c..5f6fdbbeab2 100644 --- a/experimental/torch_xla2/test/test_ops.py +++ b/experimental/torch_xla2/test/test_ops.py @@ -7,6 +7,8 @@ instantiate_device_type_tests, ops) from torch.utils import _pytree as pytree from torch_xla2 import tensor +import torch_xla2 + skiplist = { "__getitem__", @@ -15,18 +17,6 @@ "_native_batch_norm_legit", "_segment_reduce", "_upsample_bilinear2d_aa", - "addmm", - "addmv", - "addr", - "all", - "allclose", - "amax", - "amin", - "aminmax", - "angle", - "any", - "argmax", - "argmin", "argsort", "as_strided", "as_strided_scatter", @@ -639,7 +629,8 @@ def run_export_and_compare(testcase, input2, args2, kwargs2 = pytree.tree_map_only( torch.Tensor, tensor.move_to_device, (sample_input.input, sample_input.args, sample_input.kwargs)) - res2 = func(input2, *args2, **kwargs2) + with torch_xla2.mode(): + res2 = func(input2, *args2, **kwargs2) res2 = pytree.tree_map_only(tensor.XLATensor2, lambda t: t.torch(), res2) with testcase.subTest("torch_xla2_diff:" + str(atol)): if ignore_indices and isinstance(res, tuple) and len(res) == 2: diff --git a/experimental/torch_xla2/torch_xla2/_ops.py b/experimental/torch_xla2/torch_xla2/_ops.py index 0eacf2d47a3..e3650234372 100644 --- a/experimental/torch_xla2/torch_xla2/_ops.py +++ b/experimental/torch_xla2/torch_xla2/_ops.py @@ -410,7 +410,10 @@ def _aten_native_layer_norm(input, # - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor @op(torch.ops.aten.addmm) +@op(torch.ops.aten.addmv) def _aten_addmm(self, mat1, mat2, *, beta=1.0, alpha=1.0): + alpha = jnp.array(alpha).astype(mat1.dtype) + beta = jnp.array(beta).astype(mat1.dtype) self *= beta self += alpha * jnp.matmul(mat1, mat2) return self 
@@ -641,13 +644,14 @@ def _aten_min(x, axis=None): @op(torch.ops.aten.amin) -def _aten_amin(x, axis=None): - return jnp.min(x, axis=axis) +def _aten_amin(x, dim=None, keepdim=False): + return _with_reduction_scalar(jnp.amin, x, dim, keepdim) @op(torch.ops.aten.argmin) -def _aten_amin(x, axis=None): - return jnp.argmin(x, axis=axis) +def _aten_argmin(self, dim=None, keepdim=False): + return _with_reduction_scalar( + jnp.argmin, self, dim, keepdim) @op(torch.ops.aten.sin) @@ -1211,13 +1215,27 @@ def _aten_abs(self): # generate aten.amax only @op(torch.ops.aten.amax) def _aten_amax(self, dim=None, keepdim=False): - return jnp.amax(self, axis=dim, keepdims=keepdim) - + return _with_reduction_scalar(jnp.amax, self, dim, keepdim) + + +def _with_reduction_scalar(jax_func, self, dim, keepdim): + expanded = False + if self.ndim == 0: + # for self of rank 0: + # torch.any(x, 0), torch.any(x, -1) works; + # torch.any(x, 1) throws out of bounds, so its + # behavior is the same as a jnp array of rank 1 + expanded = True + self = jnp.expand_dims(self, 0) + res = jax_func(self, axis=dim, keepdims=keepdim) + if expanded: + res = res.squeeze() + return res # aten.any @op(torch.ops.aten.any) def _aten_any(self, dim=None, keepdim=False): - return jnp.any(self, axis=dim, keepdims=keepdim) + return _with_reduction_scalar(jnp.any, self, dim, keepdim) # aten.arange @@ -1246,7 +1264,8 @@ def _aten_arange(start, # aten.argmax @op(torch.ops.aten.argmax) def _aten_argmax(self, dim=None, keepdim=False): - return jnp.argmax(self, axis=dim, keepdims=keepdim) + return _with_reduction_scalar( + jnp.argmax, self, dim, keepdim) # aten.as_strided @@ -1751,4 +1770,12 @@ def _aten_local_scalar_dense(x): @op(torch.ops.aten.tensor_split.sections) def _aten_tensor_split(ary, indices_or_sections, axis=0): - return jnp.array_split(ary, indices_or_sections, axis) \ No newline at end of file + return jnp.array_split(ary, indices_or_sections, axis) + +@op(torch.ops.aten.outer) +def _aten_outer(a, b): + return jnp.outer(a, b) + +@op(torch.ops.aten.allclose) +def _aten_allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False): + return jnp.allclose(input, other, rtol, atol, equal_nan) \ No newline at end of file diff --git a/experimental/torch_xla2/torch_xla2/functions.py b/experimental/torch_xla2/torch_xla2/functions.py index 9fcd5653a86..94320fd7cb2 100644 --- a/experimental/torch_xla2/torch_xla2/functions.py +++ b/experimental/torch_xla2/torch_xla2/functions.py @@ -92,6 +92,32 @@ def _full(size: Sequence[int], fill_value, *, dtype=None, **kwargs): # TODO: handle torch.Size return jnp.full(size, fill_value, dtype=dtype) +@register_function(torch.allclose) +def _aten_allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False): + return jnp.allclose(input, other, rtol, atol, equal_nan) + +@register_function(torch.angle) +def _torch_angle(input): + return jnp.angle(input) + + +@register_function(torch.argsort) +def _torch_argsort(input, dim=-1, descending=False, stable=False): + expanded = False + if input.ndim == 0: + # for input of rank 0: + # torch.any(x, 0), torch.any(x, -1) works; + # torch.any(x, 1) throws out of bounds, so its + # behavior is the same as a jnp array of rank 1 + expanded = True + input = jnp.expand_dims(input, 0) + res = jnp.argsort(input, axis=dim, descending=descending, + stable=stable) + if expanded: + res = res.squeeze() + return res + + class XLAFunctionMode(torch.overrides.TorchFunctionMode): """Context manager that dispatches torch function calls to JAX.""" diff --git
a/experimental/torch_xla2/torch_xla2/ops/jtorch.py b/experimental/torch_xla2/torch_xla2/ops/jtorch.py index e69de29bb2d..6628b7e9510 100644 --- a/experimental/torch_xla2/torch_xla2/ops/jtorch.py +++ b/experimental/torch_xla2/torch_xla2/ops/jtorch.py @@ -0,0 +1,7 @@ +import torch + + + +torch_ops_override = { + torch.allclose: torch.ops.aten.allclose +} \ No newline at end of file From 865836ade1692115832a61367765fd6c27e984cb Mon Sep 17 00:00:00 2001 From: Wonjoo Lee Date: Wed, 1 May 2024 10:21:04 -0700 Subject: [PATCH 50/53] Fix q dtype in paged attention kernel (#7011) --- torch_xla/experimental/custom_kernel.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torch_xla/experimental/custom_kernel.py b/torch_xla/experimental/custom_kernel.py index 42b11d3ea9b..0b9c1b202bb 100644 --- a/torch_xla/experimental/custom_kernel.py +++ b/torch_xla/experimental/custom_kernel.py @@ -392,10 +392,10 @@ def paged_attention(q, k_pages, v_pages, lengths, page_indices, batch_size, num_heads, head_dim = q.shape num_kv_heads, _, page_size, head_dim_k = k_pages.shape batch_size_paged_indices, pages_per_sequence = page_indices.shape - q_output_dtype = torch.float32 + q_dtype_for_kernel_launch = q.dtype if (num_heads // num_kv_heads) % 8 != 0: q = q.reshape(batch_size, num_heads, 1, head_dim) - q_output_dtype = q.dtype + q_dtype_for_kernel_launch = torch.float32 page_indices_reshaped = page_indices.reshape(-1) buffer_index = torch.zeros((1,), dtype=torch.int32).to("xla") @@ -408,11 +408,11 @@ def paged_attention(q, k_pages, v_pages, lengths, page_indices, page_indices_reshaped, buffer_index, step, - q, + q.to(q_dtype_for_kernel_launch), k_pages, v_pages, ], payload, [q.shape, output_shape, output_shape], - [q_output_dtype, torch.float32, torch.float32]) + [q_dtype_for_kernel_launch, torch.float32, torch.float32]) return output.reshape(batch_size, num_heads, head_dim).to(q.dtype) From 4883f6feef2f99b90975e9568de6411c63f67ffe Mon Sep 17 00:00:00 2001 From: Will Cromar Date: Wed, 1 May 2024 11:17:29 -0700 Subject: [PATCH 51/53] Build upstream CI image on push to master (#6952) --- .circleci/docker/cloudbuild.yaml | 29 ----- .circleci/docker/install_llvm_clang.sh | 33 ----- .../docker => .github/upstream}/Dockerfile | 1 + .../upstream}/install_conda.sh | 0 .../upstream}/install_valgrind.sh | 0 .github/workflows/_build.yml | 116 ------------------ .github/workflows/_build_upstream_image.yml | 44 +++++++ .github/workflows/build_and_test.yml | 13 +- 8 files changed, 49 insertions(+), 187 deletions(-) delete mode 100644 .circleci/docker/cloudbuild.yaml delete mode 100644 .circleci/docker/install_llvm_clang.sh rename {.circleci/docker => .github/upstream}/Dockerfile (98%) rename {.circleci/docker => .github/upstream}/install_conda.sh (100%) rename {.circleci/docker => .github/upstream}/install_valgrind.sh (100%) delete mode 100644 .github/workflows/_build.yml create mode 100644 .github/workflows/_build_upstream_image.yml diff --git a/.circleci/docker/cloudbuild.yaml b/.circleci/docker/cloudbuild.yaml deleted file mode 100644 index d3b154e0a65..00000000000 --- a/.circleci/docker/cloudbuild.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Cloud Build Configuration for .circleci base image build: -# - Builds and pushes gcr.io/tpu-pytorch/xla_base:{_TAG_NAME}. 
-# - This build is meant to be triggered manually in cloud builder -steps: -- name: 'gcr.io/cloud-builders/docker' - args: [ - 'build', - '--build-arg', 'cloud_build_prefix=.cicleci/docker', - '--build-arg', 'base_image=nvidia/cuda:${_CUDA_TAG}', - '--build-arg', 'cuda=${_CUDA}', - '--build-arg', 'python_version=${_PYTHON_VERSION}', - '-t', 'gcr.io/tpu-pytorch/xla_base:${_TAG_NAME}', - '-f', '.circleci/docker/Dockerfile', '.circleci/docker/' - ] - timeout: 2000s -- name: 'gcr.io/cloud-builders/docker' - args: ['push', 'gcr.io/tpu-pytorch/xla_base'] - timeout: 1000s - -options: - machineType: 'N1_HIGHCPU_32' - dynamic_substitutions: true - substitution_option: 'ALLOW_LOOSE' -substitutions: - _CUDA: '1' - _CUDA_TAG: '11.7.0-cudnn8-devel-ubuntu18.04' - _PYTHON_VERSION: '3.7' - _TAG_NAME: '${_PYTHON_VERSION}-${_CUDA_TAG}-mini' -timeout: 3000s diff --git a/.circleci/docker/install_llvm_clang.sh b/.circleci/docker/install_llvm_clang.sh deleted file mode 100644 index a911bce1aad..00000000000 --- a/.circleci/docker/install_llvm_clang.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -set -e -set -x - - -function debian_version { - local VER - if ! sudo apt-get install -y lsb-release > /dev/null 2>&1 ; then - VER="buster" - else - VER=$(lsb_release -c -s) - fi - echo "$VER" -} - -function install_llvm_clang() { - local DEBVER=$(debian_version) - if ! apt-get install -y -s clang-8 > /dev/null 2>&1 ; then - maybe_append "deb http://apt.llvm.org/${DEBVER}/ llvm-toolchain-${DEBVER}-8 main" /etc/apt/sources.list - maybe_append "deb-src http://apt.llvm.org/${DEBVER}/ llvm-toolchain-${DEBVER}-8 main" /etc/apt/sources.list - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - - sudo apt-get update - fi - # Build config also sets CC=clang-8, CXX=clang++-8 - sudo apt-get install -y clang-8 clang++-8 - sudo apt-get install -y llvm-8 llvm-8-dev llvm-8-tools - sudo ln -s /usr/bin/clang-8 /usr/bin/clang - sudo ln -s /usr/bin/clang++-8 /usr/bin/clang++ - export CC=clang-8 CXX=clang++-8 -} - -install_llvm_clang diff --git a/.circleci/docker/Dockerfile b/.github/upstream/Dockerfile similarity index 98% rename from .circleci/docker/Dockerfile rename to .github/upstream/Dockerfile index f0cd196511c..006460c2477 100644 --- a/.circleci/docker/Dockerfile +++ b/.github/upstream/Dockerfile @@ -1,3 +1,4 @@ +# Dockerfile for image used by upstream CI # This requires cuda & cudnn packages pre-installed in the base image. 
# Other available cuda images are listed at https://hub.docker.com/r/nvidia/cuda ARG base_image="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1" diff --git a/.circleci/docker/install_conda.sh b/.github/upstream/install_conda.sh similarity index 100% rename from .circleci/docker/install_conda.sh rename to .github/upstream/install_conda.sh diff --git a/.circleci/docker/install_valgrind.sh b/.github/upstream/install_valgrind.sh similarity index 100% rename from .circleci/docker/install_valgrind.sh rename to .github/upstream/install_valgrind.sh diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml deleted file mode 100644 index 0cbd8edaf0c..00000000000 --- a/.github/workflows/_build.yml +++ /dev/null @@ -1,116 +0,0 @@ -name: xla-buld -on: - workflow_call: - inputs: - gcr-docker-image: - required: true - type: string - description: Base image for builds - ecr-docker-image-base: - required: true - type: string - description: Container registry to upload image to - runner: - required: false - type: string - description: Runner type for the test - default: linux.12xlarge - cuda: - required: false - type: string - description: Whether to build XLA with CUDA - default: 1 - - secrets: - gcloud-service-key: - required: true - description: Secret to access Bazel build cache - - outputs: - docker-image: - value: ${{ jobs.build.outputs.docker-image }} - description: The docker image containing the built PyTorch. -jobs: - build: - runs-on: ${{ inputs.runner }} - timeout-minutes: 240 - outputs: - docker-image: ${{ steps.upload-docker-image.outputs.docker-image }} - env: - ECR_DOCKER_IMAGE_BASE: ${{ inputs.ecr-docker-image-base }} - GCR_DOCKER_IMAGE: ${{ inputs.gcr-docker-image }} - WORKDIR: /var/lib/jenkins/workspace - SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 - GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} - XLA_CUDA: ${{ inputs.cuda }} - BAZEL_JOBS: 16 - steps: - # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802 - - name: Clean up workspace - run: | - ls -la - sudo rm -rvf ${GITHUB_WORKSPACE}/* - - name: Setup Linux - uses: pytorch/test-infra/.github/actions/setup-linux@main - - name: Setup SSH (Click me for login details) - uses: pytorch/test-infra/.github/actions/setup-ssh@main - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - instructions: | - Build is done inside the container, to start an interactive session run: - docker exec -it $(docker container ps --format '{{.ID}}') bash - - name: Checkout repo - uses: actions/checkout@v3 - - name: Download docker image from GCR - shell: bash - run: docker pull "${GCR_DOCKER_IMAGE}" - - name: Stage image to ECR - shell: bash - run: | - # This is to stage PyTorch/XLA base image for use in the upstream. - # To allow the upstream workflow to access PyTorch/XLA build images, we - # need to have them in the ECR. This is not expensive, and only pushes it - # if image layers are not present in the repo. - # Note: disable the following 2 lines while testing a new image, so we do not - # push to the upstream. - docker tag "${GCR_DOCKER_IMAGE}" "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null - docker push "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null - - name: Start the container - shell: bash - run: | - pid=$(docker run --privileged -t -d -w "$WORKDIR" "${GCR_DOCKER_IMAGE}") - docker exec -u jenkins "${pid}" sudo chown -R jenkins "${WORKDIR}" - docker cp "${GITHUB_WORKSPACE}/." 
"$pid:$WORKDIR" - echo "pid=${pid}" >> "${GITHUB_ENV}" - - - name: Prepare build env - shell: bash - run: | - echo "declare -x SCCACHE_BUCKET=${SCCACHE_BUCKET}" | docker exec -i "${pid}" sh -c "cat >> env" - echo "declare -x XLA_CUDA=${XLA_CUDA}" | docker exec -i "${pid}" sh -c "cat >> xla_env" - echo "declare -x BAZEL_JOBS=${BAZEL_JOBS}" | docker exec -i "${pid}" sh -c "cat >> xla_env" - echo "declare -x BAZEL_REMOTE_CACHE=1" | docker exec -i "${pid}" sh -c "cat >> xla_env" - echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> default_credentials.json" - - - name: Build - shell: bash - run: | - docker exec --privileged -u jenkins "${pid}" bash -c ".circleci/build.sh" - - name: Cleanup build env - shell: bash - run: | - docker exec "${pid}" rm default_credentials.json /tmp/pytorch/xla/default_credentials.json - - - name: Push built docker image to ECR - id: upload-docker-image - shell: bash - run: | - export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:latest-${GITHUB_SHA}" - time docker commit "${pid}" "${COMMIT_DOCKER_IMAGE}" - time docker push "${COMMIT_DOCKER_IMAGE}" - echo "docker-image=${COMMIT_DOCKER_IMAGE}" >> "${GITHUB_OUTPUT}" - - - name: Teardown Linux - uses: pytorch/test-infra/.github/actions/teardown-linux@main - if: always() - diff --git a/.github/workflows/_build_upstream_image.yml b/.github/workflows/_build_upstream_image.yml new file mode 100644 index 00000000000..ef0975b6abf --- /dev/null +++ b/.github/workflows/_build_upstream_image.yml @@ -0,0 +1,44 @@ +name: xla-buld +on: + workflow_call: + inputs: + ecr-docker-image-base: + required: true + type: string + description: Container registry to upload image to + runner: + required: false + type: string + description: Runner type for the test + default: linux.12xlarge +jobs: + build: + runs-on: ${{ inputs.runner }} + timeout-minutes: 240 + env: + ECR_DOCKER_IMAGE_BASE: ${{ inputs.ecr-docker-image-base }} + BAZEL_JOBS: 16 + steps: + # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802 + - name: Clean up workspace + run: | + ls -la + sudo rm -rvf ${GITHUB_WORKSPACE}/* + - name: Setup Linux + uses: pytorch/test-infra/.github/actions/setup-linux@main + - name: Checkout repo + uses: actions/checkout@v3 + - name: Download docker image from GCR + shell: bash + run: | + docker build -t "${ECR_DOCKER_IMAGE_BASE}:v1.2-lite" .github/upstream + - name: Stage image to ECR + shell: bash + run: | + # This is to stage PyTorch/XLA base image for use in the upstream. + # To allow the upstream workflow to access PyTorch/XLA build images, we + # need to have them in the ECR. This is not expensive, and only pushes it + # if image layers are not present in the repo. + # Note: disable the following line while testing a new image, so we do not + # push to the upstream. 
+ docker push "${ECR_DOCKER_IMAGE_BASE}:v1.2-lite" diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f7dc5b35426..e040884b5ef 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -19,18 +19,13 @@ concurrency: cancel-in-progress: true jobs: - # Old CI workflow - build: - name: "Build upstream CI image" - uses: ./.github/workflows/_build.yml + build-upstream-image: + name: "Build upstream Docker image" + uses: ./.github/workflows/_build_upstream_image.yml with: ecr-docker-image-base: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base - gcr-docker-image: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1 - cuda: 1 - secrets: - gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }} + if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' - # New CI workflow build-torch-xla: name: "Build PyTorch/XLA" uses: ./.github/workflows/_build_torch_xla.yml From 400bd0c914a6e9d5c05ed8dd9571280518395b5d Mon Sep 17 00:00:00 2001 From: Jiewen Tan Date: Wed, 1 May 2024 11:38:53 -0700 Subject: [PATCH 52/53] [Pallas] Support segment ids in flash attention (#6943) Summary: This PR is to add segment ids to the flash attention wrapper. The segment ids are a way to create an attention mask where each token can only attend to other tokens within the same segment. The mask is therefore a block diagonal matrix. To support it, we further split the flash attention forward into tracing and execution parts, and implement all the shape operations to make it compatible with the kernel. Test Plan: PJRT_DEVICE=TPU python test/test_pallas.py --- test/test_pallas.py | 118 +++++++++++++++++++- torch_xla/experimental/custom_kernel.py | 137 +++++++++++++++++------- 2 files changed, 218 insertions(+), 37 deletions(-) diff --git a/test/test_pallas.py b/test/test_pallas.py index 089394b71d3..7b8755fc71e 100644 --- a/test/test_pallas.py +++ b/test/test_pallas.py @@ -22,8 +22,22 @@ class PallasTest(unittest.TestCase): - def _attention(self, q, k, v): + # This is to create a block-diagonal mask where only elements within the same segment + # can attend to each other. Since the mask is used to mask out the irrelevant parts, + # we use != instead of ==. + def _make_attention_mask_from_segment_ids(self, q_segment_ids, + kv_segment_ids): + return q_segment_ids.view(q_segment_ids.shape[0], 1, + q_segment_ids.shape[1], 1) != kv_segment_ids.view( + kv_segment_ids.shape[0], 1, 1, + kv_segment_ids.shape[1]) + + def _attention(self, q, k, v, *, attn_mask=None): attn_weight = q @ k.transpose(-2, -1) + if attn_mask is not None: + # Mask out the irrelevant parts.
+ attn_weight = attn_weight.masked_fill(attn_mask, + torch.finfo(attn_weight.dtype).min) attn_weight = nn.functional.softmax(attn_weight, dim=-1) attn_output = attn_weight @ v return attn_output @@ -619,6 +633,108 @@ def paged_attention_wrapper(q, k, v, seq_lens, page_indices, atol=1e-5, rtol=1e-5)) + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 3, + "This test only works on TPUv3+.") + def test_flash_attention_wrapper_segment_ids_1(self): + from torch_xla.experimental.custom_kernel import flash_attention + from jax.experimental.pallas.ops.tpu.flash_attention import flash_attention as jax_flash_attention, SegmentIds + + q = torch.randn(3, 2, 128, 4) + k = torch.randn(3, 2, 128, 4) + v = torch.randn(3, 2, 128, 4) + q_segment_ids = torch.zeros(3, 128) + kv_segment_ids = torch.zeros(3, 128) + o = flash_attention( + q.to("xla"), k.to("xla"), v.to("xla"), False, q_segment_ids.to("xla"), + kv_segment_ids.to("xla")) + + jax_q = jnp.array(q.numpy(), dtype=jnp.float32) + jax_k = jnp.array(k.numpy(), dtype=jnp.float32) + jax_v = jnp.array(v.numpy(), dtype=jnp.float32) + jax_q_segment_ids = jnp.array(q_segment_ids.numpy(), dtype=jnp.float32) + jax_kv_segment_ids = jnp.array(kv_segment_ids.numpy(), dtype=jnp.float32) + expected_o = torch.from_numpy( + np.array( + jax_flash_attention( + jax_q, + jax_k, + jax_v, + segment_ids=SegmentIds(jax_q_segment_ids, jax_kv_segment_ids), + ))) + + self.assertTrue(torch.allclose(o.cpu(), expected_o.cpu(), atol=1e-05)) + + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 3, + "This test only works on TPUv3+.") + def test_flash_attention_wrapper_segment_ids_2(self): + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.HIGHEST) + from torch_xla.experimental.custom_kernel import flash_attention + + q = torch.randn(3, 2, 128, 4).to("xla") + k = torch.randn(3, 2, 128, 4).to("xla") + v = torch.randn(3, 2, 128, 4).to("xla") + q_segment_ids = torch.zeros(3, 128).to("xla") + kv_segment_ids = torch.zeros(3, 128).to("xla") + o = flash_attention(q, k, v, False, q_segment_ids, kv_segment_ids) + + expected_o = self._attention( + q, + k, + v, + attn_mask=self._make_attention_mask_from_segment_ids( + q_segment_ids, kv_segment_ids)) + self.assertTrue(torch.allclose(o.cpu(), expected_o.cpu(), atol=1e-05)) + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.DEFAULT) + + @unittest.skipIf(xr.device_type() != 'TPU' or tpu.version() < 3, + "This test only works on TPUv3+.") + def test_flash_attention_backward_segment_ids(self): + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.HIGHEST) + from torch_xla.experimental.custom_kernel import flash_attention + + torch.manual_seed(42) + q = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + k = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + v = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + q_segment_ids = torch.zeros(4, 128).to("xla") + kv_segment_ids = torch.zeros(4, 128).to("xla") + q.retain_grad() + k.retain_grad() + v.retain_grad() + + o = flash_attention(q, k, v, False, q_segment_ids, kv_segment_ids) + loss = o.sum() + loss.backward() + xm.mark_step() + + q_grad = q.grad + k_grad = k.grad + v_grad = v.grad + + torch.manual_seed(42) + q = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + k = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + v = torch.randn(4, 2, 128, 8, requires_grad=True).to("xla") + q_segment_ids = torch.zeros(4, 128).to("xla") + kv_segment_ids = torch.zeros(4, 128).to("xla") + q.retain_grad() + 
k.retain_grad() + v.retain_grad() + + o = self._attention( + q, + k, + v, + attn_mask=self._make_attention_mask_from_segment_ids( + q_segment_ids, kv_segment_ids)) + loss = o.sum() + loss.backward() + xm.mark_step() + + for i in [(q, q_grad), (k, k_grad), (v, v_grad)]: + self.assertTrue(torch.allclose(i[0].grad.cpu(), i[1].cpu(), atol=1e-05)) + jax.config.update('jax_default_matmul_precision', jax.lax.Precision.DEFAULT) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) diff --git a/torch_xla/experimental/custom_kernel.py b/torch_xla/experimental/custom_kernel.py index 0b9c1b202bb..9bd050efc29 100644 --- a/torch_xla/experimental/custom_kernel.py +++ b/torch_xla/experimental/custom_kernel.py @@ -18,7 +18,7 @@ def _extract_backend_config( module: "jaxlib.mlir._mlir_libs._mlir.ir.Module") -> str | None: """ This algorithm intends to extract the backend config from the compiler IR like the following, - and it is designed to traverse any generic MLIR module. + and it is not designed to traverse any generic MLIR module. module @jit_add_vectors attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} { func.func public @main(%arg0: tensor<8xi32> {mhlo.layout_mode = "default", mhlo.sharding = "{replicated}"}, %arg1: tensor<8xi32> {mhlo.layout_mode = "default", mhlo.sharding = "{replicated}"}) -> (tensor<8xi32> {jax.result_info = "", mhlo.layout_mode = "default"}) { @@ -55,17 +55,12 @@ def jax_import_guard(): torch_xla._XLAC._init_computation_client() -def trace_pallas(kernel: Callable, - *args, - static_argnums=None, - static_argnames=None, - **kwargs): +def to_jax_shape_dtype_struct(tensor: torch.Tensor) -> "jax.ShapeDtypeStruct": # Import JAX within the function such that we don't need to call the jax_import_guard() # in the global scope which could cause problems for xmp.spawn. jax_import_guard() import jax import jax.numpy as jnp - import jax._src.pallas.mosaic.pallas_call_registration def convert_torch_dtype_to_jax(dtype: torch.dtype) -> jnp.dtype: if dtype == torch.float32: @@ -93,14 +88,28 @@ def convert_torch_dtype_to_jax(dtype: torch.dtype) -> jnp.dtype: else: raise ValueError(f"Unsupported dtype: {dtype}") + return jax.ShapeDtypeStruct(tensor.shape, + convert_torch_dtype_to_jax(tensor.dtype)) + + +def trace_pallas(kernel: Callable, + *args, + static_argnums=None, + static_argnames=None, + **kwargs): + # Import JAX within the function such that we don't need to call the jax_import_guard() + # in the global scope which could cause problems for xmp.spawn. + jax_import_guard() + import jax + import jax._src.pallas.mosaic.pallas_call_registration + jax_args = [] # for tracing tensor_args = [] # for execution for i, arg in enumerate(args): # TODO: Could the args be a tuple of tensors or a list of tensors? Flattern them? if torch.is_tensor(arg): # ShapeDtypeStruct doesn't have any storage and thus is very suitable for generating the payload. 
- jax_meta_tensor = jax.ShapeDtypeStruct( - arg.shape, convert_torch_dtype_to_jax(arg.dtype)) + jax_meta_tensor = to_jax_shape_dtype_struct(arg) jax_args.append(jax_meta_tensor) tensor_args.append(arg) else: @@ -167,12 +176,41 @@ class FlashAttention(torch.autograd.Function): "block_k_dq": 256, "block_k_major_dq": 512, } + NUM_LANES = 128 + NUM_SUBLANES = 8 + + @staticmethod + def prepare_segment_ids(q_segment_ids, kv_segment_ids): + from jax.experimental.pallas.ops.tpu.flash_attention import SegmentIds + if q_segment_ids is None or kv_segment_ids is None: + return None, None, None + + assert q_segment_ids is not None and kv_segment_ids is not None, "Both q_segment_ids and kv_segment_ids should be provided." + segment_ids = SegmentIds( + to_jax_shape_dtype_struct(q_segment_ids), + to_jax_shape_dtype_struct(kv_segment_ids)) + q_segment_ids = q_segment_ids.unsqueeze(-1).expand( + [-1 for _ in q_segment_ids.shape] + [FlashAttention.NUM_LANES]) + kv_segment_ids = kv_segment_ids.unsqueeze(1).expand([ + kv_segment_ids.shape[0], FlashAttention.NUM_SUBLANES, + kv_segment_ids.shape[1] + ]) + return segment_ids, q_segment_ids, kv_segment_ids @staticmethod - def forward(ctx, q, k, v, causal=False, partition_spec=None, mesh=None): + def forward(ctx, + q, + k, + v, + causal=False, + q_segment_ids=None, + kv_segment_ids=None, + partition_spec=None, + mesh=None): # Import JAX within the function such that we don't need to call the jax_import_guard() # in the global scope which could cause problems for xmp.spawn. jax_import_guard() + import jax from jax.experimental.pallas.ops.tpu.flash_attention import _flash_attention_impl ctx.causal = causal @@ -192,27 +230,32 @@ def forward(ctx, q, k, v, causal=False, partition_spec=None, mesh=None): k = xs.enable_manual_sharding(k, partition_spec, mesh=mesh).global_tensor v = xs.enable_manual_sharding(v, partition_spec, mesh=mesh).global_tensor - # It returns the shape and type of o, l, m. - def shape_dtype(q, *arg): - if not save_residuals: - return [(q.shape, q.dtype)] + # It computes the shape and type of o, l, m. + shapes = [q.shape] + dtypes = [q.dtype] + if save_residuals: res_shape = list(q.shape) res_shape[-1] = FlashAttention.MIN_BLOCK_SIZE - return [(q.shape, q.dtype), (res_shape, torch.float32), - (res_shape, torch.float32)] - - # We can't directly use flash_attention as we need to override the save_residuals flag which returns - # l and m that is needed for the backward. Then we lose all the shape checks. - # TODO: replicate the shape checks on flash_attention. - _flash_attention_impl = make_kernel_from_pallas(_flash_attention_impl, - shape_dtype) + for _ in range(2): + shapes.append(res_shape) + dtypes.append(torch.float32) + with torch.no_grad(): + segment_ids, q_segment_ids, kv_segment_ids = FlashAttention.prepare_segment_ids( + q_segment_ids, kv_segment_ids) + ctx.segment_ids = segment_ids + + # We can't directly use flash_attention as we need to override the save_residuals flag which returns + # l and m that are needed for the backward. Then we lose all the shape checks. + # TODO: replicate the shape checks on flash_attention. + # Here we separate the tracing and execution parts just to support SegmentIds.
+ payload, _ = trace_pallas( + _flash_attention_impl, q, k, v, None, - None, + segment_ids, save_residuals, causal, 1.0, @@ -222,7 +265,14 @@ def shape_dtype(q, *arg): min(FlashAttention.DEFAULT_BLOCK_SIZES["block_k"], k.shape[2]), False, static_argnums=range(5, 13)) + + args = [q, k, v] + if segment_ids is not None: + args += [q_segment_ids, kv_segment_ids] + o = torch_xla._XLAC._xla_tpu_custom_call(args, payload, shapes, dtypes) + if not save_residuals: + o = o[0] # SPMD integration if partition_spec is not None: o = xs.disable_manual_sharding( @@ -240,18 +290,20 @@ def shape_dtype(q, *arg): m = xs.disable_manual_sharding( m, partition_spec[0:3], ctx.full_shape[0:3], mesh=mesh).global_tensor - ctx.save_for_backward(full_q, full_k, full_v, o, l, m) + ctx.save_for_backward(full_q, full_k, full_v, o, l, m, q_segment_ids, + kv_segment_ids) return o @staticmethod def backward(ctx, grad_output): from jax.experimental.pallas.ops.tpu.flash_attention import _flash_attention_bwd_dq, _flash_attention_bwd_dkv - q, k, v, o, l, m = ctx.saved_tensors + q, k, v, o, l, m, q_segment_ids, kv_segment_ids = ctx.saved_tensors causal = ctx.causal partition_spec = ctx.partition_spec mesh = ctx.mesh full_shape = ctx.full_shape + segment_ids = ctx.segment_ids grad_q = grad_k = grad_v = None grad_i = torch.sum( @@ -286,7 +338,7 @@ def backward(ctx, grad_output): k, v, None, - None, + segment_ids, l, m, grad_output, @@ -306,9 +358,13 @@ def backward(ctx, grad_output): "block_q_major", "block_k_major", "block_k", "sm_scale", "causal", "mask_value", "debug" ]) - grad_q = torch_xla._XLAC._xla_tpu_custom_call( - [q, k, v, expanded_l, expanded_m, grad_output, expanded_grad_i], - payload, [q.shape], [q.dtype])[0] + + args = [q, k, v] + if segment_ids is not None: + args += [q_segment_ids, kv_segment_ids] + args += [expanded_l, expanded_m, grad_output, expanded_grad_i] + grad_q = torch_xla._XLAC._xla_tpu_custom_call(args, payload, [q.shape], + [q.dtype])[0] if ctx.needs_input_grad[1] or ctx.needs_input_grad[2]: payload, _ = trace_pallas( @@ -317,7 +373,7 @@ def backward(ctx, grad_output): k, v, None, - None, + segment_ids, l, m, grad_output, @@ -340,9 +396,14 @@ def backward(ctx, grad_output): "block_q_major", "block_k_major", "block_k", "block_q", "sm_scale", "causal", "mask_value", "debug" ]) - grads = torch_xla._XLAC._xla_tpu_custom_call( - [q, k, v, expanded_l, expanded_m, grad_output, expanded_grad_i], - payload, [k.shape, v.shape], [k.dtype, v.dtype]) + + args = [q, k, v] + if segment_ids is not None: + args += [q_segment_ids, kv_segment_ids] + args += [expanded_l, expanded_m, grad_output, expanded_grad_i] + grads = torch_xla._XLAC._xla_tpu_custom_call(args, payload, + [k.shape, v.shape], + [k.dtype, v.dtype]) if ctx.needs_input_grad[1]: grad_k = grads[0] if ctx.needs_input_grad[2]: @@ -357,7 +418,7 @@ def backward(ctx, grad_output): grad_v = xs.disable_manual_sharding( grad_v, partition_spec, full_shape, mesh=mesh).global_tensor - return grad_q, grad_k, grad_v, None, None, None + return grad_q, grad_k, grad_v, None, None, None, None, None def flash_attention( @@ -365,10 +426,14 @@ def flash_attention( k, # [batch_size, num_heads, kv_seq_len, d_model] v, # [batch_size, num_heads, kv_seq_len, d_model] causal=False, + q_segment_ids=None, + kv_segment_ids=None, *, partition_spec=None, mesh=None): - return FlashAttention.apply(q, k, v, causal, partition_spec, mesh) + # TODO: support SPMD and Dynamo with segment_ids. 
+ return FlashAttention.apply(q, k, v, causal, q_segment_ids, kv_segment_ids, + partition_spec, mesh) def paged_attention(q, k_pages, v_pages, lengths, page_indices, From cbbefa2c129ae48c6e868b9eeb4985b6e562dc1c Mon Sep 17 00:00:00 2001 From: JackCaoG <59073027+JackCaoG@users.noreply.github.com> Date: Wed, 1 May 2024 12:41:48 -0700 Subject: [PATCH 53/53] Support pin pr number in new .torch_pin (#6998) Co-authored-by: Will Cromar --- infra/ansible/roles/build_srcs/tasks/main.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/infra/ansible/roles/build_srcs/tasks/main.yaml b/infra/ansible/roles/build_srcs/tasks/main.yaml index d69e9012718..da09a695453 100644 --- a/infra/ansible/roles/build_srcs/tasks/main.yaml +++ b/infra/ansible/roles/build_srcs/tasks/main.yaml @@ -8,9 +8,18 @@ # ansible.builtin.git wants to fetch the entire history, so check out the pin manually ansible.builtin.shell: cmd: | - git fetch origin {{ torch_pin.stdout }} - git checkout --recurse-submodules {{ torch_pin.stdout }} + set -xe + PIN="{{ torch_pin.stdout }}" + if [[ $PIN = \#* ]]; then + PRNUM="${PIN//[!0-9]/}" + git fetch origin "pull/$PRNUM/head" + else + git fetch origin {{ torch_pin.stdout }} + fi + git checkout --recurse-submodules FETCH_HEAD chdir: "{{ (src_root, 'pytorch') | path_join }}" + args: + executable: /bin/bash when: torch_pin is succeeded - name: Build PyTorch
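To illustrate the new pin format handled by the script above, `.torch_pin` can now contain either a PR reference or a plain git ref. A minimal sketch of both forms (the PR number and branch below are placeholders, not real pins):

```bash
# Pin the PyTorch checkout to a pull request; the leading '#' triggers the pull/<N>/head fetch path:
echo '#12345' > .torch_pin

# Or pin to a branch, tag, or commit ref, as before:
echo 'release/2.3' > .torch_pin
```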