From 280ef93cac6aa6dd24b981614157e7cf94b5fe77 Mon Sep 17 00:00:00 2001 From: Kevin Reid Date: Wed, 25 Oct 2023 11:42:36 -0700 Subject: [PATCH] Execute lighting calculations in parallel. This slightly changes the results due to batching (updates in a batch don't see other updates in the same batch), but does not introduce nondeterminism. Note that this is a performance improvement, but what we should really be doing is running light calculations in the background all the time, which will require more work to make the data available for use. --- .../test-record-lighting-bench.png | Bin 3662 -> 3670 bytes all-is-cubes/src/space/light/updater.rs | 44 +++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/all-is-cubes-desktop/tests/end-to-end/record-ok-png.out/test-record-lighting-bench.png b/all-is-cubes-desktop/tests/end-to-end/record-ok-png.out/test-record-lighting-bench.png index ef58bbfcd507bca49f25dbe9e7fa0683a0b59529..9d3e938b2bd9d28ba46a68e763652187ddb15777 100644 GIT binary patch delta 3528 zcma)zcR17!1HkFbIQx)sj+4E2t|N--?0HBCN7*vt_?povdlb&f7G-3fj7k^o{Ibc} zBO{bO%KQHFKF|BSf4!f7KF{-co)1}ITp-mO7`=N-$1-&KXKuLP=osWnSJ#AA!;~v$ zx(p-ZB^}sh^GhW#O6b`3yaI63J-QdF`Lf09hFr18Bo5>0bb5;Zx zPRKY>DyD8-8YkgP@7Snq5~*ZvQE`j>zllAH_J7C!PUqSNYa7cE6V?24d+TfR;y*8z z>4EvVfeK&U@=l5M8DQhB_}RVqoeOXkkz{u{gp$XlD=$FMe z)14Emq{ImKu^-RI-<;4ghmE`xjsAl?+L}B5g|V2pX|;t#a9?2koY?2&ffZWUuS#MU ze`PHrcICvbcREW{dHkT3n`?%g63O!#ND63@OO=-Cb``%`ksVS9oJ&AW;pFd%a{@k3_S>mkJnUgsS)T- z5^9{RydUoT7EwoVnq0pYAyFxNKz$^0s;H}@ZT#WbbGfUTk6kZ*%;RRQ$8`O9FNzDu z00E-RlFr?!F1y-?jA@rPs>?-?NY24!!K^DZCSn6#dO%$V1AYnf>CMknRj}iVTJh-^ zf*uOW_gZfq z9gidDGZUq)Z=~q?oh|4de~gUdj-VOhS5Dj8fKHBhX#{xjwPE-J3!k1UM`jAt>4lC{ z{t3K70hYcm1iD3uIbqj*de^h2-c9Yj-T0X9=qq+vt^l6s2(C+VFEid7-VD$s>(O;jnvq<{0SJ+MY_!Mr) zP=2vp>)C8}62yi^b(+@l^&~kDtO{}k4e-7~(2l)06JF;lHSe6c%5Qt$^*@Am&xs+`HwB z$Jj`0lN@I|)X2j?Y%g--5h%C=Vx0%Xu;!fk7cY5YH7?LIVF?H(bLe9fmDUQvNT>dR zUGf8{@+94#&1hJO!{fKn4A<8XkNPbYujXUBUUi`a?x^tgePw2%H7RF8yT*4sz&lnQ z`Ru}ExGh>YG3;%{8LGFzF#BFY95iIkUL$r*8oW`e+fijd=SvS zJp^(1%ipVxRNGmdvK`@He>0|_c=s{EM(fNyMbF-zSSrg{OZPP$?1O2PX)+cKMN6yr zj4^h_b`oO`GOp&`2><#?=jyZKBXx|H=A&f=SHqi6jgciL61=@XNFnJw3iDaIfpUFI zQM}bb#haBlS7AUfPKTmapk9an?(${DipgX@#dmn z7ewql-B5>Iu{yO|bc#jeLS_6+b?o#AgAz6Ly;)CvF6fi3@=p!Eo+Dhv>uod1D)}V` z-dMO`LKT&BFNS0ioVB=hZ%jc%Wj=-u8%>4#@REwZ42`e%iSg_f5CH}~Ake5sG@8e) z2?PdO$AB)%{N_DctirkE^z=R9LK*$lQm@A;>^^Pvtp^HV<*};L$@b1M8d<^1>|O9q zdNVZZ^M1qqOl$a7(u2tH_zf8p(1q~QGm zhF*&0C*m&5G=%;sW-T@A-ZighhIZxGG;%Lsz7-WUOyogBpppc^b|E78ln4o@BvrYS zY<6PE+WjMBe~NWceJyR;9U9p^3KlkX-`ED9oEd{Ppx*gm$5cVVT&nXcesb9k z7{61OVd}zXc{g^uI6ki3Y^D?C?@5-Zi>WB%MA^YD;jG+}JVvrX3>MU%IAlwH!`q05 z&s^WDfT{thqv-}w_iuw}xfjE?u=W1@UdhZdp~sQ=+7?Ez*TnwE!&s>Y(}~>s3FGr-p~V4Q`Vh$M z?)~6`@9{-Kc*NCkm(7-pbISb%9vR1@Ice*UA@Ev`!Ob zayxTa&h13?$jy|w*wj$~1TszyH?FykZ=Xu_m74da$$@Y<%b-QOTN?ouuy@Ws+5iy0LF;^hu9c zZA0k}u~YFo@}%g|V_y5Ztq-(O61IL()sFnlVA~7qSQgyIr(`e>SVt z7sRh_U0h(*3@?kG*_I7?&%XJeApqagpI`d$ttVe_P6oeN&Q>6%cw;hhumIh+W>3nd zM$c zmto7Z+9|~tz5vj(qF?hne7To(=n0Kjgit5l1;z3?P}x5Kao?>@VMLCjb+SXjIekic z-5$>Oand=qX+0mNkY3+YTM?KWp<8?{m!URye|%r7@)iI~D?T=T^M(jk`e5Q@drf&8 ze-h$r*LV6*iw6x#e#@hzac+ad83PaH6eQVQkHJNIrf-! zgG2m!qm-Ly3y(qT)di3BYIUV!Ptm4Kr)2Xvu>PWL)kXWm^N2HkwZihp-&HN7CAYUp z)Sl2hKskK3KQET9HNRuo0p=#yc2T=D>4M&rG^;hA^HV=f{*+D@hvc6=mo$44E8R!+ z$0$nnDvK!m(G4-?wCGVXpf#%gPE@i=wprCZ)&8)EN=|&h$n@)bNDxK*;1bG9N8bar zGe86h7}y;U>WKb4fUG1xjRBOa8xic4klG`f)6j4MXhBtYWMLIhdukgh?$iXO|9B?v llFbcc!HjM29bR*zzaYorHi~Xjr3=-+xvOV-ZTV7t?%lb%2*n%tyA_0RSIuJHx zoKo#{YGo{?t$#3Tg`#7vj!HYDtvYo^T)K>P#<9xaOcw@<6$ml1@B5mBkc5!sWq(WF za_{Z$ocr>UK!8pg@Wna5@Obau_wKpp{66RRJHOxWT&b(eFa;pwTPu>2Ar2jrD-IKr zD-H{PL;x0mg$Tex1YjWoun+-QhyW}^02Y9S2*5%FU?BqV|19lm`X75->FX@kY2E1^vET^VJmoe&K_6F?z*Q=>GguTs(Ok<6nJ!sHtk`0j~JP1!&M= z=o%!+fT60G@Mhr+uUotJr_gDnh9rUgJ@fQ`pZWO})7C*%9Q?Bpf4#SF2|q4-@mZwH zGN%0I4V-!JZA^G_)lgE^&;wkybqjJg79z%vqKWgzrtZfBORKK>eQ{R{-rctV-QC?N z+G2xMvEkD(>+qABW!OBu4yvr;-@nyhSDmB@f*aSwgTFG+*4BzUJDPDMb0+F9wW4T$ z`c(ei<@$OoTQ+m3sBGu}-cE3}2oMeikh6s>RyVsXD+-zySHqjPi4J;3B|%YSG+kbV zh!#UA6vTwx>13T6x=x;fJHv~^i*#7*4um@WC|_$!Sg=CS7r$tQLSABW{Y8|yrX#xk zWys%q1vb@=)($_iGu(LW@#3MNw&VwY+_mHf2zLi@;<=wA_vJN+Dk^L?J1(fE9m~(I zM&Z|42u352qB?ZhK=6@Ii4;(MW*Smc-C!o~JeEfAWU|a=WP1zHK3;}lYlUpF;LGd= zRJ?S@fcMWW*n;8tqu@++B0D{k+Ew{H@(%07`~{1WS#`+|xblIAK)lF%lI=x*A_$W; zdK{^!YiU4xRvP40tDqTC+;%V@!AKC{B_G50+AL(fv&PHx!_Enq>-=^!Q^ zynx8;yJ68}bdGO@%asONfboU50jE}H!0yPsst?P4b_xD~7dyV*iBThR(a_k4dy4Oe zC({R2m9cPPazjp9z-6Udq3JPy4CuVszD)fnDVd*)YD_-BwWuF|@O*RS1?*k; z81Bv-h6o`j{`DKzY~$Eh-MC!jhibDRN|R*src#Q!WsJz4ii-NZkTeUXO)5doZ91Zw z1i!xv>pph(Ho#RP@sBH>A?ywdK3W|_eb-@{Q!3soS&BS&{($$+dM72B)R|m>uLajv zeG!ph2zwSS#ngf^XnbOSIUdd(J8&Bfjri;P<(RyBF04)~KX-UnEoy5o!mtF<=C37- zBkQ$|!Ga~T$TWdKAc&7YazoQ%{o0ZV@)J+$a5!wR*%f@aGKi|SGJLr3Np8KvTzN?k zIVk|244k5%SU1C1-8`qW5A;t9bt6WRZY&&y?|eHgnG&W(lz!KLoB6B6fyBBaIFV+h z;K_91imY&B)c7yW|M{E33{~N-mqp~dM%EnD339w2jZT8A z!<-@#2g~M^g+O;d>LV{g!Poq6Hu(BR0q>7|hL+Z5$R`Tno}5AMG0H91h+7W>$W*6x zQ%<|Pf?ShcJvEa z2)Os1HK<$hJmySUjK9qqfg*2hKfp{<*SW6S?=5@EFmYQE+Kdh=q(o8nQVnb#Cmf!% z>t6Sq^S*CCU*$F3~Irkp`ck3u!T>^4Xa z9~#f6!su!SK8rj;H+8PD-;h92ysYW;Ii&4lB}fvu}9at%Q8bEqH7D2XLo& z5Y_`Yed!eZVLxV%H7%?&+y?*U5Oj)WQJD`5t(h`g56LY{l;Q*(`{B&0N6on~3PyTy zxl%&*uxx~DC&D`XJY?TwZb>Y#V&N29zw4z**tDUP+DSzTSQX4W9k-GD&24(Ike zxy82lnsL`G7e%-ds*ZXvCTjtA&&=|FnFNDj?5fR!ufCcCop5_0Mi&-f$3OpwfZvZr zOIIUYwVg_i=2lW996>M;z&P&&DVf(;!~4W^FJ#=B z66Y-eysd02q$$<>^QD#=_-RLIeD1`=_l?c<@P%8b;NXY9C4juVJh&?-Q-L9Wg_N(- zS{g=JYDP`-m&hA!LkFcz896F8|H*|#KekdN8$e4_E3&oaa5|mbO>30zu=dovSy=zs zMr2T#bM}2x`S*2~J7`UJASZ*at=|b7UkW51%?BwB8lOE586Niy$F-;K0H2KAWc6pK z*?5hc0k)<(2ZpQkLY{Tpf_d|Q77TsB+sd}^+O9#lA}c0Dv>@z7ZT!2VXAU7kwc*1@ zj$`_ayO5scg4ap~{ap&uU0IZhIJoz;4zEO`?*L963}8~R1D7tg6LbRyHaU?qt{E2& zJ77-*mOsuaOr|%8QQjKtru;Og2yd==6}n`{>K9g{ytEt(9x@f%bg2!0+4?!YX~1Fu zCLv0LZYRL*)Eif$_B9-*?rDfo+Qlrt@}4w`4Xgy%J}}@uGUJrW)2W*c`KAKiN^n^r z!C<)}0mdEGsBNi3OG^u!E(a>BYEWHKLpwhy_|f+tB}FS}X=sOWCLgIzC)bLuX?(-} zV8ut2XQm=MUneV;QF+RLM;4xjU0-zIJIk{m)jtA{CnK=|5F-l?g`!x$wE{)6<{=mg zgSHK@@$>hw`sLU7(;#m>|0q^H^)&2uJ9n=Nup|Mfg#K@u)o|kC`xJcqvuHw@QUwRY@0#&;2{5dy&5Asa|T*@t%td3lW*s*Ij?!9v+MZL6!ogYp4s)c`MV+A64XmE3Kpn5DDOQVFVWk@~ zOWrdo#3zP60AN-FXMx!48W(M2T6kb4N#@#pvmg&T;O%AG`0UmBb5jO?QI(e~PUlr0 z6S|S8AWOW4OLLhbV6|^uiU&Td6sWUzb`ASX=EB@50|JKO7lCEv%(!>v?>%Xm*6fY3-Kbo32cCBal%4+_GtU| zm~961w1fD619g2DJhLjMCmyKlJsH2G&#$Gc?yU1+^uGvK%!YW?wC6Wc@56h1S&EC+){d#!?7C}z~f2%Q1_~=S0R0P;L6@g zS150AMZboJHm&PL$+Z=A-8=OF z>f3^h usize::MAX, }; + // TODO: More efficient threading. Instead of creating batches up front, + // run a continuous pipeline of calculations in the background and only apply + // them when we have this current opportunity. + // This will require making stored light data `Arc`ed and double-buffered or atomic, + // so it can be consulted by the calculation. + #[cfg(feature = "threads")] + while self.light_update_queue.len() > 0 { + // TODO: empirical tuning suggests that 128 is a good minimum batch size, + // but is too big for the amount of time we want to take + let some_updates: [Option; 32] = + [None::; 32].map(|_| self.light_update_queue.pop()); + let outputs = some_updates + .as_slice() + .into_par_iter() + .flatten() + .map(|&LightUpdateRequest { cube, .. }| self.compute_lighting(cube)) + .collect::>>(); + for output in outputs { + if false { + // Log cubes that were updated for debug visualization. + self.last_light_updates.push(output.cube); + } + light_update_count += 1; + let (difference, cube_cost) = self.apply_lighting_update(output); + max_difference = max_difference.max(difference); + cost += cube_cost; + } + + if cost >= max_cost { + break; + } + } + + #[cfg(not(feature = "threads"))] while let Some(LightUpdateRequest { cube, .. }) = self.light_update_queue.pop() { if false { // Log cubes that were updated for debug visualization. @@ -88,7 +125,7 @@ impl Space { let computation = self.compute_lighting(cube); - let (difference, cube_cost) = self.apply_lighting_update(cube, computation); + let (difference, cube_cost) = self.apply_lighting_update(computation); max_difference = max_difference.max(difference); cost += cube_cost; if cost >= max_cost { @@ -117,10 +154,10 @@ impl Space { #[inline] fn apply_lighting_update( &mut self, - cube: Cube, computation: ComputedLight<()>, ) -> (PackedLightScalar, usize) { let ComputedLight { + cube, light: new_light_value, dependencies, mut cost, @@ -221,6 +258,7 @@ impl Space { let new_light_value = cube_buffer.finish(origin_is_opaque); ComputedLight { + cube, light: new_light_value, dependencies: cube_buffer.dependencies, cost: cube_buffer.cost, @@ -531,6 +569,8 @@ impl LightBuffer { #[derive(Clone, Debug)] #[doc(hidden)] // used for debug rendering pub struct ComputedLight { + pub cube: Cube, + pub light: PackedLight, /// Cubes which the computed value depends on (imprecisely; empty cubes passed through /// are not listed).