From 7ca07db0d6968cd878126c9c7fd4fe61b1210463 Mon Sep 17 00:00:00 2001 From: Xu Date: Mon, 13 Jan 2025 17:08:00 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96=20FSRCNNX?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Effects/CAS/CAS.hlsl | 8 +- src/Effects/FSRCNNX/FSRCNNX.hlsl | 436 +++++++------- src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl | 723 ++++++++++++----------- src/Magpie/AppSettings.cpp | 2 + 4 files changed, 588 insertions(+), 581 deletions(-) diff --git a/src/Effects/CAS/CAS.hlsl b/src/Effects/CAS/CAS.hlsl index 40f6ed1fa..841eb1e8c 100644 --- a/src/Effects/CAS/CAS.hlsl +++ b/src/Effects/CAS/CAS.hlsl @@ -259,18 +259,18 @@ void Pass1(uint2 blockStart, uint3 threadId) { MF2 pixR, pixG, pixB; CasFilterH(src, 0, peak, pixR, pixG, pixB); - OUTPUT[gxy] = MF4(MF3(pixR.x, pixG.x, pixB.x), 1); + OUTPUT[gxy] = MF4(pixR.x, pixG.x, pixB.x, 1); ++gxy.x; - OUTPUT[gxy] = MF4(MF3(pixR.y, pixG.y, pixB.y), 1); + OUTPUT[gxy] = MF4(pixR.y, pixG.y, pixB.y, 1); CasFilterH(src, 1, peak, pixR, pixG, pixB); ++gxy.y; - OUTPUT[gxy] = MF4(MF3(pixR.y, pixG.y, pixB.y), 1); + OUTPUT[gxy] = MF4(pixR.y, pixG.y, pixB.y, 1); --gxy.x; - OUTPUT[gxy] = MF4(MF3(pixR.x, pixG.x, pixB.x), 1); + OUTPUT[gxy] = MF4(pixR.x, pixG.x, pixB.x, 1); #else OUTPUT[gxy] = MF4(CasFilter(src, uint2(1, 1), peak), 1); diff --git a/src/Effects/FSRCNNX/FSRCNNX.hlsl b/src/Effects/FSRCNNX/FSRCNNX.hlsl index 67364a843..5be12807a 100644 --- a/src/Effects/FSRCNNX/FSRCNNX.hlsl +++ b/src/Effects/FSRCNNX/FSRCNNX.hlsl @@ -4,7 +4,9 @@ //!MAGPIE EFFECT //!VERSION 4 -//!USE FP16 +//!USE FP16, MulAdd + +#include "..\StubDefs.hlsli" //!TEXTURE @@ -116,58 +118,58 @@ void Pass1(uint2 blockStart, uint3 threadId) { } MF4 target1 = MF4(-0.1572492271661758, -0.0120896836742759, 0.0061487639322877, -0.2852848768234253); - target1 += MF4(-0.0047900392673910, 0.0537447109818459, -0.0000247144635068, 0.0066653941757977) * src[0][0]; - target1 += MF4(0.0073144687339664, -0.0309004038572311, -0.0109181385487318, -0.0092840325087309) * src[0][1]; - target1 += MF4(0.0591700896620750, 0.1974907070398331, -0.0197357516735792, -0.0546554848551750) * src[0][2]; - target1 += MF4(-0.0011764382943511, -0.0299451071768999, 0.0229587312787771, 0.0021908886265010) * src[0][3]; - target1 += MF4(0.0098101310431957, 0.0080995410680771, -0.0030452020000666, -0.0132035519927740) * src[0][4]; - target1 += MF4(-0.0168330334126949, -0.0743711441755295, -0.0259261634200811, 0.0234480481594801) * src[1][0]; - target1 += MF4(0.0239933785051107, 0.1896541714668274, 0.0207756329327822, -0.0370332375168800) * src[1][1]; - target1 += MF4(0.0094799501821399, -0.0652511194348335, -0.0004292793164495, -0.0726212188601494) * src[1][2]; - target1 += MF4(0.0297284796833992, -0.1210186630487442, -0.0202929321676493, -0.0574462898075581) * src[1][3]; - target1 += MF4(-0.0318185277283192, 0.0840775370597839, 0.0110451309010386, 0.0415569432079792) * src[1][4]; - target1 += MF4(-0.0253141783177853, 0.1168256178498268, 0.1159729585051537, 0.0963164269924164) * src[2][0]; - target1 += MF4(-0.1103615835309029, -0.0276833958923817, -0.4999594092369080, 0.1053867191076279) * src[2][1]; - target1 += MF4(1.1100435256958008, 0.0646764487028122, 0.0154005717486143, 0.8891586661338806) * src[2][2]; - target1 += MF4(0.1229330673813820, 0.1719468832015991, 0.5730338096618652, -0.1645544171333313) * src[2][3]; - target1 += MF4(-0.0090442728251219, -0.3023961782455444, -0.1589493155479431, 0.0418574027717113) * src[2][4]; - target1 += MF4(0.0031942036002874, -0.1310926079750061, 0.0075543406419456, -0.0016449346439913) * src[3][0]; - target1 += MF4(-0.0995150282979012, -0.0701921209692955, -0.0130895879119635, 0.1344170123338699) * src[3][1]; - target1 += MF4(0.0060519003309309, -0.1533465683460236, 0.0114194005727768, 0.0264683905988932) * src[3][2]; - target1 += MF4(0.0244008023291826, 0.1881769001483917, -0.0206351149827242, -0.0628309547901154) * src[3][3]; - target1 += MF4(0.0075713125988841, 0.0508594363927841, 0.0430423170328140, -0.0124188791960478) * src[3][4]; - target1 += MF4(-0.0166875869035721, -0.0047865519300103, 0.0006719123339280, 0.0316803231835365) * src[4][0]; - target1 += MF4(-0.0058461269363761, 0.0990798473358154, -0.0177743826061487, -0.0066122291609645) * src[4][1]; - target1 += MF4(-0.0972401946783066, -0.0225446373224258, -0.0037693574558944, 0.1953062713146210) * src[4][2]; - target1 += MF4(-0.0216837190091610, -0.1824268400669098, 0.0069816261529922, 0.0283037684857845) * src[4][3]; - target1 += MF4(-0.0025767991319299, 0.0459827110171318, -0.0080216089263558, 0.0084134787321091) * src[4][4]; + target1 = mad(src[0][0], MF4(-0.0047900392673910, 0.0537447109818459, -0.0000247144635068, 0.0066653941757977), target1); + target1 = mad(src[0][1], MF4(0.0073144687339664, -0.0309004038572311, -0.0109181385487318, -0.0092840325087309), target1); + target1 = mad(src[0][2], MF4(0.0591700896620750, 0.1974907070398331, -0.0197357516735792, -0.0546554848551750), target1); + target1 = mad(src[0][3], MF4(-0.0011764382943511, -0.0299451071768999, 0.0229587312787771, 0.0021908886265010), target1); + target1 = mad(src[0][4], MF4(0.0098101310431957, 0.0080995410680771, -0.0030452020000666, -0.0132035519927740), target1); + target1 = mad(src[1][0], MF4(-0.0168330334126949, -0.0743711441755295, -0.0259261634200811, 0.0234480481594801), target1); + target1 = mad(src[1][1], MF4(0.0239933785051107, 0.1896541714668274, 0.0207756329327822, -0.0370332375168800), target1); + target1 = mad(src[1][2], MF4(0.0094799501821399, -0.0652511194348335, -0.0004292793164495, -0.0726212188601494), target1); + target1 = mad(src[1][3], MF4(0.0297284796833992, -0.1210186630487442, -0.0202929321676493, -0.0574462898075581), target1); + target1 = mad(src[1][4], MF4(-0.0318185277283192, 0.0840775370597839, 0.0110451309010386, 0.0415569432079792), target1); + target1 = mad(src[2][0], MF4(-0.0253141783177853, 0.1168256178498268, 0.1159729585051537, 0.0963164269924164), target1); + target1 = mad(src[2][1], MF4(-0.1103615835309029, -0.0276833958923817, -0.4999594092369080, 0.1053867191076279), target1); + target1 = mad(src[2][2], MF4(1.1100435256958008, 0.0646764487028122, 0.0154005717486143, 0.8891586661338806), target1); + target1 = mad(src[2][3], MF4(0.1229330673813820, 0.1719468832015991, 0.5730338096618652, -0.1645544171333313), target1); + target1 = mad(src[2][4], MF4(-0.0090442728251219, -0.3023961782455444, -0.1589493155479431, 0.0418574027717113), target1); + target1 = mad(src[3][0], MF4(0.0031942036002874, -0.1310926079750061, 0.0075543406419456, -0.0016449346439913), target1); + target1 = mad(src[3][1], MF4(-0.0995150282979012, -0.0701921209692955, -0.0130895879119635, 0.1344170123338699), target1); + target1 = mad(src[3][2], MF4(0.0060519003309309, -0.1533465683460236, 0.0114194005727768, 0.0264683905988932), target1); + target1 = mad(src[3][3], MF4(0.0244008023291826, 0.1881769001483917, -0.0206351149827242, -0.0628309547901154), target1); + target1 = mad(src[3][4], MF4(0.0075713125988841, 0.0508594363927841, 0.0430423170328140, -0.0124188791960478), target1); + target1 = mad(src[4][0], MF4(-0.0166875869035721, -0.0047865519300103, 0.0006719123339280, 0.0316803231835365), target1); + target1 = mad(src[4][1], MF4(-0.0058461269363761, 0.0990798473358154, -0.0177743826061487, -0.0066122291609645), target1); + target1 = mad(src[4][2], MF4(-0.0972401946783066, -0.0225446373224258, -0.0037693574558944, 0.1953062713146210), target1); + target1 = mad(src[4][3], MF4(-0.0216837190091610, -0.1824268400669098, 0.0069816261529922, 0.0283037684857845), target1); + target1 = mad(src[4][4], MF4(-0.0025767991319299, 0.0459827110171318, -0.0080216089263558, 0.0084134787321091), target1); MF4 target2 = MF4(0.0541447550058365, 0.0088306749239564, -0.0112389577552676, -0.0127860950306058); - target2 += MF4(0.0142660010606050, 0.0137931071221828, 0.0061188107356429, -0.0104134222492576) * src[0][0]; - target2 += MF4(0.0147292809560895, -0.0289912857115269, 0.0266769435256720, 0.0933856964111328) * src[0][1]; - target2 += MF4(-0.1734338253736496, 0.1116316691040993, -0.1973157376050949, -0.0581855811178684) * src[0][2]; - target2 += MF4(0.0347507223486900, -0.0341566652059555, 0.0061667622067034, 0.0075258882716298) * src[0][3]; - target2 += MF4(0.0069884369149804, -0.0194250214844942, 0.0080830128863454, -0.0036874092184007) * src[0][4]; - target2 += MF4(0.0233764201402664, 0.0344744995236397, 0.0162145942449570, 0.0979529991745949) * src[1][0]; - target2 += MF4(0.1280796974897385, -0.1018339172005653, -0.0132977198809385, -0.0019474622095004) * src[1][1]; - target2 += MF4(0.4286882579326630, 0.1222677752375603, 0.7046694159507751, 0.0945475697517395) * src[1][2]; - target2 += MF4(0.1107441782951355, -0.0134433070197701, -0.0174900908023119, -0.1686445474624634) * src[1][3]; - target2 += MF4(0.0321478620171547, 0.0065357843413949, 0.0300805997103453, 0.0420113280415535) * src[1][4]; - target2 += MF4(-0.1240341588854790, 0.0950303301215172, -0.0129648456349969, -0.2681856453418732) * src[2][0]; - target2 += MF4(0.4846960902214050, 0.0351924635469913, 0.0223043337464333, -0.1273630708456039) * src[2][1]; - target2 += MF4(-1.9379507303237915, -0.2444442063570023, 0.0291962660849094, -0.3835578560829163) * src[2][2]; - target2 += MF4(0.6396278142929077, -0.0765938311815262, -0.0552659817039967, 0.4393545985221863) * src[2][3]; - target2 += MF4(-0.1969728022813797, -0.0607173256576061, 0.0131113547831774, 0.0542017817497253) * src[2][4]; - target2 += MF4(0.0091696009039879, -0.0031533432193100, -0.0368777588009834, -0.0459998287260532) * src[3][0]; - target2 += MF4(0.1096992492675781, 0.2597902715206146, 0.0304869692772627, -0.0195200722664595) * src[3][1]; - target2 += MF4(0.2889648377895355, -0.4275591969490051, -0.7414156794548035, 0.2695442438125610) * src[3][2]; - target2 += MF4(0.0892018377780914, -0.0229137558490038, 0.0244414471089840, -0.1926898956298828) * src[3][3]; - target2 += MF4(0.0576358586549759, 0.0027846973389387, -0.0036861505359411, -0.0253547113388777) * src[3][4]; - target2 += MF4(0.0159624069929123, 0.0319602824747562, 0.0019470085389912, 0.0089780492708087) * src[4][0]; - target2 += MF4(0.0552792511880398, 0.0543054342269897, 0.0134062822908163, 0.0545728243887424) * src[4][1]; - target2 += MF4(-0.1170092225074768, 0.1963327825069427, 0.1503890156745911, 0.1891828328371048) * src[4][2]; - target2 += MF4(-0.0084421783685684, 0.1297017931938171, -0.0330600887537003, -0.0942063704133034) * src[4][3]; - target2 += MF4(0.0118440408259630, -0.0337875857949257, 0.0055063469335437, 0.0254479162395000) * src[4][4]; + target2 = mad(src[0][0], MF4(0.0142660010606050, 0.0137931071221828, 0.0061188107356429, -0.0104134222492576), target2); + target2 = mad(src[0][1], MF4(0.0147292809560895, -0.0289912857115269, 0.0266769435256720, 0.0933856964111328), target2); + target2 = mad(src[0][2], MF4(-0.1734338253736496, 0.1116316691040993, -0.1973157376050949, -0.0581855811178684), target2); + target2 = mad(src[0][3], MF4(0.0347507223486900, -0.0341566652059555, 0.0061667622067034, 0.0075258882716298), target2); + target2 = mad(src[0][4], MF4(0.0069884369149804, -0.0194250214844942, 0.0080830128863454, -0.0036874092184007), target2); + target2 = mad(src[1][0], MF4(0.0233764201402664, 0.0344744995236397, 0.0162145942449570, 0.0979529991745949), target2); + target2 = mad(src[1][1], MF4(0.1280796974897385, -0.1018339172005653, -0.0132977198809385, -0.0019474622095004), target2); + target2 = mad(src[1][2], MF4(0.4286882579326630, 0.1222677752375603, 0.7046694159507751, 0.0945475697517395), target2); + target2 = mad(src[1][3], MF4(0.1107441782951355, -0.0134433070197701, -0.0174900908023119, -0.1686445474624634), target2); + target2 = mad(src[1][4], MF4(0.0321478620171547, 0.0065357843413949, 0.0300805997103453, 0.0420113280415535), target2); + target2 = mad(src[2][0], MF4(-0.1240341588854790, 0.0950303301215172, -0.0129648456349969, -0.2681856453418732), target2); + target2 = mad(src[2][1], MF4(0.4846960902214050, 0.0351924635469913, 0.0223043337464333, -0.1273630708456039), target2); + target2 = mad(src[2][2], MF4(-1.9379507303237915, -0.2444442063570023, 0.0291962660849094, -0.3835578560829163), target2); + target2 = mad(src[2][3], MF4(0.6396278142929077, -0.0765938311815262, -0.0552659817039967, 0.4393545985221863), target2); + target2 = mad(src[2][4], MF4(-0.1969728022813797, -0.0607173256576061, 0.0131113547831774, 0.0542017817497253), target2); + target2 = mad(src[3][0], MF4(0.0091696009039879, -0.0031533432193100, -0.0368777588009834, -0.0459998287260532), target2); + target2 = mad(src[3][1], MF4(0.1096992492675781, 0.2597902715206146, 0.0304869692772627, -0.0195200722664595), target2); + target2 = mad(src[3][2], MF4(0.2889648377895355, -0.4275591969490051, -0.7414156794548035, 0.2695442438125610), target2); + target2 = mad(src[3][3], MF4(0.0892018377780914, -0.0229137558490038, 0.0244414471089840, -0.1926898956298828), target2); + target2 = mad(src[3][4], MF4(0.0576358586549759, 0.0027846973389387, -0.0036861505359411, -0.0253547113388777), target2); + target2 = mad(src[4][0], MF4(0.0159624069929123, 0.0319602824747562, 0.0019470085389912, 0.0089780492708087), target2); + target2 = mad(src[4][1], MF4(0.0552792511880398, 0.0543054342269897, 0.0134062822908163, 0.0545728243887424), target2); + target2 = mad(src[4][2], MF4(-0.1170092225074768, 0.1963327825069427, 0.1503890156745911, 0.1891828328371048), target2); + target2 = mad(src[4][3], MF4(-0.0084421783685684, 0.1297017931938171, -0.0330600887537003, -0.0942063704133034), target2); + target2 = mad(src[4][4], MF4(0.0118440408259630, -0.0337875857949257, 0.0055063469335437, 0.0254479162395000), target2); featureMap1[destPos] = target1; featureMap2[destPos] = target2; @@ -216,45 +218,45 @@ void Pass2(uint2 blockStart, uint3 threadId) { MF4 br2 = featureMap2.SampleLevel(sam, pos + inputPt, 0); MF4 target1 = MF4(-0.0445119962096214, -0.7632357478141785, 0.0156328510493040, -0.2424548566341400); - target1 += mul(tl1, MF4x4(0.1279004216194153, -0.0275541823357344, 0.2275633513927460, 0.2241709381341934, 0.0197204202413559, -0.0456816256046295, -0.1296672523021698, 0.0564568229019642, -0.0241488646715879, -0.0237508192658424, -0.1899632662534714, 0.4177669584751129, -0.1814560592174530, -0.0526473335921764, 0.1154382973909378, -0.0715614855289459)); - target1 += mul(tl2, MF4x4(-0.0660311505198479, 0.0416736751794815, 0.3146112561225891, 0.1472041457891464, -0.3456672728061676, -0.0055983816273510, 0.0022350433282554, 0.0819796621799469, 0.0057485047727823, 0.1532524228096008, 0.0204557459801435, -0.2500547170639038, -0.0524359568953514, -0.1911625266075134, -0.1078366711735725, -0.1296254843473434)); - target1 += mul(ml1, MF4x4(0.0904538556933403, -0.0150672039017081, 0.3322310745716095, 0.0638923197984695, 0.5975797176361084, -0.2452044337987900, -0.4947478473186493, -0.0783191770315170, 0.5771877169609070, -0.0870653912425041, -0.8966570496559143, -0.2140965163707733, -0.0493861362338066, -0.0380848757922649, -0.1345319598913193, -0.0186063013970852)); - target1 += mul(ml2, MF4x4(-0.2523841261863708, 0.1387074738740921, 0.7878478765487671, -0.2251627445220947, 0.2277439534664154, 0.5417668819427490, 0.0866540968418121, -0.1707777529954910, -0.0598246827721596, -0.4717158675193787, -1.2242834568023682, 0.0454643070697784, -0.3503442704677582, 0.0573085807263851, 0.2530198395252228, -0.0207283068448305)); - target1 += mul(bl1, MF4x4(0.0168380383402109, -0.2142438590526581, -0.0207892972975969, 0.3628533780574799, 0.2431225180625916, 0.3098322153091431, 0.4073205888271332, -0.2762102782726288, -0.0197229012846947, 0.1305596232414246, -0.5697882771492004, -0.2976251542568207, -0.0551432967185974, 0.2614036500453949, -0.1410341411828995, -0.2906406223773956)); - target1 += mul(bl2, MF4x4(-0.0498303361237049, 0.0224859956651926, 0.1952174901962280, -0.0311204437166452, 0.2501715123653412, -0.5893352627754211, -1.0793941020965576, 0.0160885509103537, 0.5081620812416077, 0.0482814386487007, 0.0546359121799469, -0.0501569248735905, 0.1400523334741592, -0.0106841633096337, -0.0940591320395470, -0.1791856139898300)); - target1 += mul(tc1, MF4x4(0.0393299944698811, 0.2232691347599030, -0.1055066883563995, -0.1607919186353683, -0.1567825973033905, -0.0042221010662615, -0.0548228211700916, 0.2352052628993988, 0.1483389288187027, 0.7503526806831360, 0.0797731876373291, -0.0049001369625330, -0.0242983382195234, -0.0308702979236841, 0.0828925222158432, 0.0561857633292675)); - target1 += mul(tc2, MF4x4(0.0926392748951912, -0.0418718457221985, -0.3060409128665924, -0.1883587390184402, 0.0284292586147785, -0.3584854304790497, -0.7909982800483704, -0.0187337957322598, -0.2496993243694305, -0.7520986795425415, 0.3771523833274841, -0.0259053874760866, 0.0337998159229755, 0.2209153026342392, 0.0708771497011185, -0.2814430892467499)); - target1 += mul(mc1, MF4x4(-0.5287809371948242, 0.5777525901794434, 0.0880500450730324, -0.8452472090721130, -0.3393408954143524, -0.2273543328046799, -0.1298527419567108, 0.4990308582782745, 1.2613251209259033, -0.7636719942092896, 1.5694186687469482, -0.4087363779544830, 0.0874531939625740, 0.7067158818244934, -0.3419588804244995, -0.3265531957149506)); - target1 += mul(mc2, MF4x4(0.8229957222938538, -0.1236215904355049, -0.1859253048896790, 1.6684840917587280, 0.2000777721405029, -0.1239093989133835, 1.5623438358306885, 0.1779983490705490, 0.1017884835600853, -0.3707404434680939, 1.0626678466796875, -0.3124029338359833, 0.0659058541059494, -0.3585464656352997, -0.1866402775049210, 0.6733445525169373)); - target1 += mul(bc1, MF4x4(-0.5544115900993347, -0.1892931908369064, 0.2460739761590958, -0.1056193932890892, -0.4318082630634308, 0.1257930994033813, -0.2672747671604156, -0.1690235435962677, 0.0018221997888759, -0.4397548139095306, -0.3007801771163940, 0.1068472340703011, 0.3506655991077423, 0.1143834441900253, 0.1363849341869354, -0.1417382210493088)); - target1 += mul(bc2, MF4x4(-0.0505668744444847, 0.1831464916467667, 0.3957343697547913, -0.2295413911342621, -0.3892803490161896, 0.5436951518058777, 0.1217770799994469, 0.0223295800387859, -0.4462866187095642, -0.4055982232093811, -0.3771279454231262, 0.0807068347930908, 0.2116729617118835, 0.0281026475131512, -0.0229265503585339, 0.2868605256080627)); - target1 += mul(tr1, MF4x4(0.1962712109088898, -0.2373334914445877, -2.5208437442779541, -0.1988540291786194, 0.2224564403295517, -0.1783192902803421, -0.3962321281433105, -0.1685980409383774, 0.1910390257835388, 0.2554391324520111, 0.4586416482925415, 0.2779130041599274, -0.2002453953027725, -0.0061091855168343, 1.3808131217956543, 0.0434907525777817)); - target1 += mul(tr2, MF4x4(-0.0307611189782619, -0.0524470545351505, -0.5897512435913086, -0.0816674903035164, 0.4052906930446625, 0.2542210817337036, -1.9041002988815308, 0.0835462361574173, -0.2484460622072220, -0.0184739269316196, 0.4510098397731781, 0.2587619423866272, 0.1537084281444550, 0.1503131389617920, -0.0742949545383453, 0.0613216012716293)); - target1 += mul(mr1, MF4x4(0.1772638261318207, 0.0948876664042473, 0.0083848545327783, -0.2919732332229614, 0.2566950321197510, 0.0288751143962145, -0.4624863862991333, -0.0608786940574646, 0.3310996592044830, -0.0104284398257732, 0.6334818005561829, -0.0027201652992517, -0.0342350602149963, 0.1938806027173996, -0.2464301586151123, 0.0125883584842086)); - target1 += mul(mr2, MF4x4(0.4839433431625366, -0.0502159744501114, -1.1114163398742676, -0.3965759575366974, 0.2117286175489426, 0.0414481423795223, -0.1332397013902664, -0.0549883767962456, -0.1275007277727127, 0.7844302654266357, -0.0095163453370333, 0.0961041301488876, -0.4759134948253632, -0.4284025132656097, -0.2072399407625198, -0.3953579664230347)); - target1 += mul(br1, MF4x4(0.1605869531631470, -0.1715892106294632, 0.0865620598196983, -0.0464400537312031, -0.2688548862934113, 0.1722514480352402, 0.0167612321674824, -0.0032994034700096, -0.3451044559478760, -0.2280300110578537, -0.0029796555172652, -0.1597652435302734, 0.0500137843191624, 0.1023071259260178, -0.0407028235495090, 0.2228624969720840)); - target1 += mul(br2, MF4x4(0.6999920010566711, 0.0839441940188408, 0.0815469548106194, -0.1509176045656204, -0.0690853074193001, -0.3200871348381042, 0.0780162736773491, -0.1449639797210693, 0.2868815064430237, 0.3962450027465820, -0.3439113497734070, 0.2657423913478851, 0.0988137871026993, 0.3471299111843109, -0.2186402678489685, -0.0648017078638077)); + target1 = MulAdd(tl1, MF4x4(0.1279004216194153, -0.0275541823357344, 0.2275633513927460, 0.2241709381341934, 0.0197204202413559, -0.0456816256046295, -0.1296672523021698, 0.0564568229019642, -0.0241488646715879, -0.0237508192658424, -0.1899632662534714, 0.4177669584751129, -0.1814560592174530, -0.0526473335921764, 0.1154382973909378, -0.0715614855289459), target1); + target1 = MulAdd(tl2, MF4x4(-0.0660311505198479, 0.0416736751794815, 0.3146112561225891, 0.1472041457891464, -0.3456672728061676, -0.0055983816273510, 0.0022350433282554, 0.0819796621799469, 0.0057485047727823, 0.1532524228096008, 0.0204557459801435, -0.2500547170639038, -0.0524359568953514, -0.1911625266075134, -0.1078366711735725, -0.1296254843473434), target1); + target1 = MulAdd(ml1, MF4x4(0.0904538556933403, -0.0150672039017081, 0.3322310745716095, 0.0638923197984695, 0.5975797176361084, -0.2452044337987900, -0.4947478473186493, -0.0783191770315170, 0.5771877169609070, -0.0870653912425041, -0.8966570496559143, -0.2140965163707733, -0.0493861362338066, -0.0380848757922649, -0.1345319598913193, -0.0186063013970852), target1); + target1 = MulAdd(ml2, MF4x4(-0.2523841261863708, 0.1387074738740921, 0.7878478765487671, -0.2251627445220947, 0.2277439534664154, 0.5417668819427490, 0.0866540968418121, -0.1707777529954910, -0.0598246827721596, -0.4717158675193787, -1.2242834568023682, 0.0454643070697784, -0.3503442704677582, 0.0573085807263851, 0.2530198395252228, -0.0207283068448305), target1); + target1 = MulAdd(bl1, MF4x4(0.0168380383402109, -0.2142438590526581, -0.0207892972975969, 0.3628533780574799, 0.2431225180625916, 0.3098322153091431, 0.4073205888271332, -0.2762102782726288, -0.0197229012846947, 0.1305596232414246, -0.5697882771492004, -0.2976251542568207, -0.0551432967185974, 0.2614036500453949, -0.1410341411828995, -0.2906406223773956), target1); + target1 = MulAdd(bl2, MF4x4(-0.0498303361237049, 0.0224859956651926, 0.1952174901962280, -0.0311204437166452, 0.2501715123653412, -0.5893352627754211, -1.0793941020965576, 0.0160885509103537, 0.5081620812416077, 0.0482814386487007, 0.0546359121799469, -0.0501569248735905, 0.1400523334741592, -0.0106841633096337, -0.0940591320395470, -0.1791856139898300), target1); + target1 = MulAdd(tc1, MF4x4(0.0393299944698811, 0.2232691347599030, -0.1055066883563995, -0.1607919186353683, -0.1567825973033905, -0.0042221010662615, -0.0548228211700916, 0.2352052628993988, 0.1483389288187027, 0.7503526806831360, 0.0797731876373291, -0.0049001369625330, -0.0242983382195234, -0.0308702979236841, 0.0828925222158432, 0.0561857633292675), target1); + target1 = MulAdd(tc2, MF4x4(0.0926392748951912, -0.0418718457221985, -0.3060409128665924, -0.1883587390184402, 0.0284292586147785, -0.3584854304790497, -0.7909982800483704, -0.0187337957322598, -0.2496993243694305, -0.7520986795425415, 0.3771523833274841, -0.0259053874760866, 0.0337998159229755, 0.2209153026342392, 0.0708771497011185, -0.2814430892467499), target1); + target1 = MulAdd(mc1, MF4x4(-0.5287809371948242, 0.5777525901794434, 0.0880500450730324, -0.8452472090721130, -0.3393408954143524, -0.2273543328046799, -0.1298527419567108, 0.4990308582782745, 1.2613251209259033, -0.7636719942092896, 1.5694186687469482, -0.4087363779544830, 0.0874531939625740, 0.7067158818244934, -0.3419588804244995, -0.3265531957149506), target1); + target1 = MulAdd(mc2, MF4x4(0.8229957222938538, -0.1236215904355049, -0.1859253048896790, 1.6684840917587280, 0.2000777721405029, -0.1239093989133835, 1.5623438358306885, 0.1779983490705490, 0.1017884835600853, -0.3707404434680939, 1.0626678466796875, -0.3124029338359833, 0.0659058541059494, -0.3585464656352997, -0.1866402775049210, 0.6733445525169373), target1); + target1 = MulAdd(bc1, MF4x4(-0.5544115900993347, -0.1892931908369064, 0.2460739761590958, -0.1056193932890892, -0.4318082630634308, 0.1257930994033813, -0.2672747671604156, -0.1690235435962677, 0.0018221997888759, -0.4397548139095306, -0.3007801771163940, 0.1068472340703011, 0.3506655991077423, 0.1143834441900253, 0.1363849341869354, -0.1417382210493088), target1); + target1 = MulAdd(bc2, MF4x4(-0.0505668744444847, 0.1831464916467667, 0.3957343697547913, -0.2295413911342621, -0.3892803490161896, 0.5436951518058777, 0.1217770799994469, 0.0223295800387859, -0.4462866187095642, -0.4055982232093811, -0.3771279454231262, 0.0807068347930908, 0.2116729617118835, 0.0281026475131512, -0.0229265503585339, 0.2868605256080627), target1); + target1 = MulAdd(tr1, MF4x4(0.1962712109088898, -0.2373334914445877, -2.5208437442779541, -0.1988540291786194, 0.2224564403295517, -0.1783192902803421, -0.3962321281433105, -0.1685980409383774, 0.1910390257835388, 0.2554391324520111, 0.4586416482925415, 0.2779130041599274, -0.2002453953027725, -0.0061091855168343, 1.3808131217956543, 0.0434907525777817), target1); + target1 = MulAdd(tr2, MF4x4(-0.0307611189782619, -0.0524470545351505, -0.5897512435913086, -0.0816674903035164, 0.4052906930446625, 0.2542210817337036, -1.9041002988815308, 0.0835462361574173, -0.2484460622072220, -0.0184739269316196, 0.4510098397731781, 0.2587619423866272, 0.1537084281444550, 0.1503131389617920, -0.0742949545383453, 0.0613216012716293), target1); + target1 = MulAdd(mr1, MF4x4(0.1772638261318207, 0.0948876664042473, 0.0083848545327783, -0.2919732332229614, 0.2566950321197510, 0.0288751143962145, -0.4624863862991333, -0.0608786940574646, 0.3310996592044830, -0.0104284398257732, 0.6334818005561829, -0.0027201652992517, -0.0342350602149963, 0.1938806027173996, -0.2464301586151123, 0.0125883584842086), target1); + target1 = MulAdd(mr2, MF4x4(0.4839433431625366, -0.0502159744501114, -1.1114163398742676, -0.3965759575366974, 0.2117286175489426, 0.0414481423795223, -0.1332397013902664, -0.0549883767962456, -0.1275007277727127, 0.7844302654266357, -0.0095163453370333, 0.0961041301488876, -0.4759134948253632, -0.4284025132656097, -0.2072399407625198, -0.3953579664230347), target1); + target1 = MulAdd(br1, MF4x4(0.1605869531631470, -0.1715892106294632, 0.0865620598196983, -0.0464400537312031, -0.2688548862934113, 0.1722514480352402, 0.0167612321674824, -0.0032994034700096, -0.3451044559478760, -0.2280300110578537, -0.0029796555172652, -0.1597652435302734, 0.0500137843191624, 0.1023071259260178, -0.0407028235495090, 0.2228624969720840), target1); + target1 = MulAdd(br2, MF4x4(0.6999920010566711, 0.0839441940188408, 0.0815469548106194, -0.1509176045656204, -0.0690853074193001, -0.3200871348381042, 0.0780162736773491, -0.1449639797210693, 0.2868815064430237, 0.3962450027465820, -0.3439113497734070, 0.2657423913478851, 0.0988137871026993, 0.3471299111843109, -0.2186402678489685, -0.0648017078638077), target1); target1 = max(target1, 0) + MF4(1.0311057567596436, 0.1051208898425102, 0.1158760935068130, 0.0466635078191757) * min(target1, 0); MF4 target2 = MF4(0.0713458731770515, -0.1403961777687073, -0.0019562745001167, 0.0153338573873043); - target2 += mul(tl1, MF4x4(-0.0950641855597496, -0.1496641039848328, -0.0653550028800964, 0.0655386000871658, -0.0118882004171610, 0.2012491524219513, -0.2844599783420563, -0.4794720113277435, 0.1128025799989700, -0.0173030979931355, -0.0558849945664406, -0.2957552075386047, 0.0128202112391591, 0.0199047476053238, -0.0091027505695820, -0.0789640173316002)); - target2 += mul(tl2, MF4x4(0.1597457975149155, -0.0476507246494293, 0.1466529071331024, 0.0859163030982018, 0.0797316282987595, -0.3380981683731079, 0.2370245009660721, -0.1145931258797646, -0.0352988094091415, -0.0444888733327389, -0.2100716233253479, 0.1305520236492157, -0.1359029710292816, 0.1097442805767059, 0.0449938289821148, -0.1155664771795273)); - target2 += mul(ml1, MF4x4(-0.0333916284143925, 0.2415594160556793, 0.0520512908697128, 0.1228107511997223, -0.0491011217236519, 0.4408806562423706, 0.4631956815719604, 0.2014560103416443, -0.3688595592975616, 0.0367180295288563, 0.2484581321477890, -0.1113442853093147, 0.1283355057239532, 0.0418004281818867, -0.0171243026852608, -0.1231943219900131)); - target2 += mul(ml2, MF4x4(0.3493446409702301, 0.4550022482872009, 0.0368724688887596, 0.0748724937438965, 0.5001406073570251, 0.0145555436611176, 0.1236629858613014, 0.3143120706081390, -0.1951988488435745, -0.0157914645969868, 0.0937998965382576, -0.2233840376138687, 0.5033411383628845, -0.3183194100856781, -0.2259195148944855, 0.3639536798000336)); - target2 += mul(bl1, MF4x4(-0.0742707476019859, -0.1287801116704941, -0.2533137500286102, 0.0666435658931732, -0.0185621567070484, 0.1427449285984039, -0.0724751204252243, -0.0781485065817833, -0.2270648330450058, -0.2314778864383698, 0.3814929425716400, -0.1655400246381760, 0.0408568829298019, -0.1139645278453827, 0.1797397136688232, -0.0245632305741310)); - target2 += mul(bl2, MF4x4(0.1184135973453522, 0.0439366139471531, 0.0225226897746325, -0.0038526873104274, 0.1292685419321060, 0.0629177838563919, 0.3455114960670471, -0.1857204884290695, -0.4921502172946930, -0.1171003505587578, 0.0188624169677496, -0.1101682260632515, 0.0676844567060471, 0.5154085755348206, -0.0898379907011986, 0.3413280248641968)); - target2 += mul(tc1, MF4x4(-0.2631838321685791, 0.0215514600276947, 0.3092688918113708, -0.0200904365628958, 0.0678770467638969, 0.1769931465387344, -0.3653681278228760, -0.3274513185024261, 0.4608019888401031, -0.1544784456491470, 0.1189439669251442, 0.7015876173973083, 0.2732816934585571, -0.0545057803392410, -0.3474545478820801, -0.0253226496279240)); - target2 += mul(tc2, MF4x4(0.0994316861033440, 0.0642566010355949, 0.2031503319740295, 0.2276959568262100, -0.1094077304005623, 0.4463521838188171, 0.0921792611479759, -0.3033096492290497, -0.0953373983502388, -0.1331395804882050, 0.2615413069725037, -0.2874414622783661, -0.0389687754213810, 0.0338272154331207, 0.2804331183433533, -0.3443813025951385)); - target2 += mul(mc1, MF4x4(-0.1806042939424515, -0.4840798676013947, 0.4222546219825745, 0.1238701492547989, 0.0117481639608741, -0.5986865758895874, 0.3057619929313660, 0.1934896260499954, -0.7086342573165894, -0.8567376136779785, 0.6944998502731323, -1.4599204063415527, 0.0886754393577576, -0.4293498098850250, -0.1524195969104767, 0.2418079674243927)); - target2 += mul(mc2, MF4x4(2.1706113815307617, 0.3525652289390564, -0.7008359432220459, -0.4825965166091919, -0.3203429281711578, 0.8500943183898926, -0.7993509769439697, 0.4329842329025269, 0.2106771767139435, 1.1103280782699585, 1.2092385292053223, 1.4814503192901611, -0.4147390127182007, -0.7046836614608765, -0.1443170011043549, -0.6811133027076721)); - target2 += mul(bc1, MF4x4(-0.1489356607198715, 0.1400019824504852, 0.2425604313611984, -0.2098473459482193, -0.1580564379692078, 0.1463224738836288, -0.2187854647636414, 0.5174596905708313, -0.0143817225471139, -0.0362622961401939, -0.0068237944506109, 0.4749472737312317, 0.2914732992649078, -0.3306328952312469, -0.2444777786731720, -0.1171946674585342)); - target2 += mul(bc2, MF4x4(0.0455239675939083, 0.3496046066284180, 0.1297491937875748, -0.2541095912456512, 0.3605501055717468, 0.2339573651552200, -0.0188565086573362, -0.0526181310415268, 0.1471424549818039, 0.8212822079658508, 0.0819099843502045, -0.0851665437221527, 0.3739568293094635, 0.1304695755243301, 0.1481167376041412, -0.2134698331356049)); - target2 += mul(tr1, MF4x4(-0.2076720446348190, -0.0932599306106567, 0.0648527294397354, -0.2374770641326904, -0.0927826911211014, 0.1848200261592865, 0.4131188094615936, 0.3280069231987000, -0.2099185734987259, 0.2130926996469498, -0.0362745784223080, 0.0191331822425127, 0.1590368449687958, 0.0303016249090433, 0.1207325309514999, 0.2451425045728683)); - target2 += mul(tr2, MF4x4(-0.0135009605437517, -0.0101303057745099, 0.0752487555146217, 0.0533373840153217, -0.0253537259995937, 0.1318614929914474, -0.1263181120157242, 0.0249524712562561, -0.1477261483669281, 0.3236559033393860, 0.0773291289806366, -0.1439673304557800, -0.2005890905857086, 0.0892757251858711, 0.0398719944059849, 0.3675192892551422)); - target2 += mul(mr1, MF4x4(-0.0193535499274731, -0.2256918102502823, 0.0341436080634594, 0.0795947611331940, 0.1496857404708862, -0.2784725725650787, -0.0582313314080238, -0.2786065340042114, -0.1666128039360046, -0.6534121036529541, 0.2695854306221008, -0.0179719906300306, 0.0015976354479790, 0.0139929885044694, -0.1706486046314240, -0.3274765610694885)); - target2 += mul(mr2, MF4x4(-0.7170836329460144, 0.0868831276893616, 0.1829078495502472, -0.0076045366004109, 0.1525912433862686, -0.2558896839618683, 0.0893209800124168, -0.3426039516925812, -0.2871107757091522, -0.2445062994956970, 0.1676304638385773, 0.2116415053606033, 0.0883995518088341, -0.3880331516265869, 0.2636835277080536, -0.2514505982398987)); - target2 += mul(br1, MF4x4(-0.1861270815134048, 0.2000686377286911, -0.1501186788082123, 0.1525203883647919, 0.1969228833913803, 0.1174068301916122, -0.1281060427427292, -0.0854888409376144, 0.0290613435208797, -0.0538076497614384, -0.0251582786440849, 0.0692845508456230, 0.0384319014847279, 0.2888138592243195, 0.1151804402470589, 0.0990421250462532)); - target2 += mul(br2, MF4x4(-0.0344385802745819, 0.1270371377468109, 0.0922426953911781, -0.0426749102771282, -0.1656492203474045, -0.3273328542709351, -0.0282224025577307, 0.1099396124482155, -0.1113230437040329, 0.2943290174007416, -0.2181112915277481, -0.3177657723426819, -0.1096536740660667, -0.0508293099701405, -0.0256164856255054, -0.0388228967785835)); + target2 = MulAdd(tl1, MF4x4(-0.0950641855597496, -0.1496641039848328, -0.0653550028800964, 0.0655386000871658, -0.0118882004171610, 0.2012491524219513, -0.2844599783420563, -0.4794720113277435, 0.1128025799989700, -0.0173030979931355, -0.0558849945664406, -0.2957552075386047, 0.0128202112391591, 0.0199047476053238, -0.0091027505695820, -0.0789640173316002), target2); + target2 = MulAdd(tl2, MF4x4(0.1597457975149155, -0.0476507246494293, 0.1466529071331024, 0.0859163030982018, 0.0797316282987595, -0.3380981683731079, 0.2370245009660721, -0.1145931258797646, -0.0352988094091415, -0.0444888733327389, -0.2100716233253479, 0.1305520236492157, -0.1359029710292816, 0.1097442805767059, 0.0449938289821148, -0.1155664771795273), target2); + target2 = MulAdd(ml1, MF4x4(-0.0333916284143925, 0.2415594160556793, 0.0520512908697128, 0.1228107511997223, -0.0491011217236519, 0.4408806562423706, 0.4631956815719604, 0.2014560103416443, -0.3688595592975616, 0.0367180295288563, 0.2484581321477890, -0.1113442853093147, 0.1283355057239532, 0.0418004281818867, -0.0171243026852608, -0.1231943219900131), target2); + target2 = MulAdd(ml2, MF4x4(0.3493446409702301, 0.4550022482872009, 0.0368724688887596, 0.0748724937438965, 0.5001406073570251, 0.0145555436611176, 0.1236629858613014, 0.3143120706081390, -0.1951988488435745, -0.0157914645969868, 0.0937998965382576, -0.2233840376138687, 0.5033411383628845, -0.3183194100856781, -0.2259195148944855, 0.3639536798000336), target2); + target2 = MulAdd(bl1, MF4x4(-0.0742707476019859, -0.1287801116704941, -0.2533137500286102, 0.0666435658931732, -0.0185621567070484, 0.1427449285984039, -0.0724751204252243, -0.0781485065817833, -0.2270648330450058, -0.2314778864383698, 0.3814929425716400, -0.1655400246381760, 0.0408568829298019, -0.1139645278453827, 0.1797397136688232, -0.0245632305741310), target2); + target2 = MulAdd(bl2, MF4x4(0.1184135973453522, 0.0439366139471531, 0.0225226897746325, -0.0038526873104274, 0.1292685419321060, 0.0629177838563919, 0.3455114960670471, -0.1857204884290695, -0.4921502172946930, -0.1171003505587578, 0.0188624169677496, -0.1101682260632515, 0.0676844567060471, 0.5154085755348206, -0.0898379907011986, 0.3413280248641968), target2); + target2 = MulAdd(tc1, MF4x4(-0.2631838321685791, 0.0215514600276947, 0.3092688918113708, -0.0200904365628958, 0.0678770467638969, 0.1769931465387344, -0.3653681278228760, -0.3274513185024261, 0.4608019888401031, -0.1544784456491470, 0.1189439669251442, 0.7015876173973083, 0.2732816934585571, -0.0545057803392410, -0.3474545478820801, -0.0253226496279240), target2); + target2 = MulAdd(tc2, MF4x4(0.0994316861033440, 0.0642566010355949, 0.2031503319740295, 0.2276959568262100, -0.1094077304005623, 0.4463521838188171, 0.0921792611479759, -0.3033096492290497, -0.0953373983502388, -0.1331395804882050, 0.2615413069725037, -0.2874414622783661, -0.0389687754213810, 0.0338272154331207, 0.2804331183433533, -0.3443813025951385), target2); + target2 = MulAdd(mc1, MF4x4(-0.1806042939424515, -0.4840798676013947, 0.4222546219825745, 0.1238701492547989, 0.0117481639608741, -0.5986865758895874, 0.3057619929313660, 0.1934896260499954, -0.7086342573165894, -0.8567376136779785, 0.6944998502731323, -1.4599204063415527, 0.0886754393577576, -0.4293498098850250, -0.1524195969104767, 0.2418079674243927), target2); + target2 = MulAdd(mc2, MF4x4(2.1706113815307617, 0.3525652289390564, -0.7008359432220459, -0.4825965166091919, -0.3203429281711578, 0.8500943183898926, -0.7993509769439697, 0.4329842329025269, 0.2106771767139435, 1.1103280782699585, 1.2092385292053223, 1.4814503192901611, -0.4147390127182007, -0.7046836614608765, -0.1443170011043549, -0.6811133027076721), target2); + target2 = MulAdd(bc1, MF4x4(-0.1489356607198715, 0.1400019824504852, 0.2425604313611984, -0.2098473459482193, -0.1580564379692078, 0.1463224738836288, -0.2187854647636414, 0.5174596905708313, -0.0143817225471139, -0.0362622961401939, -0.0068237944506109, 0.4749472737312317, 0.2914732992649078, -0.3306328952312469, -0.2444777786731720, -0.1171946674585342), target2); + target2 = MulAdd(bc2, MF4x4(0.0455239675939083, 0.3496046066284180, 0.1297491937875748, -0.2541095912456512, 0.3605501055717468, 0.2339573651552200, -0.0188565086573362, -0.0526181310415268, 0.1471424549818039, 0.8212822079658508, 0.0819099843502045, -0.0851665437221527, 0.3739568293094635, 0.1304695755243301, 0.1481167376041412, -0.2134698331356049), target2); + target2 = MulAdd(tr1, MF4x4(-0.2076720446348190, -0.0932599306106567, 0.0648527294397354, -0.2374770641326904, -0.0927826911211014, 0.1848200261592865, 0.4131188094615936, 0.3280069231987000, -0.2099185734987259, 0.2130926996469498, -0.0362745784223080, 0.0191331822425127, 0.1590368449687958, 0.0303016249090433, 0.1207325309514999, 0.2451425045728683), target2); + target2 = MulAdd(tr2, MF4x4(-0.0135009605437517, -0.0101303057745099, 0.0752487555146217, 0.0533373840153217, -0.0253537259995937, 0.1318614929914474, -0.1263181120157242, 0.0249524712562561, -0.1477261483669281, 0.3236559033393860, 0.0773291289806366, -0.1439673304557800, -0.2005890905857086, 0.0892757251858711, 0.0398719944059849, 0.3675192892551422), target2); + target2 = MulAdd(mr1, MF4x4(-0.0193535499274731, -0.2256918102502823, 0.0341436080634594, 0.0795947611331940, 0.1496857404708862, -0.2784725725650787, -0.0582313314080238, -0.2786065340042114, -0.1666128039360046, -0.6534121036529541, 0.2695854306221008, -0.0179719906300306, 0.0015976354479790, 0.0139929885044694, -0.1706486046314240, -0.3274765610694885), target2); + target2 = MulAdd(mr2, MF4x4(-0.7170836329460144, 0.0868831276893616, 0.1829078495502472, -0.0076045366004109, 0.1525912433862686, -0.2558896839618683, 0.0893209800124168, -0.3426039516925812, -0.2871107757091522, -0.2445062994956970, 0.1676304638385773, 0.2116415053606033, 0.0883995518088341, -0.3880331516265869, 0.2636835277080536, -0.2514505982398987), target2); + target2 = MulAdd(br1, MF4x4(-0.1861270815134048, 0.2000686377286911, -0.1501186788082123, 0.1525203883647919, 0.1969228833913803, 0.1174068301916122, -0.1281060427427292, -0.0854888409376144, 0.0290613435208797, -0.0538076497614384, -0.0251582786440849, 0.0692845508456230, 0.0384319014847279, 0.2888138592243195, 0.1151804402470589, 0.0990421250462532), target2); + target2 = MulAdd(br2, MF4x4(-0.0344385802745819, 0.1270371377468109, 0.0922426953911781, -0.0426749102771282, -0.1656492203474045, -0.3273328542709351, -0.0282224025577307, 0.1099396124482155, -0.1113230437040329, 0.2943290174007416, -0.2181112915277481, -0.3177657723426819, -0.1096536740660667, -0.0508293099701405, -0.0256164856255054, -0.0388228967785835), target2); target2 = max(target2, 0) + MF4(0.7142407894134521, 0.0686190053820610, 0.3999933302402496, -1.0247212648391724) * min(target2, 0); tex1[gxy] = target1; @@ -303,45 +305,45 @@ void Pass3(uint2 blockStart, uint3 threadId) { MF4 br2 = tex2.SampleLevel(sam, pos + inputPt, 0); MF4 target1 = MF4(0.0203563515096903, 0.1902436912059784, -0.0757935121655464, 0.0393617525696754); - target1 += mul(tl1, MF4x4(-0.1080558672547340, -0.0400269515812397, 0.1042881682515144, -0.1994346678256989, 0.0172465778887272, -0.0829331055283546, -0.1278677284717560, -0.0762506872415543, -0.0593080408871174, -0.0305212251842022, 0.1326192617416382, -0.3380933105945587, -0.0722763314843178, -0.1975518912076950, -0.0223602931946516, 0.2251029163599014)); - target1 += mul(tl2, MF4x4(0.1747678220272064, 0.0297168865799904, 0.1054855734109879, 0.0803295820951462, -0.0338115766644478, -0.3885377943515778, -0.3540246784687042, -0.0719623491168022, -0.0656022280454636, -0.0469004511833191, 0.1379419565200806, 0.0319863893091679, 0.0799935683608055, -0.0099127553403378, 0.1698455959558487, -0.0108015276491642)); - target1 += mul(ml1, MF4x4(0.1587898135185242, 0.3995443880558014, -0.0333226583898067, 0.2373267263174057, -0.1616930961608887, 0.0659186244010925, 0.0141129801049829, -0.0541022196412086, -0.5743742585182190, 0.1121487766504288, 0.4259817600250244, 0.0280795227736235, -0.3721714317798615, -0.3496374189853668, 0.0997273251414299, -0.0079920450225472)); - target1 += mul(ml2, MF4x4(0.0928084030747414, 0.3107658624649048, 0.1375299990177155, 0.1550617516040802, -0.0780353918671608, -0.0102957757189870, -0.2056752145290375, -0.3927979469299316, -1.2112152576446533, 0.0213295854628086, 0.1396545022726059, 0.0492016039788723, -0.0569122135639191, -0.1691886335611343, -0.1535325646400452, 0.2800904810428619)); - target1 += mul(bl1, MF4x4(0.2494744062423706, -0.0363066755235195, 0.0959179550409317, -0.0048101749271154, -0.0195793900638819, 0.0451166369020939, 0.1470773071050644, -0.0050059854984283, 0.2886958122253418, -0.3221147954463959, -0.7062104344367981, 0.1646659970283508, -0.0092520527541637, -0.1254461258649826, 0.0217506736516953, -0.0678806379437447)); - target1 += mul(bl2, MF4x4(-0.0686557441949844, -0.0414490625262260, -0.1855954080820084, 0.0264346338808537, -0.0296857114881277, -0.0431593284010887, 0.0669397041201591, -0.0946076661348343, -0.2036914378404617, -0.1336101740598679, -0.2099903970956802, -0.1327936947345734, -0.1002155169844627, -0.0368575826287270, -0.1660962998867035, 0.0728288888931274)); - target1 += mul(tc1, MF4x4(0.5504320859909058, 0.2939232587814331, 0.4704743027687073, 0.2129514217376709, 0.0843106731772423, -0.1978624463081360, -0.3298224806785583, 0.1919094175100327, 0.1980742365121841, -0.0644423812627792, 0.0091170109808445, -0.2124856859445572, 0.0804558470845222, -0.1130188927054405, -0.6276652812957764, 0.1861163526773453)); - target1 += mul(tc2, MF4x4(-0.3357668519020081, 0.2093413323163986, 0.4355416595935822, 0.1550502777099609, -0.6510964035987854, -0.1751857399940491, -0.2060168534517288, -0.1710205078125000, -0.1202360317111015, -0.2500316798686981, 0.1074745431542397, -0.2418434321880341, 0.0133954072371125, -0.0555886104702950, 0.1514673978090286, 0.2739115655422211)); - target1 += mul(mc1, MF4x4(-0.3006273508071899, -0.2699472010135651, -0.1982013583183289, -0.0032952548936009, 0.0307833012193441, 0.3671586215496063, -0.0966020002961159, -0.2836556434631348, 0.4297264218330383, 0.6171903610229492, 0.6723483800888062, 0.2705117464065552, -0.1438141316175461, -0.0873940736055374, -0.7001031041145325, -0.2052250355482101)); - target1 += mul(mc2, MF4x4(-0.2875024676322937, -1.6230558156967163, -0.6733398437500000, -0.9642448425292969, -0.1964960694313049, 0.2485812455415726, 0.1236900389194489, -1.1423941850662231, -0.0412602946162224, 0.3412002623081207, 0.3962794244289398, -0.2490761876106262, -0.0058065578341484, -0.4578708708286285, -0.2418260127305984, 0.5357795953750610)); - target1 += mul(bc1, MF4x4(0.0062361713498831, 0.1925230026245117, 0.0824977159500122, 0.0561275146901608, 0.0929671525955200, 0.0698546022176743, 0.3816939592361450, 0.0395248420536518, -0.0719512030482292, 0.0564917400479317, -0.1297784000635147, 0.1245511695742607, 0.0012355837970972, -0.0990515723824501, 0.4213519692420959, -0.1645816713571548)); - target1 += mul(bc2, MF4x4(-0.0611936338245869, -0.0220258161425591, -0.0040935277938843, -0.1060328409075737, -0.0583154149353504, -0.0171997752040625, 0.1058546081185341, 0.2793170809745789, -0.2339317053556442, -0.1972009539604187, -0.0600687190890312, -0.0684379041194916, 0.0243016034364700, -0.2111079394817352, -0.2042971849441528, 0.0724857896566391)); - target1 += mul(tr1, MF4x4(-0.0833447948098183, -0.0533220991492271, 0.0767802372574806, 0.1182348504662514, -0.0223299078643322, -0.0479344800114632, -0.0119727496057749, 0.0524821877479553, -0.0334780365228653, 0.0719002187252045, 0.0439689308404922, 0.0475181229412556, 0.0764308497309685, 0.0086713796481490, -0.1700707823038101, 0.06573542952537547)); - target1 += mul(tr2, MF4x4(0.1391696482896805, 0.0739523395895958, 0.0565792545676231, -0.0430364646017551, 0.0943084582686424, 0.0102064209058881, 0.0120795257389545, -0.0841303989291191, 0.1573246121406555, 0.0164279472082853, 0.0988841354846954, -0.1430613398551941, -0.0572808869183064, -0.0844292491674423, 0.0621565617620945, 0.0923799052834511)); - target1 += mul(mr1, MF4x4(-0.1223107874393463, -0.2441930323839188, -0.2410650849342346, -0.0162935722619295, 0.0695567727088928, -0.0028583710081875, -0.0059417244046926, 0.0715164169669151, -0.0668491795659065, -0.1499572396278381, 0.0869924053549767, 0.0553652904927731, 0.2729566097259521, 0.1370039582252502, -0.1282183527946472, -0.1451860070228577)); - target1 += mul(mr2, MF4x4(0.1331952214241028, 0.0021079662255943, -0.1116734445095062, -0.4168601930141449, 0.0534659475088120, 0.0037860786542296, -0.0366065911948681, 0.1047701835632324, 0.1491260826587677, 0.0782341659069061, 0.0949895009398460, -0.1160908639431000, -0.1057133302092552, -0.2699718773365021, -0.1193305626511574, 0.2142304331064224)); - target1 += mul(br1, MF4x4(0.0041565205901861, -0.1065499857068062, -0.0629659667611122, -0.1144768893718719, 0.0318886637687683, -0.0562519319355488, 0.0043422472663224, 0.0226082988083363, -0.1456198990345001, -0.2398656159639359, -0.2625046670436859, -0.0710547044873238, 0.0067904205061495, 0.0018544088816270, 0.1019348874688148, -0.0186133962124586)); - target1 += mul(br2, MF4x4(0.0732532218098640, 0.1516859829425812, 0.0580205544829369, 0.1968977004289627, -0.0066619524732232, -0.1597842127084732, -0.0990600511431694, -0.1059188917279243, 0.0718481168150902, -0.2222738713026047, -0.1675696671009064, -0.1500017195940018, -0.0568779110908508, -0.0582777932286263, -0.0844587534666061, -0.0263266414403915)); + target1 = MulAdd(tl1, MF4x4(-0.1080558672547340, -0.0400269515812397, 0.1042881682515144, -0.1994346678256989, 0.0172465778887272, -0.0829331055283546, -0.1278677284717560, -0.0762506872415543, -0.0593080408871174, -0.0305212251842022, 0.1326192617416382, -0.3380933105945587, -0.0722763314843178, -0.1975518912076950, -0.0223602931946516, 0.2251029163599014), target1); + target1 = MulAdd(tl2, MF4x4(0.1747678220272064, 0.0297168865799904, 0.1054855734109879, 0.0803295820951462, -0.0338115766644478, -0.3885377943515778, -0.3540246784687042, -0.0719623491168022, -0.0656022280454636, -0.0469004511833191, 0.1379419565200806, 0.0319863893091679, 0.0799935683608055, -0.0099127553403378, 0.1698455959558487, -0.0108015276491642), target1); + target1 = MulAdd(ml1, MF4x4(0.1587898135185242, 0.3995443880558014, -0.0333226583898067, 0.2373267263174057, -0.1616930961608887, 0.0659186244010925, 0.0141129801049829, -0.0541022196412086, -0.5743742585182190, 0.1121487766504288, 0.4259817600250244, 0.0280795227736235, -0.3721714317798615, -0.3496374189853668, 0.0997273251414299, -0.0079920450225472), target1); + target1 = MulAdd(ml2, MF4x4(0.0928084030747414, 0.3107658624649048, 0.1375299990177155, 0.1550617516040802, -0.0780353918671608, -0.0102957757189870, -0.2056752145290375, -0.3927979469299316, -1.2112152576446533, 0.0213295854628086, 0.1396545022726059, 0.0492016039788723, -0.0569122135639191, -0.1691886335611343, -0.1535325646400452, 0.2800904810428619), target1); + target1 = MulAdd(bl1, MF4x4(0.2494744062423706, -0.0363066755235195, 0.0959179550409317, -0.0048101749271154, -0.0195793900638819, 0.0451166369020939, 0.1470773071050644, -0.0050059854984283, 0.2886958122253418, -0.3221147954463959, -0.7062104344367981, 0.1646659970283508, -0.0092520527541637, -0.1254461258649826, 0.0217506736516953, -0.0678806379437447), target1); + target1 = MulAdd(bl2, MF4x4(-0.0686557441949844, -0.0414490625262260, -0.1855954080820084, 0.0264346338808537, -0.0296857114881277, -0.0431593284010887, 0.0669397041201591, -0.0946076661348343, -0.2036914378404617, -0.1336101740598679, -0.2099903970956802, -0.1327936947345734, -0.1002155169844627, -0.0368575826287270, -0.1660962998867035, 0.0728288888931274), target1); + target1 = MulAdd(tc1, MF4x4(0.5504320859909058, 0.2939232587814331, 0.4704743027687073, 0.2129514217376709, 0.0843106731772423, -0.1978624463081360, -0.3298224806785583, 0.1919094175100327, 0.1980742365121841, -0.0644423812627792, 0.0091170109808445, -0.2124856859445572, 0.0804558470845222, -0.1130188927054405, -0.6276652812957764, 0.1861163526773453), target1); + target1 = MulAdd(tc2, MF4x4(-0.3357668519020081, 0.2093413323163986, 0.4355416595935822, 0.1550502777099609, -0.6510964035987854, -0.1751857399940491, -0.2060168534517288, -0.1710205078125000, -0.1202360317111015, -0.2500316798686981, 0.1074745431542397, -0.2418434321880341, 0.0133954072371125, -0.0555886104702950, 0.1514673978090286, 0.2739115655422211), target1); + target1 = MulAdd(mc1, MF4x4(-0.3006273508071899, -0.2699472010135651, -0.1982013583183289, -0.0032952548936009, 0.0307833012193441, 0.3671586215496063, -0.0966020002961159, -0.2836556434631348, 0.4297264218330383, 0.6171903610229492, 0.6723483800888062, 0.2705117464065552, -0.1438141316175461, -0.0873940736055374, -0.7001031041145325, -0.2052250355482101), target1); + target1 = MulAdd(mc2, MF4x4(-0.2875024676322937, -1.6230558156967163, -0.6733398437500000, -0.9642448425292969, -0.1964960694313049, 0.2485812455415726, 0.1236900389194489, -1.1423941850662231, -0.0412602946162224, 0.3412002623081207, 0.3962794244289398, -0.2490761876106262, -0.0058065578341484, -0.4578708708286285, -0.2418260127305984, 0.5357795953750610), target1); + target1 = MulAdd(bc1, MF4x4(0.0062361713498831, 0.1925230026245117, 0.0824977159500122, 0.0561275146901608, 0.0929671525955200, 0.0698546022176743, 0.3816939592361450, 0.0395248420536518, -0.0719512030482292, 0.0564917400479317, -0.1297784000635147, 0.1245511695742607, 0.0012355837970972, -0.0990515723824501, 0.4213519692420959, -0.1645816713571548), target1); + target1 = MulAdd(bc2, MF4x4(-0.0611936338245869, -0.0220258161425591, -0.0040935277938843, -0.1060328409075737, -0.0583154149353504, -0.0171997752040625, 0.1058546081185341, 0.2793170809745789, -0.2339317053556442, -0.1972009539604187, -0.0600687190890312, -0.0684379041194916, 0.0243016034364700, -0.2111079394817352, -0.2042971849441528, 0.0724857896566391), target1); + target1 = MulAdd(tr1, MF4x4(-0.0833447948098183, -0.0533220991492271, 0.0767802372574806, 0.1182348504662514, -0.0223299078643322, -0.0479344800114632, -0.0119727496057749, 0.0524821877479553, -0.0334780365228653, 0.0719002187252045, 0.0439689308404922, 0.0475181229412556, 0.0764308497309685, 0.0086713796481490, -0.1700707823038101, 0.06573542952537547), target1); + target1 = MulAdd(tr2, MF4x4(0.1391696482896805, 0.0739523395895958, 0.0565792545676231, -0.0430364646017551, 0.0943084582686424, 0.0102064209058881, 0.0120795257389545, -0.0841303989291191, 0.1573246121406555, 0.0164279472082853, 0.0988841354846954, -0.1430613398551941, -0.0572808869183064, -0.0844292491674423, 0.0621565617620945, 0.0923799052834511), target1); + target1 = MulAdd(mr1, MF4x4(-0.1223107874393463, -0.2441930323839188, -0.2410650849342346, -0.0162935722619295, 0.0695567727088928, -0.0028583710081875, -0.0059417244046926, 0.0715164169669151, -0.0668491795659065, -0.1499572396278381, 0.0869924053549767, 0.0553652904927731, 0.2729566097259521, 0.1370039582252502, -0.1282183527946472, -0.1451860070228577), target1); + target1 = MulAdd(mr2, MF4x4(0.1331952214241028, 0.0021079662255943, -0.1116734445095062, -0.4168601930141449, 0.0534659475088120, 0.0037860786542296, -0.0366065911948681, 0.1047701835632324, 0.1491260826587677, 0.0782341659069061, 0.0949895009398460, -0.1160908639431000, -0.1057133302092552, -0.2699718773365021, -0.1193305626511574, 0.2142304331064224), target1); + target1 = MulAdd(br1, MF4x4(0.0041565205901861, -0.1065499857068062, -0.0629659667611122, -0.1144768893718719, 0.0318886637687683, -0.0562519319355488, 0.0043422472663224, 0.0226082988083363, -0.1456198990345001, -0.2398656159639359, -0.2625046670436859, -0.0710547044873238, 0.0067904205061495, 0.0018544088816270, 0.1019348874688148, -0.0186133962124586), target1); + target1 = MulAdd(br2, MF4x4(0.0732532218098640, 0.1516859829425812, 0.0580205544829369, 0.1968977004289627, -0.0066619524732232, -0.1597842127084732, -0.0990600511431694, -0.1059188917279243, 0.0718481168150902, -0.2222738713026047, -0.1675696671009064, -0.1500017195940018, -0.0568779110908508, -0.0582777932286263, -0.0844587534666061, -0.0263266414403915), target1); target1 = max(target1, 0) + MF4(-0.2459529191255569, 0.7563464641571045, -0.0705636814236641, -0.0094820559024811) * min(target1, 0); MF4 target2 = MF4(-0.0448397286236286, -0.1649267971515656, -0.1192543581128120, -0.0061073559336364); - target2 += mul(tl1, MF4x4(0.0724840760231018, -0.0480341166257858, -0.1082391515374184, -0.1447021961212158, 0.0723197236657143, 0.0481830574572086, 0.0009448126656935, 0.0353565886616707, -0.0653375908732414, 0.0029647622723132, -0.0016588598955423, -0.2075651884078979, 0.0403469167649746, 0.3929971158504486, 0.0342363268136978, 0.1427230089902878)); - target2 += mul(tl2, MF4x4(-0.0743464827537537, 0.1844420731067657, 0.0256296340376139, -0.2808582782745361, 0.0351609662175179, 0.3277008235454559, -0.0205841138958931, -0.5355809330940247, 0.0681906566023827, 0.2058052271604538, -0.0479847639799118, -0.3735262751579285, -0.0261550359427929, -0.1148884072899818, -0.2329017966985703, 0.0728458985686302)); - target2 += mul(ml1, MF4x4(-0.1236097738146782, 0.1251334398984909, -0.1339431256055832, 0.0198749266564846, -0.1325920224189758, -2.2431972026824951, -0.0680834427475929, -0.5671764612197876, -0.3431925177574158, -0.0983135104179382, -0.2207138091325760, -0.2374879121780396, 0.0127309206873178, 1.3076044321060181, 0.0848151743412018, -0.1928595900535583)); - target2 += mul(ml2, MF4x4(-0.0471093133091927, -0.1513628512620926, -0.0134263765066862, -0.1519252359867096, -0.5260242223739624, 0.2291621714830399, 0.4088975787162781, -0.4315340518951416, 0.0933236032724380, -1.0386694669723511, 0.0015958193689585, -0.2737887501716614, -0.0246253963559866, -0.2722961604595184, -0.1770633459091187, -0.2291279733181000)); - target2 += mul(bl1, MF4x4(-0.0017552347853780, 0.1903935521841049, -0.0740704238414764, -0.0917679518461227, 0.0323882810771465, -0.3029108047485352, 0.0532565414905548, -0.0651542618870735, 0.4868686199188232, 0.8539272546768188, 0.4151960313320160, 0.2619662582874298, -0.0413270294666290, 0.1404227763414383, 0.1027320474386215, 0.3274228572845459)); - target2 += mul(bl2, MF4x4(0.1828346252441406, 0.0274682324379683, -0.1169882863759995, 0.0327291004359722, 0.1786244213581085, -0.6569546461105347, -0.0609031207859516, -0.1676601022481918, -0.1481092721223831, 0.2889067530632019, 0.1246089115738869, 0.2203597426414490, -0.0366856977343559, 0.1539470851421356, 0.0069492300972342, -0.1544002443552017)); - target2 += mul(tc1, MF4x4(0.2073992937803268, -0.0717074573040009, -0.0196173377335072, -0.0956910699605942, 0.0728898122906685, 0.0484567955136299, 0.3063069283962250, -0.3200540542602539, 0.0291527546942234, -0.0265460256487131, 0.1168476045131683, -0.2479970753192902, 0.1224220171570778, 0.0745823010802269, 0.1868897676467896, -0.1958049237728119)); - target2 += mul(tc2, MF4x4(0.0019954447634518, -0.0225235987454653, 0.0812198966741562, 0.0295672398060560, -0.2016931176185608, -0.2239151000976562, -0.2481262385845184, -0.2381946444511414, -0.0520484372973442, -0.1200495883822441, 0.2121954560279846, -0.1573531329631805, -0.0198472067713737, 0.1001087054610252, -0.1084884032607079, -0.3126969039440155)); - target2 += mul(mc1, MF4x4(0.3838330209255219, 0.1678779572248459, 0.6496244072914124, 0.3783606290817261, -0.2198582738637924, -0.2351343184709549, -0.2852248847484589, 0.6310021877288818, 0.8083020448684692, 0.0039323624223471, -0.0901831910014153, 0.0797894075512886, -0.2271467447280884, 0.7082978487014771, 0.1513756662607193, 0.2188975960016251)); - target2 += mul(mc2, MF4x4(-0.2871031761169434, 0.2316448241472244, 0.4947948157787323, 0.3308620452880859, -0.0623455122113228, -0.1314185708761215, -0.2664661705493927, 0.8725078701972961, 0.4541083276271820, 0.1433589160442352, -1.1269453763961792, 0.6427971124649048, -0.1016561388969421, 0.3418317139148712, -0.0991155728697777, -1.0508837699890137)); - target2 += mul(bc1, MF4x4(-0.2179604172706604, 0.1258949041366577, -0.1155700981616974, -0.0536149404942989, -0.0140614463016391, -0.0091438721865416, -0.0501774959266186, -0.3570724725723267, -0.5832386016845703, 0.2004123181104660, 0.2986239194869995, -0.8139168024063110, 0.0142666567116976, 0.0681498944759369, 0.1293468028306961, -0.1001938357949257)); - target2 += mul(bc2, MF4x4(0.1952836811542511, -0.3092494010925293, 0.3063779771327972, 0.1934849917888641, 0.0746696740388870, -0.3533902466297150, -0.1269576102495193, -0.2237875163555145, 0.2470717132091522, -0.2640363574028015, -0.2862776815891266, 0.1740108281373978, -0.0963631942868233, 0.2631850540637970, 0.0400718413293362, -0.3590607047080994)); - target2 += mul(tr1, MF4x4(-0.5299927592277527, 0.0979989692568779, 0.1666737496852875, -0.1547524333000183, -0.0043443185277283, 0.1540203243494034, 0.0594348423182964, -0.0167275425046682, -0.1043610796332359, 0.0504250898957253, 0.0456700921058655, 0.2525034546852112, 0.2241353541612625, -0.1678503304719925, 0.1532667279243469, 0.2901742458343506)); - target2 += mul(tr2, MF4x4(0.0998796448111534, 0.0385462641716003, -0.0762400180101395, -0.1255892217159271, 0.0281430184841156, -0.0304958485066891, -0.1440480053424835, -0.1001605167984962, -0.2257689833641052, 0.2056092917919159, 0.0248535349965096, -0.1383949518203735, -0.0951708629727364, 0.0997417271137238, 0.0275330394506454, -0.5728432536125183)); - target2 += mul(mr1, MF4x4(0.4256163835525513, 0.1745115518569946, -0.2409395426511765, 0.3139856457710266, -0.0036795330233872, 0.1819283962249756, -0.0864531323313713, 0.0102691333740950, -0.3397279977798462, 0.1107075437903404, -0.0035228815395385, -0.2207705229520798, -0.1779139339923859, -0.2106117755174637, 0.0352664291858673, 0.3615589439868927)); - target2 += mul(mr2, MF4x4(-0.0345224253833294, -0.0669926702976227, 0.0907212942838669, -0.3758732676506042, -0.0452554710209370, -0.1134464666247368, -0.0358871109783649, -0.1858227252960205, -0.0233245138078928, -0.0495684742927551, 0.1976234614849091, -0.1165761798620224, -0.0340447537600994, 0.1095624342560768, 0.0110175255686045, -0.8269239664077759)); - target2 += mul(br1, MF4x4(-0.1379280686378479, 0.1004267781972885, 0.0723998174071312, -0.1510958224534988, 0.0610648579895496, 0.0451720170676708, -0.0231927260756493, -0.0251553766429424, 0.2306085377931595, 0.1033207178115845, -0.1316205114126205, 0.1130664870142937, -0.0458516106009483, -0.1152514070272446, -0.0088650323450565, -0.0214479379355907)); - target2 += mul(br2, MF4x4(-0.0545783303678036, -0.0620098188519478, 0.0347074456512928, 0.1096799224615097, 0.0036664425861090, -0.0413107499480247, 0.1443250179290771, -0.1161036714911461, -0.0061624986119568, -0.0252977479249239, 0.3230019211769104, -0.2536626160144806, -0.0565439648926258, 0.0827583819627762, -0.0071726376190782, -0.1983329951763153)); + target2 = MulAdd(tl1, MF4x4(0.0724840760231018, -0.0480341166257858, -0.1082391515374184, -0.1447021961212158, 0.0723197236657143, 0.0481830574572086, 0.0009448126656935, 0.0353565886616707, -0.0653375908732414, 0.0029647622723132, -0.0016588598955423, -0.2075651884078979, 0.0403469167649746, 0.3929971158504486, 0.0342363268136978, 0.1427230089902878), target2); + target2 = MulAdd(tl2, MF4x4(-0.0743464827537537, 0.1844420731067657, 0.0256296340376139, -0.2808582782745361, 0.0351609662175179, 0.3277008235454559, -0.0205841138958931, -0.5355809330940247, 0.0681906566023827, 0.2058052271604538, -0.0479847639799118, -0.3735262751579285, -0.0261550359427929, -0.1148884072899818, -0.2329017966985703, 0.0728458985686302), target2); + target2 = MulAdd(ml1, MF4x4(-0.1236097738146782, 0.1251334398984909, -0.1339431256055832, 0.0198749266564846, -0.1325920224189758, -2.2431972026824951, -0.0680834427475929, -0.5671764612197876, -0.3431925177574158, -0.0983135104179382, -0.2207138091325760, -0.2374879121780396, 0.0127309206873178, 1.3076044321060181, 0.0848151743412018, -0.1928595900535583), target2); + target2 = MulAdd(ml2, MF4x4(-0.0471093133091927, -0.1513628512620926, -0.0134263765066862, -0.1519252359867096, -0.5260242223739624, 0.2291621714830399, 0.4088975787162781, -0.4315340518951416, 0.0933236032724380, -1.0386694669723511, 0.0015958193689585, -0.2737887501716614, -0.0246253963559866, -0.2722961604595184, -0.1770633459091187, -0.2291279733181000), target2); + target2 = MulAdd(bl1, MF4x4(-0.0017552347853780, 0.1903935521841049, -0.0740704238414764, -0.0917679518461227, 0.0323882810771465, -0.3029108047485352, 0.0532565414905548, -0.0651542618870735, 0.4868686199188232, 0.8539272546768188, 0.4151960313320160, 0.2619662582874298, -0.0413270294666290, 0.1404227763414383, 0.1027320474386215, 0.3274228572845459), target2); + target2 = MulAdd(bl2, MF4x4(0.1828346252441406, 0.0274682324379683, -0.1169882863759995, 0.0327291004359722, 0.1786244213581085, -0.6569546461105347, -0.0609031207859516, -0.1676601022481918, -0.1481092721223831, 0.2889067530632019, 0.1246089115738869, 0.2203597426414490, -0.0366856977343559, 0.1539470851421356, 0.0069492300972342, -0.1544002443552017), target2); + target2 = MulAdd(tc1, MF4x4(0.2073992937803268, -0.0717074573040009, -0.0196173377335072, -0.0956910699605942, 0.0728898122906685, 0.0484567955136299, 0.3063069283962250, -0.3200540542602539, 0.0291527546942234, -0.0265460256487131, 0.1168476045131683, -0.2479970753192902, 0.1224220171570778, 0.0745823010802269, 0.1868897676467896, -0.1958049237728119), target2); + target2 = MulAdd(tc2, MF4x4(0.0019954447634518, -0.0225235987454653, 0.0812198966741562, 0.0295672398060560, -0.2016931176185608, -0.2239151000976562, -0.2481262385845184, -0.2381946444511414, -0.0520484372973442, -0.1200495883822441, 0.2121954560279846, -0.1573531329631805, -0.0198472067713737, 0.1001087054610252, -0.1084884032607079, -0.3126969039440155), target2); + target2 = MulAdd(mc1, MF4x4(0.3838330209255219, 0.1678779572248459, 0.6496244072914124, 0.3783606290817261, -0.2198582738637924, -0.2351343184709549, -0.2852248847484589, 0.6310021877288818, 0.8083020448684692, 0.0039323624223471, -0.0901831910014153, 0.0797894075512886, -0.2271467447280884, 0.7082978487014771, 0.1513756662607193, 0.2188975960016251), target2); + target2 = MulAdd(mc2, MF4x4(-0.2871031761169434, 0.2316448241472244, 0.4947948157787323, 0.3308620452880859, -0.0623455122113228, -0.1314185708761215, -0.2664661705493927, 0.8725078701972961, 0.4541083276271820, 0.1433589160442352, -1.1269453763961792, 0.6427971124649048, -0.1016561388969421, 0.3418317139148712, -0.0991155728697777, -1.0508837699890137), target2); + target2 = MulAdd(bc1, MF4x4(-0.2179604172706604, 0.1258949041366577, -0.1155700981616974, -0.0536149404942989, -0.0140614463016391, -0.0091438721865416, -0.0501774959266186, -0.3570724725723267, -0.5832386016845703, 0.2004123181104660, 0.2986239194869995, -0.8139168024063110, 0.0142666567116976, 0.0681498944759369, 0.1293468028306961, -0.1001938357949257), target2); + target2 = MulAdd(bc2, MF4x4(0.1952836811542511, -0.3092494010925293, 0.3063779771327972, 0.1934849917888641, 0.0746696740388870, -0.3533902466297150, -0.1269576102495193, -0.2237875163555145, 0.2470717132091522, -0.2640363574028015, -0.2862776815891266, 0.1740108281373978, -0.0963631942868233, 0.2631850540637970, 0.0400718413293362, -0.3590607047080994), target2); + target2 = MulAdd(tr1, MF4x4(-0.5299927592277527, 0.0979989692568779, 0.1666737496852875, -0.1547524333000183, -0.0043443185277283, 0.1540203243494034, 0.0594348423182964, -0.0167275425046682, -0.1043610796332359, 0.0504250898957253, 0.0456700921058655, 0.2525034546852112, 0.2241353541612625, -0.1678503304719925, 0.1532667279243469, 0.2901742458343506), target2); + target2 = MulAdd(tr2, MF4x4(0.0998796448111534, 0.0385462641716003, -0.0762400180101395, -0.1255892217159271, 0.0281430184841156, -0.0304958485066891, -0.1440480053424835, -0.1001605167984962, -0.2257689833641052, 0.2056092917919159, 0.0248535349965096, -0.1383949518203735, -0.0951708629727364, 0.0997417271137238, 0.0275330394506454, -0.5728432536125183), target2); + target2 = MulAdd(mr1, MF4x4(0.4256163835525513, 0.1745115518569946, -0.2409395426511765, 0.3139856457710266, -0.0036795330233872, 0.1819283962249756, -0.0864531323313713, 0.0102691333740950, -0.3397279977798462, 0.1107075437903404, -0.0035228815395385, -0.2207705229520798, -0.1779139339923859, -0.2106117755174637, 0.0352664291858673, 0.3615589439868927), target2); + target2 = MulAdd(mr2, MF4x4(-0.0345224253833294, -0.0669926702976227, 0.0907212942838669, -0.3758732676506042, -0.0452554710209370, -0.1134464666247368, -0.0358871109783649, -0.1858227252960205, -0.0233245138078928, -0.0495684742927551, 0.1976234614849091, -0.1165761798620224, -0.0340447537600994, 0.1095624342560768, 0.0110175255686045, -0.8269239664077759), target2); + target2 = MulAdd(br1, MF4x4(-0.1379280686378479, 0.1004267781972885, 0.0723998174071312, -0.1510958224534988, 0.0610648579895496, 0.0451720170676708, -0.0231927260756493, -0.0251553766429424, 0.2306085377931595, 0.1033207178115845, -0.1316205114126205, 0.1130664870142937, -0.0458516106009483, -0.1152514070272446, -0.0088650323450565, -0.0214479379355907), target2); + target2 = MulAdd(br2, MF4x4(-0.0545783303678036, -0.0620098188519478, 0.0347074456512928, 0.1096799224615097, 0.0036664425861090, -0.0413107499480247, 0.1443250179290771, -0.1161036714911461, -0.0061624986119568, -0.0252977479249239, 0.3230019211769104, -0.2536626160144806, -0.0565439648926258, 0.0827583819627762, -0.0071726376190782, -0.1983329951763153), target2); target2 = max(target2, 0) + MF4(-0.6312188506126404, -0.1215368881821632, 0.2487443536520004, 0.4051703512668610) * min(target2, 0); tex3[gxy] = target1; @@ -390,45 +392,45 @@ void Pass4(uint2 blockStart, uint3 threadId) { MF4 br2 = tex4.SampleLevel(sam, pos + inputPt, 0); MF4 target1 = MF4(-0.0410279631614685, -0.1111723631620407, -0.0406232848763466, -0.0939496159553528); - target1 += mul(tl1, MF4x4(0.1221675798296928, 0.0083215842023492, -0.0162804014980793, 0.0316714197397232, -0.2205813378095627, 0.1500435769557953, 0.2109555304050446, 0.2741867899894714, 0.0956874340772629, -0.0896854698657990, -0.1657065600156784, -0.1349759399890900, 0.0601499564945698, -0.1523845940828323, -0.1828087568283081, -0.2727653682231903)); - target1 += mul(tl2, MF4x4(-0.0918163508176804, 0.1564485579729080, 0.1133174449205399, 0.2215953171253204, -0.0623677000403404, -0.0497728772461414, -0.0372809022665024, -0.0258478187024593, -0.1364922970533371, 0.1053884625434875, 0.3292874991893768, 0.2693256139755249, -0.0347631797194481, -0.1470523178577423, 0.0096792401745915, -0.0542853325605392)); - target1 += mul(ml1, MF4x4(0.1331177949905396, -0.0964357852935791, -0.0706946700811386, 0.1593225002288818, -0.4815943241119385, 0.1224092170596123, -0.0870430991053581, 0.0005010276800022, -0.0242684502154589, -0.2256436049938202, 0.1367238312959671, 0.0474774017930031, 0.6886650323867798, -0.0065326127223670, 0.1841574758291245, -0.1354993879795074)); - target1 += mul(ml2, MF4x4(-0.1049591675400734, 0.0515934228897095, 0.1128631457686424, 0.1688040047883987, -0.0084041170775890, -0.0006375144002959, -0.0598374009132385, 0.1424416452646255, -0.0048398924991488, 0.1832167655229568, 0.0231959503144026, 0.0816788375377655, -0.1321710795164108, 0.0397678017616272, -0.0058345394209027, 0.5784573554992676)); - target1 += mul(bl1, MF4x4(0.1438693851232529, -0.0694608166813850, -0.0428275354206562, 0.1599996536970139, -0.1651254445314407, 0.1388883888721466, -0.0895452573895454, 0.2569831907749176, 0.3150432109832764, -0.0910519883036613, 0.0367441214621067, 0.1903669685125351, 0.2805841267108917, -0.0444608181715012, 0.0059385276399553, -0.2585869431495667)); - target1 += mul(bl2, MF4x4(-0.1217494234442711, 0.0191769022494555, -0.0065453462302685, 0.1391217857599258, 0.0998920649290085, -0.0162798929959536, 0.0502282194793224, 0.0370145924389362, 0.0290782172232866, -0.0099554909393191, 0.0142515478655696, 0.1248661577701569, -0.0076912571676075, 0.0251651499420404, 0.2190572917461395, 0.0020069130696356)); - target1 += mul(tc1, MF4x4(0.2666685581207275, -0.1625511497259140, -0.3938800692558289, -0.0253848694264889, 0.0987015441060066, 0.2033616453409195, 0.3128099143505096, 0.4608893990516663, 0.0620003379881382, -0.1389972567558289, -0.3095863461494446, -0.4023511111736298, -0.1105777546763420, 0.1115406602621078, 0.3639950752258301, 0.0645622834563255)); - target1 += mul(tc2, MF4x4(-0.2135885655879974, -0.1035343706607819, 0.1795026361942291, 0.1828210204839706, 0.0780984908342361, 0.0656728670001030, 0.0033678691834211, 0.1361345648765564, 0.1712654232978821, -0.0172833092510700, -0.0502183400094509, 0.2910411655902863, 0.0691247656941414, 0.1935720741748810, 0.0652214139699936, 0.1608240753412247)); - target1 += mul(mc1, MF4x4(0.8243460655212402, -0.0979344248771667, -0.0366373993456364, 0.1692261099815369, 0.5517869591712952, 0.3282494544982910, -0.7905511856079102, -0.4462923705577850, -0.0803156569600105, 0.1172509342432022, 0.1864327639341354, 0.1471016854047775, 0.1296005547046661, -0.1004103720188141, 0.3174172043800354, -0.1181766316294670)); - target1 += mul(mc2, MF4x4(0.0259374529123306, -0.0934808850288391, 0.3008874654769897, 0.3957927823066711, -0.4048821926116943, 0.1461934000253677, -0.1819096356630325, -0.1908810287714005, 0.3193186521530151, -0.7438099980354309, 0.1919509470462799, -0.2065188735723495, 0.1752236187458038, -0.6840037107467651, 0.1588519066572189, -0.3956064879894257)); - target1 += mul(bc1, MF4x4(0.1574442386627197, -0.0114925103262067, -0.1208277940750122, 0.2058266401290894, 0.2879209220409393, -0.0419875606894493, -0.1902059614658356, -0.2723863720893860, -0.1086223348975182, -0.0870924964547157, 0.8605937957763672, 0.2656622231006622, -0.1653763055801392, 0.0816384851932526, -0.0137870563194156, 0.1433854848146439)); - target1 += mul(bc2, MF4x4(-0.1565909236669540, -0.0307490080595016, -0.1055604666471481, 0.2573592662811279, -0.1186821162700653, 0.1141471788287163, -0.0272745657712221, -0.1049114838242531, 0.2445316016674042, -0.0027864547446370, -0.1759569346904755, -0.1556979566812515, 0.0550616309046745, 0.1704383641481400, 0.0853662937879562, 0.3280856907367706)); - target1 += mul(tr1, MF4x4(0.1460669338703156, 0.4202052652835846, -0.3638312816619873, -0.0958623066544533, -0.0492525361478329, -0.3664234280586243, 0.0794373303651810, 0.0399017669260502, 0.0629198029637337, 0.1662959158420563, -0.1001493930816650, -0.0587460733950138, -0.0396478697657585, 0.0017320754704997, 0.0314909480512142, -0.0202700830996037)); - target1 += mul(tr2, MF4x4(-0.0964399129152298, 0.0380319654941559, 0.0396055467426777, 0.0265473183244467, -0.0161637403070927, -0.1872924566268921, 0.1670000404119492, 0.0029466480482370, -0.1093841269612312, -0.3629201948642731, -0.0562992505729198, 0.1792684197425842, -0.0203859098255634, 0.0983991250395775, 0.0058611719869077, 0.1627455651760101)); - target1 += mul(mr1, MF4x4(-0.1117974221706390, 0.7562329173088074, -0.2046248912811279, 0.1677842289209366, -0.2063486129045486, -0.6023545265197754, -0.5739209651947021, 0.5110496878623962, -0.0715268924832344, -0.1373793482780457, 0.1251420378684998, -0.0477442294359207, 0.4961377978324890, 0.2688887119293213, 0.3146316707134247, -0.5197153687477112)); - target1 += mul(mr2, MF4x4(-0.1314805448055267, 0.0746279135346413, 0.3457699418067932, 0.2564856410026550, 0.0839370116591454, -0.6136511564254761, -0.4646295011043549, 0.0612256154417992, -0.1910563558340073, -0.0935136750340462, -0.2426030039787292, 0.2102959007024765, 0.1575350016355515, 0.6145061254501343, 0.3368154168128967, -0.0974092856049538)); - target1 += mul(br1, MF4x4(0.0565315335988998, 0.2393359094858170, -0.0932938233017921, 0.1555283814668655, 0.0123879108577967, -0.1247719228267670, -0.0564610138535500, -0.1125799044966698, -0.0104600470513105, 0.0482629500329494, 0.2316472232341766, 0.1083717569708824, -0.0525921434164047, 0.0643989592790604, -0.0525734610855579, -0.0503251366317272)); - target1 += mul(br2, MF4x4(-0.1835366338491440, 0.0978360474109650, -0.1111819595098495, 0.2109299153089523, 0.0509372949600220, -0.1992686837911606, 0.0677929744124413, -0.0870024710893631, -0.0412262082099915, -0.0697719156742096, -0.0967373847961426, 0.0137308547273278, 0.0195730421692133, 0.0410240143537521, 0.1157210171222687, 0.2283479571342468)); + target1 = MulAdd(tl1, MF4x4(0.1221675798296928, 0.0083215842023492, -0.0162804014980793, 0.0316714197397232, -0.2205813378095627, 0.1500435769557953, 0.2109555304050446, 0.2741867899894714, 0.0956874340772629, -0.0896854698657990, -0.1657065600156784, -0.1349759399890900, 0.0601499564945698, -0.1523845940828323, -0.1828087568283081, -0.2727653682231903), target1); + target1 = MulAdd(tl2, MF4x4(-0.0918163508176804, 0.1564485579729080, 0.1133174449205399, 0.2215953171253204, -0.0623677000403404, -0.0497728772461414, -0.0372809022665024, -0.0258478187024593, -0.1364922970533371, 0.1053884625434875, 0.3292874991893768, 0.2693256139755249, -0.0347631797194481, -0.1470523178577423, 0.0096792401745915, -0.0542853325605392), target1); + target1 = MulAdd(ml1, MF4x4(0.1331177949905396, -0.0964357852935791, -0.0706946700811386, 0.1593225002288818, -0.4815943241119385, 0.1224092170596123, -0.0870430991053581, 0.0005010276800022, -0.0242684502154589, -0.2256436049938202, 0.1367238312959671, 0.0474774017930031, 0.6886650323867798, -0.0065326127223670, 0.1841574758291245, -0.1354993879795074), target1); + target1 = MulAdd(ml2, MF4x4(-0.1049591675400734, 0.0515934228897095, 0.1128631457686424, 0.1688040047883987, -0.0084041170775890, -0.0006375144002959, -0.0598374009132385, 0.1424416452646255, -0.0048398924991488, 0.1832167655229568, 0.0231959503144026, 0.0816788375377655, -0.1321710795164108, 0.0397678017616272, -0.0058345394209027, 0.5784573554992676), target1); + target1 = MulAdd(bl1, MF4x4(0.1438693851232529, -0.0694608166813850, -0.0428275354206562, 0.1599996536970139, -0.1651254445314407, 0.1388883888721466, -0.0895452573895454, 0.2569831907749176, 0.3150432109832764, -0.0910519883036613, 0.0367441214621067, 0.1903669685125351, 0.2805841267108917, -0.0444608181715012, 0.0059385276399553, -0.2585869431495667), target1); + target1 = MulAdd(bl2, MF4x4(-0.1217494234442711, 0.0191769022494555, -0.0065453462302685, 0.1391217857599258, 0.0998920649290085, -0.0162798929959536, 0.0502282194793224, 0.0370145924389362, 0.0290782172232866, -0.0099554909393191, 0.0142515478655696, 0.1248661577701569, -0.0076912571676075, 0.0251651499420404, 0.2190572917461395, 0.0020069130696356), target1); + target1 = MulAdd(tc1, MF4x4(0.2666685581207275, -0.1625511497259140, -0.3938800692558289, -0.0253848694264889, 0.0987015441060066, 0.2033616453409195, 0.3128099143505096, 0.4608893990516663, 0.0620003379881382, -0.1389972567558289, -0.3095863461494446, -0.4023511111736298, -0.1105777546763420, 0.1115406602621078, 0.3639950752258301, 0.0645622834563255), target1); + target1 = MulAdd(tc2, MF4x4(-0.2135885655879974, -0.1035343706607819, 0.1795026361942291, 0.1828210204839706, 0.0780984908342361, 0.0656728670001030, 0.0033678691834211, 0.1361345648765564, 0.1712654232978821, -0.0172833092510700, -0.0502183400094509, 0.2910411655902863, 0.0691247656941414, 0.1935720741748810, 0.0652214139699936, 0.1608240753412247), target1); + target1 = MulAdd(mc1, MF4x4(0.8243460655212402, -0.0979344248771667, -0.0366373993456364, 0.1692261099815369, 0.5517869591712952, 0.3282494544982910, -0.7905511856079102, -0.4462923705577850, -0.0803156569600105, 0.1172509342432022, 0.1864327639341354, 0.1471016854047775, 0.1296005547046661, -0.1004103720188141, 0.3174172043800354, -0.1181766316294670), target1); + target1 = MulAdd(mc2, MF4x4(0.0259374529123306, -0.0934808850288391, 0.3008874654769897, 0.3957927823066711, -0.4048821926116943, 0.1461934000253677, -0.1819096356630325, -0.1908810287714005, 0.3193186521530151, -0.7438099980354309, 0.1919509470462799, -0.2065188735723495, 0.1752236187458038, -0.6840037107467651, 0.1588519066572189, -0.3956064879894257), target1); + target1 = MulAdd(bc1, MF4x4(0.1574442386627197, -0.0114925103262067, -0.1208277940750122, 0.2058266401290894, 0.2879209220409393, -0.0419875606894493, -0.1902059614658356, -0.2723863720893860, -0.1086223348975182, -0.0870924964547157, 0.8605937957763672, 0.2656622231006622, -0.1653763055801392, 0.0816384851932526, -0.0137870563194156, 0.1433854848146439), target1); + target1 = MulAdd(bc2, MF4x4(-0.1565909236669540, -0.0307490080595016, -0.1055604666471481, 0.2573592662811279, -0.1186821162700653, 0.1141471788287163, -0.0272745657712221, -0.1049114838242531, 0.2445316016674042, -0.0027864547446370, -0.1759569346904755, -0.1556979566812515, 0.0550616309046745, 0.1704383641481400, 0.0853662937879562, 0.3280856907367706), target1); + target1 = MulAdd(tr1, MF4x4(0.1460669338703156, 0.4202052652835846, -0.3638312816619873, -0.0958623066544533, -0.0492525361478329, -0.3664234280586243, 0.0794373303651810, 0.0399017669260502, 0.0629198029637337, 0.1662959158420563, -0.1001493930816650, -0.0587460733950138, -0.0396478697657585, 0.0017320754704997, 0.0314909480512142, -0.0202700830996037), target1); + target1 = MulAdd(tr2, MF4x4(-0.0964399129152298, 0.0380319654941559, 0.0396055467426777, 0.0265473183244467, -0.0161637403070927, -0.1872924566268921, 0.1670000404119492, 0.0029466480482370, -0.1093841269612312, -0.3629201948642731, -0.0562992505729198, 0.1792684197425842, -0.0203859098255634, 0.0983991250395775, 0.0058611719869077, 0.1627455651760101), target1); + target1 = MulAdd(mr1, MF4x4(-0.1117974221706390, 0.7562329173088074, -0.2046248912811279, 0.1677842289209366, -0.2063486129045486, -0.6023545265197754, -0.5739209651947021, 0.5110496878623962, -0.0715268924832344, -0.1373793482780457, 0.1251420378684998, -0.0477442294359207, 0.4961377978324890, 0.2688887119293213, 0.3146316707134247, -0.5197153687477112), target1); + target1 = MulAdd(mr2, MF4x4(-0.1314805448055267, 0.0746279135346413, 0.3457699418067932, 0.2564856410026550, 0.0839370116591454, -0.6136511564254761, -0.4646295011043549, 0.0612256154417992, -0.1910563558340073, -0.0935136750340462, -0.2426030039787292, 0.2102959007024765, 0.1575350016355515, 0.6145061254501343, 0.3368154168128967, -0.0974092856049538), target1); + target1 = MulAdd(br1, MF4x4(0.0565315335988998, 0.2393359094858170, -0.0932938233017921, 0.1555283814668655, 0.0123879108577967, -0.1247719228267670, -0.0564610138535500, -0.1125799044966698, -0.0104600470513105, 0.0482629500329494, 0.2316472232341766, 0.1083717569708824, -0.0525921434164047, 0.0643989592790604, -0.0525734610855579, -0.0503251366317272), target1); + target1 = MulAdd(br2, MF4x4(-0.1835366338491440, 0.0978360474109650, -0.1111819595098495, 0.2109299153089523, 0.0509372949600220, -0.1992686837911606, 0.0677929744124413, -0.0870024710893631, -0.0412262082099915, -0.0697719156742096, -0.0967373847961426, 0.0137308547273278, 0.0195730421692133, 0.0410240143537521, 0.1157210171222687, 0.2283479571342468), target1); target1 = max(target1, 0) + MF4(0.1991519331932068, -0.1275756657123566, -0.0622864030301571, 0.1586369574069977) * min(target1, 0); MF4 target2 = MF4(-0.0089084329083562, -0.0336172059178352, 0.0177190825343132, 0.0529975406825542); - target2 += mul(tl1, MF4x4(-0.0275970958173275, 0.0141968233510852, 0.1181544512510300, -0.0572245270013809, 0.1161347925662994, -0.1156444773077965, -0.2549640238285065, 0.0882879272103310, -0.0715355500578880, 0.0151285668835044, 0.1079384386539459, 0.0650847703218460, -0.1597152203321457, 0.0669793561100960, 0.2084401696920395, -0.0951152443885803)); - target2 += mul(tl2, MF4x4(0.0404323227703571, -0.0206144321709871, -0.1080420613288879, -0.2038477361202240, 0.0248847268521786, -0.0064681121148169, 0.0389525443315506, 0.0011026862775907, 0.0885242074728012, 0.0295896343886852, -0.3323790132999420, 0.1935138553380966, -0.0466548874974251, 0.1023886054754257, 0.1257870644330978, -0.1541756242513657)); - target2 += mul(ml1, MF4x4(-0.0076520540751517, 0.0361139886081219, 0.1749804913997650, -0.2051989138126373, 0.0022692133206874, -0.0282937753945589, -0.2039019316434860, -0.2343468815088272, -0.0357327871024609, -0.0570764988660812, 0.2925858795642853, -0.1988349705934525, -0.0584560707211494, -0.0341510921716690, 0.1300961822271347, 0.5184492468833923)); - target2 += mul(ml2, MF4x4(0.0884973928332329, 0.0333527140319347, 0.0180535931140184, -0.2655122876167297, 0.0433661043643951, 0.0104369185864925, 0.0010909073753282, -0.0705273598432541, -0.0602585524320602, 0.2420269846916199, -0.4731841087341309, -0.8040290474891663, 0.3066828548908234, -0.2466925680637360, 0.0938910692930222, -0.2002603262662888)); - target2 += mul(bl1, MF4x4(0.0549152903258801, 0.0291299298405647, 0.0946277007460594, -0.0581608228385448, 0.0669180899858475, -0.0635575056076050, -0.2427970170974731, -0.2677550315856934, 0.2226776182651520, 0.1301570236682892, -0.1519709974527359, 0.0671724304556847, -0.0526433289051056, 0.1898351758718491, 0.2383745312690735, 0.21917118132114417)); - target2 += mul(bl2, MF4x4(-0.0234222635626793, 0.0238620284944773, 0.0427630320191383, -0.1080563366413116, 0.0332126952707767, -0.0039051575586200, 0.0293126031756401, 0.0161924213171005, 0.0453971028327942, 0.0131999952718616, -0.0689036697149277, 0.2349009960889816, 0.1013344153761864, 0.2706570029258728, 0.1191426888108253, -0.2830821871757507)); - target2 += mul(tc1, MF4x4(0.0181465242058039, -0.0571886636316776, 0.4875229001045227, -0.4244020283222198, 0.4331104159355164, 0.1066712513566017, -0.5277034044265747, 0.1110567077994347, -0.1179447323083878, -0.0273578558117151, 0.1798476576805115, -0.2829602360725403, 0.1012385115027428, -0.2528488039970398, 0.1697608679533005, 0.1121710017323494)); - target2 += mul(tc2, MF4x4(-0.1404130905866623, -0.0984055623412132, -0.0279541295021772, -0.1321212500333786, -0.0841855704784393, 0.1336171030998230, -0.1458790600299835, -0.0044095455668867, 0.2203754037618637, 0.1455714553594589, -0.2362042963504791, -0.0329121425747871, -0.1683547794818878, 0.0289597529917955, 0.3424547612667084, 0.0143845872953534)); - target2 += mul(mc1, MF4x4(0.0287246014922857, 0.1948280781507492, 0.5998955368995667, 0.1192114129662514, -0.6269109249114990, 0.8724324703216553, -0.6399638652801514, -0.4201497733592987, -0.3355066180229187, -0.1566904038190842, -0.4396412074565887, 0.1525828838348389, 0.5573399066925049, 0.2324324846267700, 0.2762884795665741, 0.0406046211719513)); - target2 += mul(mc2, MF4x4(0.3890096545219421, -0.0574061162769794, -0.1468243300914764, -0.5953360199928284, -0.1363215148448944, -0.2224670499563217, -0.2237723320722580, 0.2738097012042999, -0.4868114292621613, -0.5029351711273193, -0.3570256233215332, -0.1776263266801834, -0.0176672954112291, -0.4318660795688629, 1.0395888090133667, 0.1728395074605942)); - target2 += mul(bc1, MF4x4(0.1337304115295410, -0.0809440389275551, 0.1600498855113983, -0.1108811497688293, -0.2376178801059723, -0.1532768607139587, -0.0447455830872059, 0.2515332102775574, 0.4848278462886810, -0.0915748402476311, -0.0336527302861214, -0.2141884714365005, 0.2125129699707031, 0.3237875998020172, 0.0022272330243140, -0.0167857185006142)); - target2 += mul(bc2, MF4x4(0.0457934997975826, 0.0510537698864937, -0.0519523508846760, -0.4506326615810394, -0.1029204949736595, 0.0116113182157278, -0.1750748157501221, -0.0048758201301098, 0.1506977379322052, 0.0633068457245827, -0.1628549993038177, -0.0144928665831685, 0.1408756822347641, 0.2896180152893066, 0.0803691521286964, -0.4930096566677094)); - target2 += mul(tr1, MF4x4(-0.0484248884022236, 0.1371297985315323, -0.1235475391149521, -0.2618594765663147, -0.0280395895242691, 0.0248795989900827, 0.1204105168581009, 0.3246576189994812, 0.0426272377371788, -0.0520061068236828, 0.0575957447290421, -0.2613646090030670, 0.1165295541286469, -0.0390013493597507, -0.0470846109092236, -0.0014663023175672)); - target2 += mul(tr2, MF4x4(-0.1066762879490852, -0.0869804695248604, -0.0099332248792052, -0.1355892717838287, -0.0760413780808449, 0.1377770304679871, -0.0263407956808805, 0.0880135521292686, 0.1496269851922989, -0.0487459264695644, 0.1286851912736893, 0.2218491584062576, 0.1723349541425705, -0.0165541302412748, -0.0690477639436722, -0.2388458102941513)); - target2 += mul(mr1, MF4x4(-0.4236431121826172, 0.0465179122984409, -0.1526456624269485, 0.1426440477371216, 0.5913932919502258, -0.1082349196076393, 0.2731275856494904, -0.2687640488147736, -0.4628683030605316, -0.0537119321525097, -0.1597615629434586, 0.0528527684509754, -0.3485085070133209, 0.1395110934972763, 0.0642972290515900, 0.0323829315602779)); - target2 += mul(mr2, MF4x4(0.0066713397391140, -0.0482029877603054, -0.1707276403903961, -0.1001396998763084, 0.0539822019636631, -0.1624453216791153, 0.4913550019264221, 0.3687861263751984, 0.0491421781480312, 0.1311376541852951, 0.0992425829172134, -0.4636098444461823, -0.3415873646736145, -0.0153833786025643, -0.0270162131637335, -0.0935514941811562)); - target2 += mul(br1, MF4x4(-0.1738258153200150, 0.0458541549742222, -0.0653749182820320, -0.0156540926545858, -0.0357586294412613, -0.1486178338527679, 0.1798035055398941, -0.1310307979583740, 0.0783249065279961, -0.0261360015720129, -0.1047066971659660, 0.3385537564754486, -0.0339452810585499, 0.2299628853797913, -0.1408322304487228, -0.0352708548307419)); - target2 += mul(br2, MF4x4(0.0463018082082272, 0.0565674640238285, -0.0538956597447395, -0.2354862987995148, 0.0297824125736952, 0.0307939313352108, 0.1271791011095047, -0.1025698855519295, 0.1060482114553452, -0.0703211054205894, -0.0083062350749969, 0.0474255047738552, 0.0442508421838284, 0.1569559425115585, -0.0442709513008595, -0.1188704669475555)); + target2 = MulAdd(tl1, MF4x4(-0.0275970958173275, 0.0141968233510852, 0.1181544512510300, -0.0572245270013809, 0.1161347925662994, -0.1156444773077965, -0.2549640238285065, 0.0882879272103310, -0.0715355500578880, 0.0151285668835044, 0.1079384386539459, 0.0650847703218460, -0.1597152203321457, 0.0669793561100960, 0.2084401696920395, -0.0951152443885803), target2); + target2 = MulAdd(tl2, MF4x4(0.0404323227703571, -0.0206144321709871, -0.1080420613288879, -0.2038477361202240, 0.0248847268521786, -0.0064681121148169, 0.0389525443315506, 0.0011026862775907, 0.0885242074728012, 0.0295896343886852, -0.3323790132999420, 0.1935138553380966, -0.0466548874974251, 0.1023886054754257, 0.1257870644330978, -0.1541756242513657), target2); + target2 = MulAdd(ml1, MF4x4(-0.0076520540751517, 0.0361139886081219, 0.1749804913997650, -0.2051989138126373, 0.0022692133206874, -0.0282937753945589, -0.2039019316434860, -0.2343468815088272, -0.0357327871024609, -0.0570764988660812, 0.2925858795642853, -0.1988349705934525, -0.0584560707211494, -0.0341510921716690, 0.1300961822271347, 0.5184492468833923), target2); + target2 = MulAdd(ml2, MF4x4(0.0884973928332329, 0.0333527140319347, 0.0180535931140184, -0.2655122876167297, 0.0433661043643951, 0.0104369185864925, 0.0010909073753282, -0.0705273598432541, -0.0602585524320602, 0.2420269846916199, -0.4731841087341309, -0.8040290474891663, 0.3066828548908234, -0.2466925680637360, 0.0938910692930222, -0.2002603262662888), target2); + target2 = MulAdd(bl1, MF4x4(0.0549152903258801, 0.0291299298405647, 0.0946277007460594, -0.0581608228385448, 0.0669180899858475, -0.0635575056076050, -0.2427970170974731, -0.2677550315856934, 0.2226776182651520, 0.1301570236682892, -0.1519709974527359, 0.0671724304556847, -0.0526433289051056, 0.1898351758718491, 0.2383745312690735, 0.21917118132114417), target2); + target2 = MulAdd(bl2, MF4x4(-0.0234222635626793, 0.0238620284944773, 0.0427630320191383, -0.1080563366413116, 0.0332126952707767, -0.0039051575586200, 0.0293126031756401, 0.0161924213171005, 0.0453971028327942, 0.0131999952718616, -0.0689036697149277, 0.2349009960889816, 0.1013344153761864, 0.2706570029258728, 0.1191426888108253, -0.2830821871757507), target2); + target2 = MulAdd(tc1, MF4x4(0.0181465242058039, -0.0571886636316776, 0.4875229001045227, -0.4244020283222198, 0.4331104159355164, 0.1066712513566017, -0.5277034044265747, 0.1110567077994347, -0.1179447323083878, -0.0273578558117151, 0.1798476576805115, -0.2829602360725403, 0.1012385115027428, -0.2528488039970398, 0.1697608679533005, 0.1121710017323494), target2); + target2 = MulAdd(tc2, MF4x4(-0.1404130905866623, -0.0984055623412132, -0.0279541295021772, -0.1321212500333786, -0.0841855704784393, 0.1336171030998230, -0.1458790600299835, -0.0044095455668867, 0.2203754037618637, 0.1455714553594589, -0.2362042963504791, -0.0329121425747871, -0.1683547794818878, 0.0289597529917955, 0.3424547612667084, 0.0143845872953534), target2); + target2 = MulAdd(mc1, MF4x4(0.0287246014922857, 0.1948280781507492, 0.5998955368995667, 0.1192114129662514, -0.6269109249114990, 0.8724324703216553, -0.6399638652801514, -0.4201497733592987, -0.3355066180229187, -0.1566904038190842, -0.4396412074565887, 0.1525828838348389, 0.5573399066925049, 0.2324324846267700, 0.2762884795665741, 0.0406046211719513), target2); + target2 = MulAdd(mc2, MF4x4(0.3890096545219421, -0.0574061162769794, -0.1468243300914764, -0.5953360199928284, -0.1363215148448944, -0.2224670499563217, -0.2237723320722580, 0.2738097012042999, -0.4868114292621613, -0.5029351711273193, -0.3570256233215332, -0.1776263266801834, -0.0176672954112291, -0.4318660795688629, 1.0395888090133667, 0.1728395074605942), target2); + target2 = MulAdd(bc1, MF4x4(0.1337304115295410, -0.0809440389275551, 0.1600498855113983, -0.1108811497688293, -0.2376178801059723, -0.1532768607139587, -0.0447455830872059, 0.2515332102775574, 0.4848278462886810, -0.0915748402476311, -0.0336527302861214, -0.2141884714365005, 0.2125129699707031, 0.3237875998020172, 0.0022272330243140, -0.0167857185006142), target2); + target2 = MulAdd(bc2, MF4x4(0.0457934997975826, 0.0510537698864937, -0.0519523508846760, -0.4506326615810394, -0.1029204949736595, 0.0116113182157278, -0.1750748157501221, -0.0048758201301098, 0.1506977379322052, 0.0633068457245827, -0.1628549993038177, -0.0144928665831685, 0.1408756822347641, 0.2896180152893066, 0.0803691521286964, -0.4930096566677094), target2); + target2 = MulAdd(tr1, MF4x4(-0.0484248884022236, 0.1371297985315323, -0.1235475391149521, -0.2618594765663147, -0.0280395895242691, 0.0248795989900827, 0.1204105168581009, 0.3246576189994812, 0.0426272377371788, -0.0520061068236828, 0.0575957447290421, -0.2613646090030670, 0.1165295541286469, -0.0390013493597507, -0.0470846109092236, -0.0014663023175672), target2); + target2 = MulAdd(tr2, MF4x4(-0.1066762879490852, -0.0869804695248604, -0.0099332248792052, -0.1355892717838287, -0.0760413780808449, 0.1377770304679871, -0.0263407956808805, 0.0880135521292686, 0.1496269851922989, -0.0487459264695644, 0.1286851912736893, 0.2218491584062576, 0.1723349541425705, -0.0165541302412748, -0.0690477639436722, -0.2388458102941513), target2); + target2 = MulAdd(mr1, MF4x4(-0.4236431121826172, 0.0465179122984409, -0.1526456624269485, 0.1426440477371216, 0.5913932919502258, -0.1082349196076393, 0.2731275856494904, -0.2687640488147736, -0.4628683030605316, -0.0537119321525097, -0.1597615629434586, 0.0528527684509754, -0.3485085070133209, 0.1395110934972763, 0.0642972290515900, 0.0323829315602779), target2); + target2 = MulAdd(mr2, MF4x4(0.0066713397391140, -0.0482029877603054, -0.1707276403903961, -0.1001396998763084, 0.0539822019636631, -0.1624453216791153, 0.4913550019264221, 0.3687861263751984, 0.0491421781480312, 0.1311376541852951, 0.0992425829172134, -0.4636098444461823, -0.3415873646736145, -0.0153833786025643, -0.0270162131637335, -0.0935514941811562), target2); + target2 = MulAdd(br1, MF4x4(-0.1738258153200150, 0.0458541549742222, -0.0653749182820320, -0.0156540926545858, -0.0357586294412613, -0.1486178338527679, 0.1798035055398941, -0.1310307979583740, 0.0783249065279961, -0.0261360015720129, -0.1047066971659660, 0.3385537564754486, -0.0339452810585499, 0.2299628853797913, -0.1408322304487228, -0.0352708548307419), target2); + target2 = MulAdd(br2, MF4x4(0.0463018082082272, 0.0565674640238285, -0.0538956597447395, -0.2354862987995148, 0.0297824125736952, 0.0307939313352108, 0.1271791011095047, -0.1025698855519295, 0.1060482114553452, -0.0703211054205894, -0.0083062350749969, 0.0474255047738552, 0.0442508421838284, 0.1569559425115585, -0.0442709513008595, -0.1188704669475555), target2); target2 = max(target2, 0) + MF4(0.7366524934768677, 1.0013850927352905, -0.0276311747729778, 0.0734841898083687) * min(target2, 0); tex1[gxy] = target1; @@ -477,56 +479,56 @@ void Pass5(uint2 blockStart, uint3 threadId) { MF4 br2 = tex2.SampleLevel(sam, pos + inputPt, 0); MF4 c1 = { -0.1306160986423492,-0.0808217376470566,-0.2880123555660248,0.0099629526957870 }; - c1 += mul(tl1, MF4x4(-0.1033539846539497, 0.0541300140321255, -0.0804840475320816, -0.0334571413695812, -0.0264753755182028, 0.1118840202689171, 0.1186013221740723, -0.0127575425431132, 0.2236593365669250, 0.0025286162272096, 0.0985530614852905, 0.0685181617736816, -0.1884875595569611, 0.0530862808227539, -0.0482063069939613, 0.0375233069062233)); - c1 += mul(tl2, MF4x4(0.1837068796157837, -0.0632847175002098, 0.0016613919287920, 0.0392861217260361, 0.2923883199691772, -0.1713902205228806, 0.1907587945461273, 0.0550456829369068, 0.0644215345382690, -0.1046456992626190, 0.0187383033335209, 0.0770180150866508, 0.1933846622705460, -0.0455715768039227, 0.0375007353723049, -0.1053109914064407)); - c1 += mul(ml1, MF4x4(-0.0972480997443199, 0.2820451855659485, 0.0114549007266760, -0.0954328626394272, 0.0706252008676529, 0.4829064607620239, -0.6371517181396484, 0.0005180989392102, 0.3280143439769745, 0.0665246024727821, -0.0503116399049759, -0.1261110603809357, 0.1114177703857422, -0.2053108513355255, 0.1428771317005157, 0.3926100134849548)); - c1 += mul(ml2, MF4x4(-0.2571723163127899, 0.1627264618873596, -0.4940335154533386, -0.1361546218395233, 0.0804422944784164, -0.4231885373592377, 0.0650202706456184, 0.0518481098115444, -0.0502478554844856, -0.1305799931287766, 0.1814480125904083, 0.0090866927057505, -0.0510044656693935, -0.1691461503505707, 0.0922467112541199, -0.0314207412302494)); - c1 += mul(bl1, MF4x4(0.1270498335361481, 0.0563284493982792, -0.0435525141656399, 0.1569847911596298, 0.0576847903430462, 0.3461692929267883, -0.0325655154883862, -0.2688976824283600, -0.1341977864503860, -0.1382253766059875, 0.2293784171342850, -0.1111817285418510, -0.1402447521686554, -0.3257531225681305, 0.0598510466516018, 0.1008039116859436)); - c1 += mul(bl2, MF4x4(0.1698816716670990, 0.3491003513336182, -0.1367681026458740, -0.1165873408317566, -0.2091718912124634, -0.1487034261226654, -0.0569749698042870, -0.2100717276334763, 0.0404917001724243, -0.1372035890817642, 0.0689046755433083, -0.0367818064987659, -0.0325474888086319, -0.0114965448155999, -0.0137249026447535, -0.0279692262411118)); - c1 += mul(tc1, MF4x4(-0.0563433989882469, 0.0132494345307350, -0.2434540390968323, 0.0796563774347305, -0.2109155058860779, 0.0387088693678379, -0.0591037571430206, 0.0955820381641388, 0.4660535752773285, -0.1204202473163605, 0.1332369595766068, -0.0285425651818514, -0.3886952698230743, -0.0434980578720570, -0.0849134400486946, 0.0802380964159966)); - c1 += mul(tc2, MF4x4(0.0412235632538795, 0.1571959257125854, 0.2050069272518158, -0.1138664111495018, 0.1962715685367584, 0.0594439841806889, 0.0351715497672558, -0.0129811102524400, 0.2055217623710632, -0.0647534057497978, 0.0373471938073635, 0.0877277255058289, -0.5734645724296570, 0.1188675239682198, -0.1145943328738213, -0.1182733029127121)); - c1 += mul(mc1, MF4x4(-0.2004909217357635, -0.4817073047161102, 0.5596802830696106, -0.0327854752540588, 0.0989314392209053, 0.4127818942070007, 0.7265836596488953, -0.2692042589187622, 0.5195841789245605, -0.2357539832592010, -0.3819393217563629, 0.1755530238151550, 0.6578183770179749, 0.1075539961457253, -0.2688144743442535, 0.3242723941802979)); - c1 += mul(mc2, MF4x4(-0.3221310675144196, 0.2978510260581970, 0.2269985526800156, -0.3184116482734680, 0.4845580160617828, 0.4407236874103546, 0.0099756307899952, -0.3121858239173889, -0.3810067176818848, -0.0553649961948395, 0.0202834140509367, 0.0409953594207764, 0.2532750964164734, 0.2731618583202362, 0.1237529441714287, 0.0134243080392480)); - c1 += mul(bc1, MF4x4(0.1835541725158691, 0.0549701862037182, -0.1749316602945328, -0.2030028849840164, 0.0263462308794260, 0.2781440317630768, 0.0372458845376968, 0.3643021881580353, -0.4047883749008179, 0.0660117194056511, 0.4863115549087524, -0.2024163603782654, -0.6403482556343079, 0.2765505611896515, 0.1417075097560883, 0.5064445734024048)); - c1 += mul(bc2, MF4x4(0.6106975078582764, -0.1570862233638763, -0.3223383128643036, -0.2497926801443100, -0.4854303300380707, 0.0132978223264217, -0.0609334111213684, 0.1285556703805923, -0.1412864029407501, -0.1379042416810989, -0.0258826259523630, 0.1357705891132355, -0.1285902857780457, -0.0577826797962189, 0.0550044551491737, 0.1717510819435120)); - c1 += mul(tr1, MF4x4(0.1389609426259995, 0.0835867226123810, 0.0309768319129944, -0.0278116948902607, -0.0390677824616432, -0.0111810686066747, -0.0025318188127130, 0.0069569633342326, 0.0347319357097149, 0.0191543344408274, 0.0314339138567448, -0.0228427499532700, 0.0416300334036350, 0.0249234102666378, 0.1210031509399414, 0.1142473593354225)); - c1 += mul(tr2, MF4x4(0.0607251487672329, 0.0386395826935768, -0.0219341218471527, -0.1102298423647881, 0.1487188935279846, 0.0602982006967068, -0.0280748903751373, -0.0211924221366644, 0.0042894422076643, -0.0269144997000694, 0.0814756453037262, -0.0314031280577183, -0.0213186051696539, -0.1362965404987335, 0.0382767543196678, -0.0669511556625366)); - c1 += mul(mr1, MF4x4(-0.2397561967372894, 0.3023172020912170, -0.2398054003715515, 0.0041919997893274, -0.1016605198383331, -0.1521034836769104, -0.1526568531990051, 0.0272433310747147, 0.0741761848330498, 0.1116370111703873, 0.1149727106094360, -0.0809784531593323, -0.1448147594928741, -0.0943927690386772, -0.0086280042305589, 0.1243222951889038)); - c1 += mul(mr2, MF4x4(-0.0469366572797298, -0.1655988991260529, -0.1029584184288979, -0.1347874104976654, 0.2064601778984070, 0.0521226711571217, -0.1366733759641647, -0.0041872998699546, 0.1077186539769173, 0.0184442866593599, -0.2309073060750961, -0.1637075096368790, -0.0417953692376614, -0.3190860450267792, -0.1593534499406815, 0.0136412177234888)); - c1 += mul(br1, MF4x4(0.1698798984289169, 0.0232755411416292, -0.0876034423708916, -0.3008348643779755, 0.0789884999394417, 0.0034748215693980, -0.0064704762771726, 0.0057828431017697, -0.0190630126744509, -0.0334153175354004, -0.0195646341890097, 0.0105131156742573, 0.0995147302746773, -0.3130289018154144, -0.0724022984504700, 0.0113303456455469)); - c1 += mul(br2, MF4x4(-0.0027791252359748, -0.0193455871194601, -0.0415000133216381, 0.0568981170654297, -0.2745247483253479, 0.1222846284508705, 0.1899162530899048, 0.1067754998803139, -0.0561975166201591, -0.1500336527824402, 0.0526139959692955, -0.3491798937320709, -0.0692384615540504, -0.0307095069438219, 0.0498757846653461, 0.0019003645284101)); + c1 = MulAdd(tl1, MF4x4(-0.1033539846539497, 0.0541300140321255, -0.0804840475320816, -0.0334571413695812, -0.0264753755182028, 0.1118840202689171, 0.1186013221740723, -0.0127575425431132, 0.2236593365669250, 0.0025286162272096, 0.0985530614852905, 0.0685181617736816, -0.1884875595569611, 0.0530862808227539, -0.0482063069939613, 0.0375233069062233), c1); + c1 = MulAdd(tl2, MF4x4(0.1837068796157837, -0.0632847175002098, 0.0016613919287920, 0.0392861217260361, 0.2923883199691772, -0.1713902205228806, 0.1907587945461273, 0.0550456829369068, 0.0644215345382690, -0.1046456992626190, 0.0187383033335209, 0.0770180150866508, 0.1933846622705460, -0.0455715768039227, 0.0375007353723049, -0.1053109914064407), c1); + c1 = MulAdd(ml1, MF4x4(-0.0972480997443199, 0.2820451855659485, 0.0114549007266760, -0.0954328626394272, 0.0706252008676529, 0.4829064607620239, -0.6371517181396484, 0.0005180989392102, 0.3280143439769745, 0.0665246024727821, -0.0503116399049759, -0.1261110603809357, 0.1114177703857422, -0.2053108513355255, 0.1428771317005157, 0.3926100134849548), c1); + c1 = MulAdd(ml2, MF4x4(-0.2571723163127899, 0.1627264618873596, -0.4940335154533386, -0.1361546218395233, 0.0804422944784164, -0.4231885373592377, 0.0650202706456184, 0.0518481098115444, -0.0502478554844856, -0.1305799931287766, 0.1814480125904083, 0.0090866927057505, -0.0510044656693935, -0.1691461503505707, 0.0922467112541199, -0.0314207412302494), c1); + c1 = MulAdd(bl1, MF4x4(0.1270498335361481, 0.0563284493982792, -0.0435525141656399, 0.1569847911596298, 0.0576847903430462, 0.3461692929267883, -0.0325655154883862, -0.2688976824283600, -0.1341977864503860, -0.1382253766059875, 0.2293784171342850, -0.1111817285418510, -0.1402447521686554, -0.3257531225681305, 0.0598510466516018, 0.1008039116859436), c1); + c1 = MulAdd(bl2, MF4x4(0.1698816716670990, 0.3491003513336182, -0.1367681026458740, -0.1165873408317566, -0.2091718912124634, -0.1487034261226654, -0.0569749698042870, -0.2100717276334763, 0.0404917001724243, -0.1372035890817642, 0.0689046755433083, -0.0367818064987659, -0.0325474888086319, -0.0114965448155999, -0.0137249026447535, -0.0279692262411118), c1); + c1 = MulAdd(tc1, MF4x4(-0.0563433989882469, 0.0132494345307350, -0.2434540390968323, 0.0796563774347305, -0.2109155058860779, 0.0387088693678379, -0.0591037571430206, 0.0955820381641388, 0.4660535752773285, -0.1204202473163605, 0.1332369595766068, -0.0285425651818514, -0.3886952698230743, -0.0434980578720570, -0.0849134400486946, 0.0802380964159966), c1); + c1 = MulAdd(tc2, MF4x4(0.0412235632538795, 0.1571959257125854, 0.2050069272518158, -0.1138664111495018, 0.1962715685367584, 0.0594439841806889, 0.0351715497672558, -0.0129811102524400, 0.2055217623710632, -0.0647534057497978, 0.0373471938073635, 0.0877277255058289, -0.5734645724296570, 0.1188675239682198, -0.1145943328738213, -0.1182733029127121), c1); + c1 = MulAdd(mc1, MF4x4(-0.2004909217357635, -0.4817073047161102, 0.5596802830696106, -0.0327854752540588, 0.0989314392209053, 0.4127818942070007, 0.7265836596488953, -0.2692042589187622, 0.5195841789245605, -0.2357539832592010, -0.3819393217563629, 0.1755530238151550, 0.6578183770179749, 0.1075539961457253, -0.2688144743442535, 0.3242723941802979), c1); + c1 = MulAdd(mc2, MF4x4(-0.3221310675144196, 0.2978510260581970, 0.2269985526800156, -0.3184116482734680, 0.4845580160617828, 0.4407236874103546, 0.0099756307899952, -0.3121858239173889, -0.3810067176818848, -0.0553649961948395, 0.0202834140509367, 0.0409953594207764, 0.2532750964164734, 0.2731618583202362, 0.1237529441714287, 0.0134243080392480), c1); + c1 = MulAdd(bc1, MF4x4(0.1835541725158691, 0.0549701862037182, -0.1749316602945328, -0.2030028849840164, 0.0263462308794260, 0.2781440317630768, 0.0372458845376968, 0.3643021881580353, -0.4047883749008179, 0.0660117194056511, 0.4863115549087524, -0.2024163603782654, -0.6403482556343079, 0.2765505611896515, 0.1417075097560883, 0.5064445734024048), c1); + c1 = MulAdd(bc2, MF4x4(0.6106975078582764, -0.1570862233638763, -0.3223383128643036, -0.2497926801443100, -0.4854303300380707, 0.0132978223264217, -0.0609334111213684, 0.1285556703805923, -0.1412864029407501, -0.1379042416810989, -0.0258826259523630, 0.1357705891132355, -0.1285902857780457, -0.0577826797962189, 0.0550044551491737, 0.1717510819435120), c1); + c1 = MulAdd(tr1, MF4x4(0.1389609426259995, 0.0835867226123810, 0.0309768319129944, -0.0278116948902607, -0.0390677824616432, -0.0111810686066747, -0.0025318188127130, 0.0069569633342326, 0.0347319357097149, 0.0191543344408274, 0.0314339138567448, -0.0228427499532700, 0.0416300334036350, 0.0249234102666378, 0.1210031509399414, 0.1142473593354225), c1); + c1 = MulAdd(tr2, MF4x4(0.0607251487672329, 0.0386395826935768, -0.0219341218471527, -0.1102298423647881, 0.1487188935279846, 0.0602982006967068, -0.0280748903751373, -0.0211924221366644, 0.0042894422076643, -0.0269144997000694, 0.0814756453037262, -0.0314031280577183, -0.0213186051696539, -0.1362965404987335, 0.0382767543196678, -0.0669511556625366), c1); + c1 = MulAdd(mr1, MF4x4(-0.2397561967372894, 0.3023172020912170, -0.2398054003715515, 0.0041919997893274, -0.1016605198383331, -0.1521034836769104, -0.1526568531990051, 0.0272433310747147, 0.0741761848330498, 0.1116370111703873, 0.1149727106094360, -0.0809784531593323, -0.1448147594928741, -0.0943927690386772, -0.0086280042305589, 0.1243222951889038), c1); + c1 = MulAdd(mr2, MF4x4(-0.0469366572797298, -0.1655988991260529, -0.1029584184288979, -0.1347874104976654, 0.2064601778984070, 0.0521226711571217, -0.1366733759641647, -0.0041872998699546, 0.1077186539769173, 0.0184442866593599, -0.2309073060750961, -0.1637075096368790, -0.0417953692376614, -0.3190860450267792, -0.1593534499406815, 0.0136412177234888), c1); + c1 = MulAdd(br1, MF4x4(0.1698798984289169, 0.0232755411416292, -0.0876034423708916, -0.3008348643779755, 0.0789884999394417, 0.0034748215693980, -0.0064704762771726, 0.0057828431017697, -0.0190630126744509, -0.0334153175354004, -0.0195646341890097, 0.0105131156742573, 0.0995147302746773, -0.3130289018154144, -0.0724022984504700, 0.0113303456455469), c1); + c1 = MulAdd(br2, MF4x4(-0.0027791252359748, -0.0193455871194601, -0.0415000133216381, 0.0568981170654297, -0.2745247483253479, 0.1222846284508705, 0.1899162530899048, 0.1067754998803139, -0.0561975166201591, -0.1500336527824402, 0.0526139959692955, -0.3491798937320709, -0.0692384615540504, -0.0307095069438219, 0.0498757846653461, 0.0019003645284101), c1); c1 = max(c1, 0) + MF4(0.1552927196025848, 0.0782765746116638, 0.7966942191123962, -1.1619627475738525) * min(c1, 0); MF4 c2 = { -0.1443098634481430,-0.1343899369239807,-0.0624338127672672,-0.1094277128577232 }; - c2 += mul(tl1, MF4x4(-0.0689977407455444, -0.1693786680698395, 0.0109281269833446, 0.0609922930598259, 0.0296908002346754, 0.1195700988173485, -0.0694077461957932, 0.0971287414431572, 0.0253518298268318, 0.1213042959570885, 0.0703809782862663, 0.0055739870294929, -0.1595942378044128, -0.1336689442396164, -0.0622441768646240, -0.0428023114800453)); - c2 += mul(tl2, MF4x4(0.0860001668334007, -0.0226618759334087, 0.1602241247892380, 0.0431661494076252, 0.1526461094617844, 0.2752982378005981, 0.0960300788283348, -0.0536719262599945, -0.0171773489564657, 0.0457364916801453, -0.0360932648181915, -0.0397153608500957, -0.0277090407907963, 0.0729821547865868, -0.0145150292664766, 0.0252893269062042)); - c2 += mul(ml1, MF4x4(-0.1407091915607452, -0.4007499516010284, -0.0302001200616360, -0.0606933943927288, -0.2960600554943085, -0.2263117432594299, 0.0721478462219238, -0.4578711986541748, 0.0960150733590126, -0.1606502830982208, 0.2444226741790771, 0.0000882153908606, 0.1472496986389160, 0.3256779909133911, -0.2132861614227295, 0.0339313484728336)); - c2 += mul(ml2, MF4x4(-0.1477648764848709, -0.1487885862588882, -0.1973863691091537, 0.0717295333743095, 0.0843430235981941, 0.6259996294975281, -0.1214931011199951, -0.1274987608194351, 0.2359549105167389, 0.3002171218395233, -0.0825233608484268, -0.0157950688153505, 0.0706149637699127, 0.1762917637825012, -0.0611497573554516, -0.0859689489006996)); - c2 += mul(bl1, MF4x4(0.0174895934760571, -0.0567042417824268, 0.0409146919846535, 0.0258173532783985, 0.1421577036380768, 0.1234543323516846, -0.1721662431955338, 0.1492216140031815, 0.1100751459598541, 0.0501539446413517, 0.1100447699427605, -0.1086079254746437, -0.0608497932553291, 0.0087817469611764, 0.0714464113116264, -0.1285197436809540)); - c2 += mul(bl2, MF4x4(-0.0017177806003019, -0.1463395059108734, -0.1085453778505325, 0.1650195866823196, 0.0813829153776169, 0.1102061793208122, -0.0578421875834465, -0.0232036896049976, -0.1239888817071915, 0.0155465165153146, 0.1079114526510239, -0.0420837886631489, -0.0775837749242783, 0.0148941157385707, -0.0502299368381500, -0.0654754191637039)); - c2 += mul(tc1, MF4x4(0.0918162539601326, 0.0440697595477104, -0.0515748932957649, 0.0417411290109158, 0.0353216230869293, 0.1535954177379608, 0.0439723692834377, -0.1288845241069794, 0.1076577678322792, -0.1306740194559097, 0.0715952813625336, -0.0681907683610916, -0.3798767924308777, 0.1023928597569466, -0.0970670804381371, 0.0077168666757643)); - c2 += mul(tc2, MF4x4(0.0634560957551003, -0.0550306066870689, 0.2073986232280731, 0.0520241297781467, 0.1162287592887878, -0.2218665480613708, 0.3199682831764221, 0.0606246069073677, -0.0058511858806014, -0.0667045339941978, -0.0449917949736118, 0.0707788690924644, -0.3323366343975067, -0.0763893201947212, -0.0997853428125381, -0.1181001588702202)); - c2 += mul(mc1, MF4x4(-0.3101258873939514, 0.2616009712219238, 0.0584651045501232, 0.1656491309404373, -0.0069236233830452, 0.2573371529579163, -0.1793291717767715, -0.2718756198883057, 0.0953581258654594, 0.0524105131626129, 0.1183085516095161, 0.0583294369280338, 0.5036848187446594, -0.5763167142868042, -0.2119628041982651, -0.3140562772750854)); - c2 += mul(mc2, MF4x4(-0.2497755438089371, -0.0146329319104552, -0.2741575539112091, 0.2459975033998489, 0.3562706708908081, -0.6528629064559937, -0.4287456274032593, 0.2055913358926773, 0.1739019453525543, -0.3855968713760376, -0.0958273336291313, -0.7066691517829895, 0.2365748286247253, -0.3046728968620300, -0.2590373754501343, -0.0496727414429188)); - c2 += mul(bc1, MF4x4(-0.0844531357288361, -0.0321611948311329, -0.0951840654015541, 0.0577518045902252, -0.1606003493070602, 0.2776086628437042, -0.1355003118515015, -0.0880064144730568, -0.1277643740177155, -0.0514567233622074, 0.1522682905197144, -0.1040910631418228, -0.2767944037914276, -0.1452194601297379, 0.0089118303731084, 0.0231996178627014)); - c2 += mul(bc2, MF4x4(0.2603267133235931, 0.0167464651167393, -0.2064073234796524, 0.1782064288854599, 0.4890212416648865, 0.0559245310723782, 0.1221160590648651, -0.0202587731182575, -0.4056585729122162, -0.1839511841535568, 0.2775998413562775, 0.0024275144096464, -0.2624500989913940, -0.0619418807327747, 0.0153478365391493, 0.0123427547514439)); - c2 += mul(tr1, MF4x4(0.0816635638475418, -0.0134946266189218, 0.0594766475260258, -0.0551253929734230, 0.0134431896731257, -0.0652195811271667, -0.0563635528087616, -0.0066532371565700, -0.0004114551993553, 0.0105680683627725, 0.1324467360973358, 0.0467248968780041, 0.0301312971860170, -0.1073397025465965, -0.0363437235355377, -0.0474153012037277)); - c2 += mul(tr2, MF4x4(0.0199097190052271, 0.0901319086551666, 0.0448978282511234, 0.0505443066358566, 0.0438878424465656, -0.0494784042239189, 0.0724927335977554, -0.0070675504393876, -0.0012125011999160, 0.0295279901474714, 0.0705125033855438, 0.0555334389209747, -0.0403393507003784, -0.1271172016859055, 0.0017914215568453, 0.1462216079235077)); - c2 += mul(mr1, MF4x4(-0.2827299833297729, 0.2052399665117264, 0.0042732120491564, -0.3969024717807770, -0.0782120972871780, 0.1960176974534988, -0.0675340741872787, 0.0027962317690253, 0.0516129024326801, -0.0352642722427845, 0.0546326488256454, 0.0065340655855834, -0.1062376946210861, 0.1364430636167526, -0.0536947809159756, 0.2098117172718048)); - c2 += mul(mr2, MF4x4(0.0045875865034759, 0.2162927240133286, -0.2158576399087906, -0.0047327815555036, 0.1251590698957443, 0.1279677897691727, -0.1188964918255806, 0.0328494384884834, 0.0076038073748350, -0.0561547242105007, 0.0335608273744583, 0.4332321286201477, 0.0021786799188703, 0.0844521671533585, -0.2102309316396713, -0.0189208015799522)); - c2 += mul(br1, MF4x4(0.0933093801140785, 0.1548244059085846, -0.0598701611161232, 0.0357220247387886, -0.1141726672649384, 0.0536412484943867, -0.0159156844019890, -0.0445508137345314, 0.1883231997489929, -0.1547038406133652, 0.0530619807541370, 0.0059371814131737, 0.0602529086172581, -0.0435577929019928, 0.0083390390500426, 0.0191930737346411)); - c2 += mul(br2, MF4x4(-0.0351041629910469, 0.2119503468275070, -0.0841927304863930, 0.0079463515430689, 0.0683520361781120, -0.1657009869813919, 0.0611055232584476, -0.0063667562790215, 0.0330024957656860, -0.1810818463563919, 0.0872574150562286, 0.1485669612884521, -0.1305806934833527, 0.0041402997449040, 0.0223289318382740, -0.0141495745629072)); + c2 = MulAdd(tl1, MF4x4(-0.0689977407455444, -0.1693786680698395, 0.0109281269833446, 0.0609922930598259, 0.0296908002346754, 0.1195700988173485, -0.0694077461957932, 0.0971287414431572, 0.0253518298268318, 0.1213042959570885, 0.0703809782862663, 0.0055739870294929, -0.1595942378044128, -0.1336689442396164, -0.0622441768646240, -0.0428023114800453), c2); + c2 = MulAdd(tl2, MF4x4(0.0860001668334007, -0.0226618759334087, 0.1602241247892380, 0.0431661494076252, 0.1526461094617844, 0.2752982378005981, 0.0960300788283348, -0.0536719262599945, -0.0171773489564657, 0.0457364916801453, -0.0360932648181915, -0.0397153608500957, -0.0277090407907963, 0.0729821547865868, -0.0145150292664766, 0.0252893269062042), c2); + c2 = MulAdd(ml1, MF4x4(-0.1407091915607452, -0.4007499516010284, -0.0302001200616360, -0.0606933943927288, -0.2960600554943085, -0.2263117432594299, 0.0721478462219238, -0.4578711986541748, 0.0960150733590126, -0.1606502830982208, 0.2444226741790771, 0.0000882153908606, 0.1472496986389160, 0.3256779909133911, -0.2132861614227295, 0.0339313484728336), c2); + c2 = MulAdd(ml2, MF4x4(-0.1477648764848709, -0.1487885862588882, -0.1973863691091537, 0.0717295333743095, 0.0843430235981941, 0.6259996294975281, -0.1214931011199951, -0.1274987608194351, 0.2359549105167389, 0.3002171218395233, -0.0825233608484268, -0.0157950688153505, 0.0706149637699127, 0.1762917637825012, -0.0611497573554516, -0.0859689489006996), c2); + c2 = MulAdd(bl1, MF4x4(0.0174895934760571, -0.0567042417824268, 0.0409146919846535, 0.0258173532783985, 0.1421577036380768, 0.1234543323516846, -0.1721662431955338, 0.1492216140031815, 0.1100751459598541, 0.0501539446413517, 0.1100447699427605, -0.1086079254746437, -0.0608497932553291, 0.0087817469611764, 0.0714464113116264, -0.1285197436809540), c2); + c2 = MulAdd(bl2, MF4x4(-0.0017177806003019, -0.1463395059108734, -0.1085453778505325, 0.1650195866823196, 0.0813829153776169, 0.1102061793208122, -0.0578421875834465, -0.0232036896049976, -0.1239888817071915, 0.0155465165153146, 0.1079114526510239, -0.0420837886631489, -0.0775837749242783, 0.0148941157385707, -0.0502299368381500, -0.0654754191637039), c2); + c2 = MulAdd(tc1, MF4x4(0.0918162539601326, 0.0440697595477104, -0.0515748932957649, 0.0417411290109158, 0.0353216230869293, 0.1535954177379608, 0.0439723692834377, -0.1288845241069794, 0.1076577678322792, -0.1306740194559097, 0.0715952813625336, -0.0681907683610916, -0.3798767924308777, 0.1023928597569466, -0.0970670804381371, 0.0077168666757643), c2); + c2 = MulAdd(tc2, MF4x4(0.0634560957551003, -0.0550306066870689, 0.2073986232280731, 0.0520241297781467, 0.1162287592887878, -0.2218665480613708, 0.3199682831764221, 0.0606246069073677, -0.0058511858806014, -0.0667045339941978, -0.0449917949736118, 0.0707788690924644, -0.3323366343975067, -0.0763893201947212, -0.0997853428125381, -0.1181001588702202), c2); + c2 = MulAdd(mc1, MF4x4(-0.3101258873939514, 0.2616009712219238, 0.0584651045501232, 0.1656491309404373, -0.0069236233830452, 0.2573371529579163, -0.1793291717767715, -0.2718756198883057, 0.0953581258654594, 0.0524105131626129, 0.1183085516095161, 0.0583294369280338, 0.5036848187446594, -0.5763167142868042, -0.2119628041982651, -0.3140562772750854), c2); + c2 = MulAdd(mc2, MF4x4(-0.2497755438089371, -0.0146329319104552, -0.2741575539112091, 0.2459975033998489, 0.3562706708908081, -0.6528629064559937, -0.4287456274032593, 0.2055913358926773, 0.1739019453525543, -0.3855968713760376, -0.0958273336291313, -0.7066691517829895, 0.2365748286247253, -0.3046728968620300, -0.2590373754501343, -0.0496727414429188), c2); + c2 = MulAdd(bc1, MF4x4(-0.0844531357288361, -0.0321611948311329, -0.0951840654015541, 0.0577518045902252, -0.1606003493070602, 0.2776086628437042, -0.1355003118515015, -0.0880064144730568, -0.1277643740177155, -0.0514567233622074, 0.1522682905197144, -0.1040910631418228, -0.2767944037914276, -0.1452194601297379, 0.0089118303731084, 0.0231996178627014), c2); + c2 = MulAdd(bc2, MF4x4(0.2603267133235931, 0.0167464651167393, -0.2064073234796524, 0.1782064288854599, 0.4890212416648865, 0.0559245310723782, 0.1221160590648651, -0.0202587731182575, -0.4056585729122162, -0.1839511841535568, 0.2775998413562775, 0.0024275144096464, -0.2624500989913940, -0.0619418807327747, 0.0153478365391493, 0.0123427547514439), c2); + c2 = MulAdd(tr1, MF4x4(0.0816635638475418, -0.0134946266189218, 0.0594766475260258, -0.0551253929734230, 0.0134431896731257, -0.0652195811271667, -0.0563635528087616, -0.0066532371565700, -0.0004114551993553, 0.0105680683627725, 0.1324467360973358, 0.0467248968780041, 0.0301312971860170, -0.1073397025465965, -0.0363437235355377, -0.0474153012037277), c2); + c2 = MulAdd(tr2, MF4x4(0.0199097190052271, 0.0901319086551666, 0.0448978282511234, 0.0505443066358566, 0.0438878424465656, -0.0494784042239189, 0.0724927335977554, -0.0070675504393876, -0.0012125011999160, 0.0295279901474714, 0.0705125033855438, 0.0555334389209747, -0.0403393507003784, -0.1271172016859055, 0.0017914215568453, 0.1462216079235077), c2); + c2 = MulAdd(mr1, MF4x4(-0.2827299833297729, 0.2052399665117264, 0.0042732120491564, -0.3969024717807770, -0.0782120972871780, 0.1960176974534988, -0.0675340741872787, 0.0027962317690253, 0.0516129024326801, -0.0352642722427845, 0.0546326488256454, 0.0065340655855834, -0.1062376946210861, 0.1364430636167526, -0.0536947809159756, 0.2098117172718048), c2); + c2 = MulAdd(mr2, MF4x4(0.0045875865034759, 0.2162927240133286, -0.2158576399087906, -0.0047327815555036, 0.1251590698957443, 0.1279677897691727, -0.1188964918255806, 0.0328494384884834, 0.0076038073748350, -0.0561547242105007, 0.0335608273744583, 0.4332321286201477, 0.0021786799188703, 0.0844521671533585, -0.2102309316396713, -0.0189208015799522), c2); + c2 = MulAdd(br1, MF4x4(0.0933093801140785, 0.1548244059085846, -0.0598701611161232, 0.0357220247387886, -0.1141726672649384, 0.0536412484943867, -0.0159156844019890, -0.0445508137345314, 0.1883231997489929, -0.1547038406133652, 0.0530619807541370, 0.0059371814131737, 0.0602529086172581, -0.0435577929019928, 0.0083390390500426, 0.0191930737346411), c2); + c2 = MulAdd(br2, MF4x4(-0.0351041629910469, 0.2119503468275070, -0.0841927304863930, 0.0079463515430689, 0.0683520361781120, -0.1657009869813919, 0.0611055232584476, -0.0063667562790215, 0.0330024957656860, -0.1810818463563919, 0.0872574150562286, 0.1485669612884521, -0.1305806934833527, 0.0041402997449040, 0.0223289318382740, -0.0141495745629072), c2); c2 = max(c2, 0) + MF4(0.5769761204719543, 0.1716064810752869, -0.0821026712656021, 0.2092144042253494) * min(c2, 0); MF4 target1 = MF4(0.0245648548007011, -0.4467784762382507, 0.0197526942938566, -0.0110000418499112); - target1 += mul(c1, MF4x4(0.0302665308117867, -0.9262221455574036, -0.1161134764552116, -0.0506900474429131, 0.2716045379638672, -0.0485871583223343, 0.0044713355600834, -0.4274623394012451, 0.0749531090259552, -0.3700785338878632, 0.0350039415061474, -0.0540786534547806, -0.0607390031218529, -0.8019900321960449, 0.0923245251178741, 0.1258827745914459)); - target1 += mul(c2, MF4x4(-0.0649135261774063, 0.0815236791968346, 0.0067334296181798, 0.1277425885200500, -0.0051357815973461, -0.1485908329486847, 0.0074226572178304, 0.0050623500719666, 0.0588018335402012, -0.0692552924156189, 0.1288725286722183, -0.0989386290311813, 0.0427936837077141, 0.0967708528041840, -0.0455632135272026, -0.0711275041103363)); + target1 = MulAdd(c1, MF4x4(0.0302665308117867, -0.9262221455574036, -0.1161134764552116, -0.0506900474429131, 0.2716045379638672, -0.0485871583223343, 0.0044713355600834, -0.4274623394012451, 0.0749531090259552, -0.3700785338878632, 0.0350039415061474, -0.0540786534547806, -0.0607390031218529, -0.8019900321960449, 0.0923245251178741, 0.1258827745914459), target1); + target1 = MulAdd(c2, MF4x4(-0.0649135261774063, 0.0815236791968346, 0.0067334296181798, 0.1277425885200500, -0.0051357815973461, -0.1485908329486847, 0.0074226572178304, 0.0050623500719666, 0.0588018335402012, -0.0692552924156189, 0.1288725286722183, -0.0989386290311813, 0.0427936837077141, 0.0967708528041840, -0.0455632135272026, -0.0711275041103363), target1); target1 += featureMap1.SampleLevel(sam, pos, 0); target1 = max(target1, 0) + MF4(0.9927186965942383, 0.0570580027997494, 1.3226752281188965, 1.0069466829299927) * min(target1, 0); MF4 target2 = MF4(-0.0425243787467480, -0.3715015351772308, -0.0256227850914001, -0.2774516046047211); - target2 += mul(c1, MF4x4(0.0238118842244148, 0.0295480657368898, -0.0066418983042240, 0.1021223962306976, -0.0568209178745747, -0.4355100393295288, -0.2700522541999817, -0.2060186564922333, -0.0689613372087479, -0.1689691990613937, -0.0306748505681753, -0.2461252212524414, -0.0057375836186111, -0.1892303228378296, -0.0285871494561434, -0.5032613277435303)); - target2 += mul(c2, MF4x4(0.5463213324546814, 0.0972800329327583, 0.0307560767978430, 0.0678058937191963, -0.0356063023209572, -0.7013865113258362, 0.1890443563461304, -0.1036657467484474, -0.1745826154947281, -0.2942218780517578, -0.0485423319041729, -0.2983124554157257, -0.0524431839585304, -0.3261034786701202, 0.3217246532440186, 0.1958018541336060)); + target2 = MulAdd(c1, MF4x4(0.0238118842244148, 0.0295480657368898, -0.0066418983042240, 0.1021223962306976, -0.0568209178745747, -0.4355100393295288, -0.2700522541999817, -0.2060186564922333, -0.0689613372087479, -0.1689691990613937, -0.0306748505681753, -0.2461252212524414, -0.0057375836186111, -0.1892303228378296, -0.0285871494561434, -0.5032613277435303), target2); + target2 = MulAdd(c2, MF4x4(0.5463213324546814, 0.0972800329327583, 0.0307560767978430, 0.0678058937191963, -0.0356063023209572, -0.7013865113258362, 0.1890443563461304, -0.1036657467484474, -0.1745826154947281, -0.2942218780517578, -0.0485423319041729, -0.2983124554157257, -0.0524431839585304, -0.3261034786701202, 0.3217246532440186, 0.1958018541336060), target2); target2 += featureMap2.SampleLevel(sam, pos, 0); target2 = max(target2, 0) + MF4(0.1391339898109436, 0.0960328355431557, 0.6235341429710388, 0.1177272796630859) * min(target2, 0); @@ -590,24 +592,24 @@ void Pass6(uint2 blockStart, uint3 threadId) { MF4 br2 = tex4.SampleLevel(sam, pos + inputPt, 0); MF4 result = { 0.2010385394096375,0.2058132737874985,0.1918809115886688,0.1961363703012466 }; - result += mul(tl1, MF4x4(-0.0005980331334285, -0.0095877395942807, -0.0149448839947581, -0.0026380482595414, 0.0320665836334229, -0.0706205591559410, -0.0054677254520357, 0.0215112231671810, -0.0025710910558701, -0.0000433265340689, 0.0044494951143861, -0.0034823501482606, -0.0050858515314758, 0.0109513988718390, 0.0208286065608263, -0.0032168829347938)); - result += mul(tl2, MF4x4(-0.0145305208861828, 0.0246876608580351, -0.0038286084309220, -0.0033089490607381, -0.0920709222555161, -0.0767898634076118, 0.0012083095498383, -0.0751532614231110, 0.0001302754972130, -0.0107085108757019, -0.0010383903281763, -0.0059571005403996, 0.0809685289859772, 0.0414833538234234, 0.0227938480675220, -0.0211347509175539)); - result += mul(ml1, MF4x4(0.0160999298095703, 0.0364215746521950, -0.0377063788473606, -0.0449111759662628, -0.0476365163922310, 0.1522845029830933, -0.0131391752511263, -0.0476671792566776, -0.0378389135003090, 0.0235454943031073, 0.0224007442593575, -0.0010372076649219, -0.0089435689151287, -0.0293026417493820, 0.0274190884083509, 0.0469092652201653)); - result += mul(ml2, MF4x4(0.0297575183212757, -0.0132508194074035, -0.0044682323932648, -0.0096222748979926, 0.2525918781757355, 0.1873829364776611, -0.5599535703659058, -0.2372044622898102, 0.0033207221422344, 0.0256173480302095, 0.0294605866074562, 0.0323960892856121, -0.1679904460906982, -0.1278967708349228, 0.3168168365955353, 0.1978507637977600)); - result += mul(bl1, MF4x4(-0.0047590560279787, -0.0149335600435734, 0.0033453819341958, -0.0012247267877683, 0.1112466752529144, 0.0147760482504964, 0.0031189601868391, 0.0391573049128056, -0.0028154491446912, -0.0036881719715893, -0.0116015253588557, -0.0037573333829641, 0.0047581391409039, 0.0071071563288569, -0.0033221673220396, 0.0004882142529823)); - result += mul(bl2, MF4x4(-0.0025197160430253, -0.0018677815096453, 0.0038254233077168, 0.0041981274262071, -0.1321131736040115, -0.0494364202022552, 0.0760654658079147, -0.1386690139770508, -0.0016222692793235, -0.0060105528682470, 0.0010201989207417, 0.0092753591015935, -0.0194614846259356, 0.0087382243946195, -0.0606758072972298, 0.0156162241473794)); - result += mul(tc1, MF4x4(-0.0073722628876567, 0.0012844242155552, 0.0241398401558399, -0.0075527969747782, -0.0865194946527481, -0.0610522404313087, 0.0289319511502981, -0.0994452014565468, 0.0281447004526854, -0.0250582899898291, 0.0044891634024680, -0.0246205236762762, 0.0112307453528047, -0.0010844616917893, -0.0223584957420826, 0.0177635718137026)); - result += mul(tc2, MF4x4(-0.0585863515734673, 0.0953190475702286, -0.0555586628615856, 0.1033507287502289, 0.1560877263545990, -0.0690897777676582, -0.0341389514505863, -0.0661668032407761, 0.0531073249876499, -0.0266165956854820, -0.0203275382518768, 0.0017760475166142, -0.1300747394561768, 0.1810652017593384, 0.0381597876548767, 0.1397419273853302)); - result += mul(mc1, MF4x4(0.6259804368019104, 0.6062518954277039, 0.5450409054756165, 0.5966195464134216, -0.0423948727548122, 0.0760537460446358, -0.0113651463761926, 0.3007817566394806, -0.3218322694301605, 0.2713021934032440, -0.3143473267555237, 0.2303840517997742, 0.3493050038814545, 0.3590726852416992, 0.4138027429580688, 0.3391666412353516)); - result += mul(mc2, MF4x4(0.0790478289127350, -0.0978994593024254, 0.0779844969511032, -0.0823706611990929, 0.0094470111653209, 0.1671760678291321, 0.1201528310775757, -0.2016288936138153, 0.3667598366737366, 0.3651430010795593, -0.3612343966960907, -0.2978236973285675, -0.4231655597686768, 0.0091423410922289, -0.1918412446975708, 0.4224558770656586)); - result += mul(bc1, MF4x4(-0.0186564289033413, 0.0274957418441772, -0.0064405309967697, 0.0056951809674501, 0.4864942431449890, -0.2563461959362030, 0.4357284605503082, -0.2976118028163910, 0.0374982468783855, 0.0167757049202919, 0.0305800959467888, 0.0232830215245485, 0.0138373551890254, -0.0191283021122217, 0.0032355054281652, 0.0055057541467249)); - result += mul(bc2, MF4x4(-0.0276355985552073, 0.0048149987123907, -0.0251619722694159, -0.0057246969081461, 0.0271473955363035, -0.0042668608948588, -0.0594691745936871, 0.2255926281213760, -0.0203660242259502, 0.0721646770834923, 0.0137230781838298, -0.0650938376784325, -0.3049557507038116, 0.2035628110170364, -0.2509683668613434, 0.1962853819131851)); - result += mul(tr1, MF4x4(0.0109980758279562, -0.0053752651438117, -0.0112550277262926, 0.0024017230607569, 0.0362104885280132, 0.0084348218515515, -0.0106990104541183, -0.0207723993808031, -0.0014961160486564, 0.0066790678538382, 0.0028113177977502, 0.0025022011250257, -0.0093937022611499, 0.0016421369509771, 0.0035362334456295, -0.0058064293116331)); - result += mul(tr2, MF4x4(0.0138889988884330, -0.0078343702480197, 0.0061464929021895, 0.0202130675315857, -0.0257590841501951, -0.0366640128195286, 0.0250097587704659, -0.0498071312904358, -0.0103149358183146, -0.0001786737266229, -0.0099909817799926, 0.0062733208760619, 0.0131437368690968, -0.0005469865864143, -0.0388854071497917, 0.0612070746719837)); - result += mul(mr1, MF4x4(0.0052813654765487, 0.0215748809278011, 0.0107395220547915, -0.0079439217224717, 0.0382786765694618, 0.0697424262762070, -0.0415962152183056, 0.0657853558659554, 0.0209470037370920, -0.0218399092555046, -0.0447359494864941, 0.0407319553196430, -0.0040902681648731, -0.0196106657385826, -0.0018554026028141, 0.0203906055539846)); - result += mul(mr2, MF4x4(-0.0106181986629963, 0.0084018819034100, 0.0131329754367471, -0.0198754761368036, 0.1117177084088326, 0.0990846082568169, -0.0732304081320763, 0.0163581725209951, -0.0648830309510231, -0.0451613292098045, 0.0206844564527273, 0.0031441387254745, -0.0106161693111062, -0.0567689687013626, 0.0782861113548279, -0.0306094046682119)); - result += mul(br1, MF4x4(0.0012452082009986, -0.0026056850329041, -0.0096226977184415, -0.0037850935477763, -0.0190967041999102, 0.0534373670816422, 0.1599360853433609, 0.0834670960903168, -0.0070255175232887, 0.0012873009545729, 0.0030876772943884, -0.0093916896730661, -0.0033529615029693, 0.0043485122732818, 0.0089034689590335, -0.0067489291541278)); - result += mul(br2, MF4x4(0.0004713654634543, -0.0034161377698183, -0.0026913962792605, 0.0053522582165897, -0.0040974905714393, 0.0273330621421337, -0.0333138220012188, -0.0701237097382545, 0.0082997502759099, -0.0183656588196754, -0.0122841577976942, -0.0052855615504086, -0.0023795007728040, -0.0438593104481697, -0.1101513057947159, -0.0182559806853533)); + result = MulAdd(tl1, MF4x4(-0.0005980331334285, -0.0095877395942807, -0.0149448839947581, -0.0026380482595414, 0.0320665836334229, -0.0706205591559410, -0.0054677254520357, 0.0215112231671810, -0.0025710910558701, -0.0000433265340689, 0.0044494951143861, -0.0034823501482606, -0.0050858515314758, 0.0109513988718390, 0.0208286065608263, -0.0032168829347938), result); + result = MulAdd(tl2, MF4x4(-0.0145305208861828, 0.0246876608580351, -0.0038286084309220, -0.0033089490607381, -0.0920709222555161, -0.0767898634076118, 0.0012083095498383, -0.0751532614231110, 0.0001302754972130, -0.0107085108757019, -0.0010383903281763, -0.0059571005403996, 0.0809685289859772, 0.0414833538234234, 0.0227938480675220, -0.0211347509175539), result); + result = MulAdd(ml1, MF4x4(0.0160999298095703, 0.0364215746521950, -0.0377063788473606, -0.0449111759662628, -0.0476365163922310, 0.1522845029830933, -0.0131391752511263, -0.0476671792566776, -0.0378389135003090, 0.0235454943031073, 0.0224007442593575, -0.0010372076649219, -0.0089435689151287, -0.0293026417493820, 0.0274190884083509, 0.0469092652201653), result); + result = MulAdd(ml2, MF4x4(0.0297575183212757, -0.0132508194074035, -0.0044682323932648, -0.0096222748979926, 0.2525918781757355, 0.1873829364776611, -0.5599535703659058, -0.2372044622898102, 0.0033207221422344, 0.0256173480302095, 0.0294605866074562, 0.0323960892856121, -0.1679904460906982, -0.1278967708349228, 0.3168168365955353, 0.1978507637977600), result); + result = MulAdd(bl1, MF4x4(-0.0047590560279787, -0.0149335600435734, 0.0033453819341958, -0.0012247267877683, 0.1112466752529144, 0.0147760482504964, 0.0031189601868391, 0.0391573049128056, -0.0028154491446912, -0.0036881719715893, -0.0116015253588557, -0.0037573333829641, 0.0047581391409039, 0.0071071563288569, -0.0033221673220396, 0.0004882142529823), result); + result = MulAdd(bl2, MF4x4(-0.0025197160430253, -0.0018677815096453, 0.0038254233077168, 0.0041981274262071, -0.1321131736040115, -0.0494364202022552, 0.0760654658079147, -0.1386690139770508, -0.0016222692793235, -0.0060105528682470, 0.0010201989207417, 0.0092753591015935, -0.0194614846259356, 0.0087382243946195, -0.0606758072972298, 0.0156162241473794), result); + result = MulAdd(tc1, MF4x4(-0.0073722628876567, 0.0012844242155552, 0.0241398401558399, -0.0075527969747782, -0.0865194946527481, -0.0610522404313087, 0.0289319511502981, -0.0994452014565468, 0.0281447004526854, -0.0250582899898291, 0.0044891634024680, -0.0246205236762762, 0.0112307453528047, -0.0010844616917893, -0.0223584957420826, 0.0177635718137026), result); + result = MulAdd(tc2, MF4x4(-0.0585863515734673, 0.0953190475702286, -0.0555586628615856, 0.1033507287502289, 0.1560877263545990, -0.0690897777676582, -0.0341389514505863, -0.0661668032407761, 0.0531073249876499, -0.0266165956854820, -0.0203275382518768, 0.0017760475166142, -0.1300747394561768, 0.1810652017593384, 0.0381597876548767, 0.1397419273853302), result); + result = MulAdd(mc1, MF4x4(0.6259804368019104, 0.6062518954277039, 0.5450409054756165, 0.5966195464134216, -0.0423948727548122, 0.0760537460446358, -0.0113651463761926, 0.3007817566394806, -0.3218322694301605, 0.2713021934032440, -0.3143473267555237, 0.2303840517997742, 0.3493050038814545, 0.3590726852416992, 0.4138027429580688, 0.3391666412353516), result); + result = MulAdd(mc2, MF4x4(0.0790478289127350, -0.0978994593024254, 0.0779844969511032, -0.0823706611990929, 0.0094470111653209, 0.1671760678291321, 0.1201528310775757, -0.2016288936138153, 0.3667598366737366, 0.3651430010795593, -0.3612343966960907, -0.2978236973285675, -0.4231655597686768, 0.0091423410922289, -0.1918412446975708, 0.4224558770656586), result); + result = MulAdd(bc1, MF4x4(-0.0186564289033413, 0.0274957418441772, -0.0064405309967697, 0.0056951809674501, 0.4864942431449890, -0.2563461959362030, 0.4357284605503082, -0.2976118028163910, 0.0374982468783855, 0.0167757049202919, 0.0305800959467888, 0.0232830215245485, 0.0138373551890254, -0.0191283021122217, 0.0032355054281652, 0.0055057541467249), result); + result = MulAdd(bc2, MF4x4(-0.0276355985552073, 0.0048149987123907, -0.0251619722694159, -0.0057246969081461, 0.0271473955363035, -0.0042668608948588, -0.0594691745936871, 0.2255926281213760, -0.0203660242259502, 0.0721646770834923, 0.0137230781838298, -0.0650938376784325, -0.3049557507038116, 0.2035628110170364, -0.2509683668613434, 0.1962853819131851), result); + result = MulAdd(tr1, MF4x4(0.0109980758279562, -0.0053752651438117, -0.0112550277262926, 0.0024017230607569, 0.0362104885280132, 0.0084348218515515, -0.0106990104541183, -0.0207723993808031, -0.0014961160486564, 0.0066790678538382, 0.0028113177977502, 0.0025022011250257, -0.0093937022611499, 0.0016421369509771, 0.0035362334456295, -0.0058064293116331), result); + result = MulAdd(tr2, MF4x4(0.0138889988884330, -0.0078343702480197, 0.0061464929021895, 0.0202130675315857, -0.0257590841501951, -0.0366640128195286, 0.0250097587704659, -0.0498071312904358, -0.0103149358183146, -0.0001786737266229, -0.0099909817799926, 0.0062733208760619, 0.0131437368690968, -0.0005469865864143, -0.0388854071497917, 0.0612070746719837), result); + result = MulAdd(mr1, MF4x4(0.0052813654765487, 0.0215748809278011, 0.0107395220547915, -0.0079439217224717, 0.0382786765694618, 0.0697424262762070, -0.0415962152183056, 0.0657853558659554, 0.0209470037370920, -0.0218399092555046, -0.0447359494864941, 0.0407319553196430, -0.0040902681648731, -0.0196106657385826, -0.0018554026028141, 0.0203906055539846), result); + result = MulAdd(mr2, MF4x4(-0.0106181986629963, 0.0084018819034100, 0.0131329754367471, -0.0198754761368036, 0.1117177084088326, 0.0990846082568169, -0.0732304081320763, 0.0163581725209951, -0.0648830309510231, -0.0451613292098045, 0.0206844564527273, 0.0031441387254745, -0.0106161693111062, -0.0567689687013626, 0.0782861113548279, -0.0306094046682119), result); + result = MulAdd(br1, MF4x4(0.0012452082009986, -0.0026056850329041, -0.0096226977184415, -0.0037850935477763, -0.0190967041999102, 0.0534373670816422, 0.1599360853433609, 0.0834670960903168, -0.0070255175232887, 0.0012873009545729, 0.0030876772943884, -0.0093916896730661, -0.0033529615029693, 0.0043485122732818, 0.0089034689590335, -0.0067489291541278), result); + result = MulAdd(br2, MF4x4(0.0004713654634543, -0.0034161377698183, -0.0026913962792605, 0.0053522582165897, -0.0040974905714393, 0.0273330621421337, -0.0333138220012188, -0.0701237097382545, 0.0082997502759099, -0.0183656588196754, -0.0122841577976942, -0.0052855615504086, -0.0023795007728040, -0.0438593104481697, -0.1101513057947159, -0.0182559806853533), result); [unroll] for (uint i = 0; i <= 1; ++i) { diff --git a/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl b/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl index 459ac5b8a..f66f93688 100644 --- a/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl +++ b/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl @@ -3,6 +3,9 @@ //!MAGPIE EFFECT //!VERSION 4 +//!USE FP16, MulAdd + +#include "..\StubDefs.hlsli" //!TEXTURE @@ -67,10 +70,10 @@ SamplerState sam1; #define SH_PIXELS_X (MP_BLOCK_WIDTH + 4) #define SH_PIXELS_Y (MP_BLOCK_HEIGHT + 4) -groupshared float shPixelsY[SH_PIXELS_Y][SH_PIXELS_X]; +groupshared MF shPixelsY[SH_PIXELS_Y][SH_PIXELS_X]; -float GetLuma(float3 rgb) { - return dot(float3(0.299f, 0.587f, 0.114f), rgb); +MF GetLuma(MF3 rgb) { + return dot(MF3(0.299f, 0.587f, 0.114f), rgb); } void Pass1(uint2 blockStart, uint3 threadId) { @@ -82,14 +85,14 @@ void Pass1(uint2 blockStart, uint3 threadId) { uint2 pos = uint2(i % SH_PIXELS_X, i / SH_PIXELS_X * 2); const float2 tpos = (blockStart + pos - 1.5f) * inputPt; - const float4 sr = INPUT.GatherRed(sam, tpos); - const float4 sg = INPUT.GatherGreen(sam, tpos); - const float4 sb = INPUT.GatherBlue(sam, tpos); + const MF4 sr = INPUT.GatherRed(sam, tpos); + const MF4 sg = INPUT.GatherGreen(sam, tpos); + const MF4 sb = INPUT.GatherBlue(sam, tpos); - shPixelsY[pos.y][pos.x] = GetLuma(float3(sr.w, sg.w, sb.w)); - shPixelsY[pos.y][pos.x + 1] = GetLuma(float3(sr.z, sg.z, sb.z)); - shPixelsY[pos.y + 1][pos.x] = GetLuma(float3(sr.x, sg.x, sb.x)); - shPixelsY[pos.y + 1][pos.x + 1] = GetLuma(float3(sr.y, sg.y, sb.y)); + shPixelsY[pos.y][pos.x] = GetLuma(MF3(sr.w, sg.w, sb.w)); + shPixelsY[pos.y][pos.x + 1] = GetLuma(MF3(sr.z, sg.z, sb.z)); + shPixelsY[pos.y + 1][pos.x] = GetLuma(MF3(sr.x, sg.x, sb.x)); + shPixelsY[pos.y + 1][pos.x + 1] = GetLuma(MF3(sr.y, sg.y, sb.y)); } GroupMemoryBarrierWithGroupSync(); @@ -102,7 +105,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { continue; } - float src[5][5]; + MF src[5][5]; [unroll] for (int i = 0; i < 5; ++i) { [unroll] @@ -111,59 +114,59 @@ void Pass1(uint2 blockStart, uint3 threadId) { } } - float4 target1 = float4(-0.3117050230503082, 0.1817725896835327, 0.0011673698900267, -0.0044658286496997); - target1 += float4(-0.0187959559261799, -0.0206312909722328, 0.0226501729339361, 0.0111862262710929) * src[0][0]; - target1 += float4(0.0469042696058750, 0.0428658165037632, -0.0208927169442177, -0.0053485808894038) * src[0][1]; - target1 += float4(0.0486242026090622, 0.0268428903073072, -0.1095351055264473, -0.0197027549147606) * src[0][2]; - target1 += float4(-0.0301427692174911, -0.0444439016282558, 0.0803908482193947, -0.0072240661829710) * src[0][3]; - target1 += float4(0.0097448397427797, 0.0132117131724954, -0.0087575586512685, 0.0003270092420280) * src[0][4]; - target1 += float4(0.0227436870336533, 0.0284603293985128, -0.0899902656674385, 0.0174379274249077) * src[1][0]; - target1 += float4(-0.0880827009677887, -0.0890802741050720, 0.3386772871017456, -0.0749290063977242) * src[1][1]; - target1 += float4(-0.0832799598574638, -0.1518420130014420, 0.1693033277988434, 0.1514045447111130) * src[1][2]; - target1 += float4(0.0490957386791706, 0.0839962288737297, 0.0323486365377903, -0.0491475425660610) * src[1][3]; - target1 += float4(0.0281097982078791, 0.0267692077904940, -0.0460123419761658, 0.0137899341061711) * src[1][4]; - target1 += float4(0.0592067055404186, -0.0008030450553633, 0.1280025541782379, -0.0270480886101723) * src[2][0]; - target1 += float4(-0.0784756019711494, -0.0078630214557052, -0.1963789612054825, 0.2132134586572647) * src[2][1]; - target1 += float4(0.9478371739387512, -0.7432878613471985, -0.4691794812679291, -0.4196422100067139) * src[2][2]; - target1 += float4(0.1578149050474167, -0.0874812081456184, 0.1223142221570015, 0.2514914274215698) * src[2][3]; - target1 += float4(0.0576529577374458, 0.0775778889656067, 0.0526014007627964, -0.1151828765869141) * src[2][4]; - target1 += float4(-0.0459806136786938, -0.0550342053174973, -0.0553226508200169, -0.0042642662301660) * src[3][0]; - target1 += float4(0.1346504986286163, 0.1795998811721802, -0.0741422399878502, -0.0004661275597755) * src[3][1]; - target1 += float4(-0.0344312079250813, -0.0998986735939980, 0.2834288179874420, 0.1789152175188065) * src[3][2]; - target1 += float4(-0.0376542955636978, -0.0137260686606169, -0.2183600962162018, -0.0829529240727425) * src[3][3]; - target1 += float4(0.0143303163349628, 0.0085790483281016, 0.0312815308570862, 0.0557830408215523) * src[3][4]; - target1 += float4(0.0196402054280043, 0.0245775021612644, 0.0333996489644051, 0.0064323167316616) * src[4][0]; - target1 += float4(-0.0247105974704027, -0.0139399459585547, 0.0039188005030155, 0.0138866743072867) * src[4][1]; - target1 += float4(0.0688862130045891, 0.0629303157329559, -0.0323157459497452, -0.1300792843103409) * src[4][2]; - target1 += float4(0.0111092608422041, 0.0116711426526308, 0.0460555553436279, 0.0563828162848949) * src[4][3]; - target1 += float4(-0.0043270774185658, -0.0096766958013177, -0.0235258601605892, -0.0409700050950050) * src[4][4]; - - float4 target2 = float4(0.0165165197104216, 0.0061719734221697, -0.0008248710073531, -0.0774794667959213); - target2 += float4(-0.0127812735736370, -0.0146999256685376, 0.0025963818188757, 0.0008133125957102) * src[0][0]; - target2 += float4(0.0192508958280087, 0.0089628640562296, 0.0046624913811684, -0.0005601323791780) * src[0][1]; - target2 += float4(-0.1021092385053635, -0.0491660982370377, -0.0818324312567711, -0.0719010531902313) * src[0][2]; - target2 += float4(0.0166876111179590, -0.0046075899153948, 0.0258100070059299, -0.0235325042158365) * src[0][3]; - target2 += float4(-0.0028500237967819, -0.0020616643596441, -0.0073093594983220, -0.0034190006554127) * src[0][4]; - target2 += float4(0.0024815262295306, 0.0222324915230274, -0.0080765523016453, 0.0105959763750434) * src[1][0]; - target2 += float4(0.1017390340566635, 0.0138921840116382, 0.0559288635849953, -0.0168517548590899) * src[1][1]; - target2 += float4(0.1267367750406265, -0.2365809977054596, 0.4724994897842407, -0.0154752098023891) * src[1][2]; - target2 += float4(0.0847241580486298, 0.1127829849720001, -0.0643212646245956, 0.0177757386118174) * src[1][3]; - target2 += float4(-0.0354492329061031, -0.0234994646161795, 0.0336676724255085, 0.0153558924794197) * src[1][4]; - target2 += float4(-0.1001686528325081, 0.0175829399377108, -0.0146998856216669, -0.0897502079606056) * src[2][0]; - target2 += float4(0.0973328053951263, -0.5987607836723328, -0.0770601108670235, 0.2343221157789230) * src[2][1]; - target2 += float4(-1.0639246702194214, 0.5335622429847717, -0.2365868240594864, 0.6484431028366089) * src[2][2]; - target2 += float4(-0.0258918590843678, 0.1439655423164368, 0.2597847878932953, -0.5380389094352722) * src[2][3]; - target2 += float4(0.0333042629063129, -0.0408495217561722, 0.0026879014912993, 0.0496195442974567) * src[2][4]; - target2 += float4(0.0017764334334061, 0.0032939016819000, -0.0121603077277541, -0.0066827093251050) * src[3][0]; - target2 += float4(0.0497846752405167, 0.0766935721039772, 0.0505562871694565, 0.0058483541943133) * src[3][1]; - target2 += float4(0.6903248429298401, 0.0658241882920265, -0.4562527537345886, -0.0117225451394916) * src[3][2]; - target2 += float4(0.1896255612373352, -0.0459045991301537, -0.0380226671695709, -0.0333303771913052) * src[3][3]; - target2 += float4(-0.0868696048855782, 0.0157926902174950, 0.0011628456413746, 0.0207170285284519) * src[3][4]; - target2 += float4(0.0130701754242182, -0.0067251212894917, -0.0007082104566507, -0.0017002354143187) * src[4][0]; - target2 += float4(0.0029672298114747, -0.0060487915761769, 0.0191176552325487, 0.0520425662398338) * src[4][1]; - target2 += float4(-0.0253955777734518, -0.0159530192613602, 0.0304108783602715, -0.0263646803796291) * src[4][2]; - target2 += float4(-0.0708072409033775, 0.0109798992052674, 0.0285820439457893, 0.0188453849405050) * src[4][3]; - target2 += float4(0.0698847994208336, -0.0164128411561251, 0.0043246182613075, -0.0244176983833313) * src[4][4]; + MF4 target1 = MF4(-0.3117050230503082, 0.1817725896835327, 0.0011673698900267, -0.0044658286496997); + target1 = mad(src[0][0], MF4(-0.0187959559261799, -0.0206312909722328, 0.0226501729339361, 0.0111862262710929), target1); + target1 = mad(src[0][1], MF4(0.0469042696058750, 0.0428658165037632, -0.0208927169442177, -0.0053485808894038), target1); + target1 = mad(src[0][2], MF4(0.0486242026090622, 0.0268428903073072, -0.1095351055264473, -0.0197027549147606), target1); + target1 = mad(src[0][3], MF4(-0.0301427692174911, -0.0444439016282558, 0.0803908482193947, -0.0072240661829710), target1); + target1 = mad(src[0][4], MF4(0.0097448397427797, 0.0132117131724954, -0.0087575586512685, 0.0003270092420280), target1); + target1 = mad(src[1][0], MF4(0.0227436870336533, 0.0284603293985128, -0.0899902656674385, 0.0174379274249077), target1); + target1 = mad(src[1][1], MF4(-0.0880827009677887, -0.0890802741050720, 0.3386772871017456, -0.0749290063977242), target1); + target1 = mad(src[1][2], MF4(-0.0832799598574638, -0.1518420130014420, 0.1693033277988434, 0.1514045447111130), target1); + target1 = mad(src[1][3], MF4(0.0490957386791706, 0.0839962288737297, 0.0323486365377903, -0.0491475425660610), target1); + target1 = mad(src[1][4], MF4(0.0281097982078791, 0.0267692077904940, -0.0460123419761658, 0.0137899341061711), target1); + target1 = mad(src[2][0], MF4(0.0592067055404186, -0.0008030450553633, 0.1280025541782379, -0.0270480886101723), target1); + target1 = mad(src[2][1], MF4(-0.0784756019711494, -0.0078630214557052, -0.1963789612054825, 0.2132134586572647), target1); + target1 = mad(src[2][2], MF4(0.9478371739387512, -0.7432878613471985, -0.4691794812679291, -0.4196422100067139), target1); + target1 = mad(src[2][3], MF4(0.1578149050474167, -0.0874812081456184, 0.1223142221570015, 0.2514914274215698), target1); + target1 = mad(src[2][4], MF4(0.0576529577374458, 0.0775778889656067, 0.0526014007627964, -0.1151828765869141), target1); + target1 = mad(src[3][0], MF4(-0.0459806136786938, -0.0550342053174973, -0.0553226508200169, -0.0042642662301660), target1); + target1 = mad(src[3][1], MF4(0.1346504986286163, 0.1795998811721802, -0.0741422399878502, -0.0004661275597755), target1); + target1 = mad(src[3][2], MF4(-0.0344312079250813, -0.0998986735939980, 0.2834288179874420, 0.1789152175188065), target1); + target1 = mad(src[3][3], MF4(-0.0376542955636978, -0.0137260686606169, -0.2183600962162018, -0.0829529240727425), target1); + target1 = mad(src[3][4], MF4(0.0143303163349628, 0.0085790483281016, 0.0312815308570862, 0.0557830408215523), target1); + target1 = mad(src[4][0], MF4(0.0196402054280043, 0.0245775021612644, 0.0333996489644051, 0.0064323167316616), target1); + target1 = mad(src[4][1], MF4(-0.0247105974704027, -0.0139399459585547, 0.0039188005030155, 0.0138866743072867), target1); + target1 = mad(src[4][2], MF4(0.0688862130045891, 0.0629303157329559, -0.0323157459497452, -0.1300792843103409), target1); + target1 = mad(src[4][3], MF4(0.0111092608422041, 0.0116711426526308, 0.0460555553436279, 0.0563828162848949), target1); + target1 = mad(src[4][4], MF4(-0.0043270774185658, -0.0096766958013177, -0.0235258601605892, -0.0409700050950050), target1); + + MF4 target2 = MF4(0.0165165197104216, 0.0061719734221697, -0.0008248710073531, -0.0774794667959213); + target2 = mad(src[0][0], MF4(-0.0127812735736370, -0.0146999256685376, 0.0025963818188757, 0.0008133125957102), target2); + target2 = mad(src[0][1], MF4(0.0192508958280087, 0.0089628640562296, 0.0046624913811684, -0.0005601323791780), target2); + target2 = mad(src[0][2], MF4(-0.1021092385053635, -0.0491660982370377, -0.0818324312567711, -0.0719010531902313), target2); + target2 = mad(src[0][3], MF4(0.0166876111179590, -0.0046075899153948, 0.0258100070059299, -0.0235325042158365), target2); + target2 = mad(src[0][4], MF4(-0.0028500237967819, -0.0020616643596441, -0.0073093594983220, -0.0034190006554127), target2); + target2 = mad(src[1][0], MF4(0.0024815262295306, 0.0222324915230274, -0.0080765523016453, 0.0105959763750434), target2); + target2 = mad(src[1][1], MF4(0.1017390340566635, 0.0138921840116382, 0.0559288635849953, -0.0168517548590899), target2); + target2 = mad(src[1][2], MF4(0.1267367750406265, -0.2365809977054596, 0.4724994897842407, -0.0154752098023891), target2); + target2 = mad(src[1][3], MF4(0.0847241580486298, 0.1127829849720001, -0.0643212646245956, 0.0177757386118174), target2); + target2 = mad(src[1][4], MF4(-0.0354492329061031, -0.0234994646161795, 0.0336676724255085, 0.0153558924794197), target2); + target2 = mad(src[2][0], MF4(-0.1001686528325081, 0.0175829399377108, -0.0146998856216669, -0.0897502079606056), target2); + target2 = mad(src[2][1], MF4(0.0973328053951263, -0.5987607836723328, -0.0770601108670235, 0.2343221157789230), target2); + target2 = mad(src[2][2], MF4(-1.0639246702194214, 0.5335622429847717, -0.2365868240594864, 0.6484431028366089), target2); + target2 = mad(src[2][3], MF4(-0.0258918590843678, 0.1439655423164368, 0.2597847878932953, -0.5380389094352722), target2); + target2 = mad(src[2][4], MF4(0.0333042629063129, -0.0408495217561722, 0.0026879014912993, 0.0496195442974567), target2); + target2 = mad(src[3][0], MF4(0.0017764334334061, 0.0032939016819000, -0.0121603077277541, -0.0066827093251050), target2); + target2 = mad(src[3][1], MF4(0.0497846752405167, 0.0766935721039772, 0.0505562871694565, 0.0058483541943133), target2); + target2 = mad(src[3][2], MF4(0.6903248429298401, 0.0658241882920265, -0.4562527537345886, -0.0117225451394916), target2); + target2 = mad(src[3][3], MF4(0.1896255612373352, -0.0459045991301537, -0.0380226671695709, -0.0333303771913052), target2); + target2 = mad(src[3][4], MF4(-0.0868696048855782, 0.0157926902174950, 0.0011628456413746, 0.0207170285284519), target2); + target2 = mad(src[4][0], MF4(0.0130701754242182, -0.0067251212894917, -0.0007082104566507, -0.0017002354143187), target2); + target2 = mad(src[4][1], MF4(0.0029672298114747, -0.0060487915761769, 0.0191176552325487, 0.0520425662398338), target2); + target2 = mad(src[4][2], MF4(-0.0253955777734518, -0.0159530192613602, 0.0304108783602715, -0.0263646803796291), target2); + target2 = mad(src[4][3], MF4(-0.0708072409033775, 0.0109798992052674, 0.0285820439457893, 0.0188453849405050), target2); + target2 = mad(src[4][4], MF4(0.0698847994208336, -0.0164128411561251, 0.0043246182613075, -0.0244176983833313), target2); featureMap1[destPos] = target1; featureMap2[destPos] = target2; @@ -191,67 +194,67 @@ void Pass2(uint2 blockStart, uint3 threadId) { // [tl, tc, tr] // [ml, mc, mr] // [bl, bc, br] - float4 tl1 = featureMap1.SampleLevel(sam, pos - inputPt, 0); - float4 ml1 = featureMap1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl1 = featureMap1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc1 = featureMap1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc1 = featureMap1.SampleLevel(sam, pos, 0); - float4 bc1 = featureMap1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr1 = featureMap1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr1 = featureMap1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br1 = featureMap1.SampleLevel(sam, pos + inputPt, 0); - - float4 tl2 = featureMap2.SampleLevel(sam, pos - inputPt, 0); - float4 ml2 = featureMap2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl2 = featureMap2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc2 = featureMap2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc2 = featureMap2.SampleLevel(sam, pos, 0); - float4 bc2 = featureMap2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr2 = featureMap2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr2 = featureMap2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br2 = featureMap2.SampleLevel(sam, pos + inputPt, 0); - - float4 target1 = float4(-0.0031195033807307, -0.0977938771247864, 0.0337169878184795, 0.0840695425868034); - target1 += mul(tl1, float4x4(0.0028950418345630, 0.2153117954730988, -0.1120878234505653, 0.1065240651369095, -0.0902118757367134, 0.2227627933025360, -0.1268638819456100, 0.0378417931497097, 0.0262128096073866, 0.1100647151470184, -0.0224360711872578, -0.2487984448671341, 0.3278627693653107, 0.3930607438087463, -0.3361104130744934, -0.2318559885025024)); - target1 += mul(tl2, float4x4(-0.1388952732086182, -0.0210590325295925, -0.0107318097725511, 0.1340505480766296, -0.2403931617736816, 0.4324082732200623, -0.1229069381952286, -0.1129430904984474, -0.2194076776504517, -0.2529417872428894, 0.2493167072534561, 0.1228863969445229, -0.6289532780647278, 0.2511698901653290, -0.1145481690764427, -0.1931190490722656)); - target1 += mul(ml1, float4x4(0.0802633240818977, -0.2823207676410675, -0.0453533977270126, 0.2149281948804855, 0.2391699999570847, -0.3012505769729614, -0.0672336667776108, 0.1134754717350006, -0.1874532252550125, 0.2426864057779312, 0.0001024203302222, -0.2685940861701965, -0.2393693625926971, -0.0148733090609312, 0.4815890491008759, -0.5666245818138123)); - target1 += mul(ml2, float4x4(0.2353847920894623, 0.7481728792190552, 0.0613395981490612, -0.3136185705661774, -0.6453479528427124, 0.2987860739231110, -0.1935778856277466, -0.4407877624034882, -0.1155721992254257, -0.8143445253372192, -0.1829861551523209, 0.0808847546577454, 0.3689287006855011, -0.1318729221820831, 0.1494798213243484, -0.7250000834465027)); - target1 += mul(bl1, float4x4(-0.2325237691402435, -0.0383906811475754, -0.0762876123189926, 0.0158057715743780, -0.3222318589687347, -0.0946261659264565, -0.1157991588115692, 0.2080847620964050, -0.1521182358264923, 0.4038263857364655, -0.2508496940135956, 0.0620750486850739, 0.1382832378149033, -0.1788915544748306, -0.1054779291152954, -0.1079574525356293)); - target1 += mul(bl2, float4x4(0.0441149584949017, -0.1473216116428375, 0.1350974887609482, -0.2101743519306183, 0.4860914349555969, -0.0438372306525707, 0.1496813595294952, 0.1337997019290924, 0.2939592599868774, -0.2875731289386749, 0.3024089336395264, 0.2730985283851624, 0.3860357403755188, -0.2070714235305786, 0.0471001267433167, 0.0515876151621342)); - target1 += mul(tc1, float4x4(-0.0282726809382439, -0.0808876901865005, -0.1294671446084976, 0.0327957235276699, 0.2005466669797897, 0.0710152760148048, -0.2951858937740326, 0.1284866034984589, -0.3245949447154999, -0.2784474790096283, -0.0651488602161407, 0.2024642229080200, -0.1790685206651688, -0.1523464322090149, 0.0683958381414413, -0.0721997469663620)); - target1 += mul(tc2, float4x4(0.2084605693817139, 0.2224501073360443, 0.2990169227123260, -0.0822417438030243, 0.1661120802164078, 0.2242873460054398, 0.3008987009525299, -0.0589924007654190, 1.0089585781097412, 0.3364263474941254, 0.3114744126796722, -0.4205997586250305, 0.2149223387241364, -0.2686808407306671, 0.6869788169860840, 0.0397010855376720)); - target1 += mul(mc1, float4x4(-0.1719545274972916, 0.2342357635498047, -0.1108281537890434, 0.0051285717636347, -0.5348495244979858, -0.0063809715211391, -0.2947000265121460, 0.0092384787276387, 0.1788431107997894, -0.8757466077804565, -0.0199933666735888, -0.0933040529489517, -1.1017562150955200, -1.1397477388381958, -0.8490890264511108, 2.0844755172729492)); - target1 += mul(mc2, float4x4(-0.7517850399017334, -0.6626257300376892, -1.7181873321533203, 1.3924138545989990, 0.3148886561393738, 1.2373961210250854, 0.8413697481155396, 0.2569177746772766, 0.1905626207590103, -0.8806108832359314, 0.7340399026870728, 1.8838906288146973, -0.1782593727111816, 0.3429502546787262, -0.3488911390304565, -0.6653195619583130)); - target1 += mul(bc1, float4x4(0.1612574905157089, -0.0092199165374041, -0.2294603884220123, 0.2070839852094650, 0.1995067894458771, -0.1586991697549820, -0.1423671096563339, 0.1524601876735687, 0.6368640661239624, -0.1302748024463654, 0.2046667486429214, 0.4024843573570251, 0.3522947132587433, 1.0427794456481934, -0.4195784628391266, -0.7421376705169678)); - target1 += mul(bc2, float4x4(-0.0139375794678926, 0.0099870329722762, 0.1957603991031647, 0.2892707288265228, -0.0361699834465981, 0.5173625946044922, -0.0569337680935860, 0.1873179972171783, -1.0557887554168701, 0.2226557582616806, 0.0604930445551872, 0.3329092264175415, -0.1138753890991211, 0.3199435174465179, 0.0987524166703224, 0.1584812700748444)); - target1 += mul(tr1, float4x4(-0.1148171499371529, 0.0505522675812244, -0.1067250370979309, 0.0587497279047966, -0.3531772792339325, -0.0130594912916422, -0.0051763984374702, 0.0720054879784584, -0.2512235343456268, 0.5235862731933594, 0.1203625276684761, 0.0220968686044216, 0.5066124200820923, -0.2726359069347382, 0.3687904477119446, -0.3189409077167511)); - target1 += mul(tr2, float4x4(0.2119312435388565, -0.0852348133921623, 0.1512662768363953, 0.0316264666616917, 0.2671527862548828, 0.2980401515960693, -0.1022484675049782, -0.1188400015234947, 0.1485718190670013, 0.2684609889984131, 0.1905853003263474, -0.1392537802457809, 0.4225537180900574, 0.0611033178865910, -0.0134558668360114, -0.2009256333112717)); - target1 += mul(mr1, float4x4(-0.0571580827236176, -0.0214836131781340, -0.2749050855636597, 0.0790889635682106, -0.0811165198683739, -0.2081381976604462, -0.3047288656234741, 0.0027117941062897, -0.2009213417768478, -0.7622461318969727, -0.4736055433750153, 0.2779547572135925, 0.4795901477336884, 0.7571166157722473, 1.2372496128082275, -0.7046401500701904)); - target1 += mul(mr2, float4x4(-0.1537595987319946, 0.3831464648246765, -0.1571276187896729, -0.1867597997188568, 0.6104238033294678, 0.0229409243911505, 0.2101978808641434, -0.1350114792585373, 0.4454170167446136, 0.3683053851127625, -0.3370352983474731, -0.3944822549819946, -0.4503754675388336, -0.4261152446269989, 0.5954129695892334, 0.0479046516120434)); - target1 += mul(br1, float4x4(0.2166123390197754, 0.0542660057544708, 0.1477318406105042, 0.1632562726736069, 0.4279211461544037, 0.2100527286529541, -0.0213893372565508, 0.3388189971446991, 0.2154107838869095, -0.2220560610294342, -0.0808312967419624, 0.1158433631062508, -0.1609301865100861, 0.4950682818889618, 0.4356543123722076, -0.0398453846573830)); - target1 += mul(br2, float4x4(0.0525114983320236, 0.1031088456511497, -0.2437869310379028, -0.1797652095556259, -0.2139296382665634, -0.0230520907789469, 0.0188236199319363, 0.2274840623140335, 0.2199348062276840, -0.0977248921990395, 0.0661730542778969, -0.1618098169565201, -0.1215345263481140, -0.2456843554973602, -0.1434712260961533, -0.1226665675640106)); - target1 = max(target1, 0) + float4(-0.9874631166458130, 0.2700935602188110, 1.0154639482498169, 0.7301973104476929) * min(target1, 0); - - float4 target2 = float4(0.0170604288578033, -0.0097856530919671, 0.0882583037018776, 0.0158541873097420); - target2 += mul(tl1, float4x4(0.1967887729406357, -0.0514578297734261, 0.0351365692913532, -0.0027727256529033, 0.0978068783879280, -0.3902593851089478, 0.0123964082449675, -0.1210777312517166, 0.3820665776729584, -0.5003674030303955, 0.0546922460198402, -0.0777103230357170, 0.9743819236755371, -0.3239430189132690, -0.2496883124113083, 0.1733392328023911)); - target2 += mul(tl2, float4x4(-0.1924884468317032, 0.5075340867042542, -0.0542841143906116, 0.0434595011174679, 0.1881206482648849, -0.1774751842021942, -0.1752865165472031, 0.0315622761845589, -0.4268247485160828, 0.3984751403331757, -0.0704891532659531, -0.0969574451446533, 0.1777588576078415, 0.2743588685989380, -0.0317226983606815, -0.1993683725595474)); - target2 += mul(ml1, float4x4(-0.1353080570697784, -0.0161478724330664, 0.0713945776224136, 0.1482806354761124, -0.0025943452492356, -0.0296892002224922, 0.1426411569118500, 0.3263220191001892, 0.3354269266128540, -0.0851829424500465, 0.1781585812568665, -0.0421005003154278, -0.5350970029830933, -1.3111218214035034, 0.2340501397848129, -0.9513134956359863)); - target2 += mul(ml2, float4x4(-0.0965117588639259, -0.3247327506542206, 0.1102668121457100, -0.1604842394590378, 0.4602060914039612, 0.0756718367338181, -0.3309438228607178, -0.3001569509506226, 0.2267884165048599, 0.1377216577529907, -0.0426548905670643, 0.1272846758365631, -0.1080727055668831, -0.4640344679355621, -0.3294694125652313, -0.2043451815843582)); - target2 += mul(bl1, float4x4(0.0720937326550484, -0.1846759617328644, -0.1708657741546631, -0.0725364983081818, 0.1260499358177185, -0.1162428930401802, -0.2501497566699982, -0.1012131050229073, 0.2760527133941650, -0.0939920768141747, 0.2875119149684906, 0.0406376719474792, 0.1008657962083817, -0.1240409687161446, -0.4121425449848175, 0.3269978761672974)); - target2 += mul(bl2, float4x4(0.0125542022287846, 0.2582587003707886, 0.1726561784744263, 0.0180624593049288, 0.0579377673566341, -0.0663961246609688, -0.0078865075483918, -0.0506187379360199, -0.0882500410079956, -0.0282228980213404, -0.1616529822349548, -0.1816279888153076, 0.1482390761375427, -0.3260181546211243, 0.1757252663373947, 0.1394872069358826)); - target2 += mul(tc1, float4x4(-0.0768914818763733, -0.1610976904630661, -0.0581125281751156, 0.1043644994497299, -0.0832794085144997, 0.1854220479726791, -0.0097211552783847, 0.2851990759372711, -0.6227292418479919, -0.2649715840816498, 0.4023403823375702, -0.2051993161439896, 0.4956052303314209, 0.8367735743522644, 0.2145122885704041, 0.3317213356494904)); - target2 += mul(tc2, float4x4(0.9697892665863037, -0.2038540095090866, -0.3166446983814240, -0.0504710040986538, 0.1532189846038818, 0.3050784170627594, -0.1300316900014877, -0.2059933692216873, 0.4894859492778778, -0.3882815837860107, -0.7273328304290771, 0.3463444113731384, -0.8791087865829468, -0.2762917280197144, -0.2789021134376526, -0.1907687485218048)); - target2 += mul(mc1, float4x4(0.1201086342334747, 0.1047629937529564, -0.3030976653099060, 0.0362001918256283, -0.2728919386863708, -0.5267004966735840, 0.1090360283851624, -0.2792145609855652, -0.3083780109882355, 0.2616442143917084, 0.4801669716835022, 0.1518263220787048, -1.6350433826446533, 0.7567611336708069, -1.9801075458526611, 1.1116229295730591)); - target2 += mul(mc2, float4x4(-0.6893532276153564, -0.3531652688980103, -0.1891958266496658, 1.7840391397476196, 1.4943064451217651, 0.7292221188545227, -1.7417374849319458, 0.0550648272037506, -0.1120251268148422, 0.8250336647033691, 1.1355321407318115, -1.1124770641326904, -0.2689424455165863, 0.9269363284111023, 1.9637582302093506, -0.3709079027175903)); - target2 += mul(bc1, float4x4(-0.0544882826507092, -0.1718381494283676, -0.1280352175235748, 0.1454906165599823, -0.1176344379782677, 0.0151336872950196, -0.2567785680294037, 0.2511477768421173, 0.3610197603702545, 0.5843607783317566, 0.0848828330636024, 0.1415835469961166, 0.4969498217105865, 0.5772764682769775, 0.5247990489006042, -0.0598939247429371)); - target2 += mul(bc2, float4x4(0.0529166162014008, -0.1535185724496841, -0.1523050367832184, -0.0501741841435432, -0.0633302107453346, -0.3949260413646698, 0.1972121149301529, -0.2604303061962128, -0.1828030794858932, 0.2246686667203903, -0.6004081368446350, 0.0432657450437546, -0.2086566388607025, 0.2799777686595917, 0.2933793962001801, -0.0253354366868734)); - target2 += mul(tr1, float4x4(-0.0009011612855829, -0.0231834072619677, -0.0538895800709724, -0.0315926298499107, -0.1181581020355225, -0.2161513417959213, -0.2679739892482758, -0.0537310577929020, 0.3116895258426666, -0.1043426766991615, -0.4458046257495880, 0.0201701205223799, -0.8122410774230957, -0.5180496573448181, -0.1277437359094620, -0.1671603769063950)); - target2 += mul(tr2, float4x4(0.2782520055770874, 0.2888738214969635, 0.2265798002481461, 0.0807978361845016, -0.1869603991508484, -0.2283953428268433, -0.4374879896640778, 0.2510242760181427, 0.4857149720191956, -0.1796883046627045, 0.2519723176956177, 0.1950220316648483, -0.0102280136197805, -0.4034306704998016, -0.2963733077049255, 0.4622495174407959)); - target2 += mul(mr1, float4x4(0.0423482730984688, -0.2923308312892914, -0.0082442639395595, 0.2373020350933075, 0.1710588335990906, 0.0392467305064201, 0.0116449045017362, 0.3267012536525726, -0.8832122683525085, 0.7230877280235291, -0.2364437282085419, -0.5563997030258179, 0.2307662665843964, -1.0188170671463013, 0.3680693507194519, -0.8103905916213989)); - target2 += mul(mr2, float4x4(-0.1161488518118858, -0.6775091290473938, -0.0844684988260269, -0.3587656021118164, -0.0190705843269825, -0.5557464957237244, 0.3721883893013000, 0.1322396695613861, -0.1157554760575294, -0.1545359939336777, 0.4236145615577698, -0.9936751127243042, -0.0805041715502739, -0.2628504335880280, 0.1407603323459625, -0.4183281958103180)); - target2 += mul(br1, float4x4(-0.0447837486863136, 0.0488513521850109, -0.1358503550291061, 0.0202376656234264, -0.0258089359849691, -0.2975459396839142, -0.1580457836389542, 0.1025377139449120, -0.0301857776939869, -0.1071514338254929, -0.0150549048557878, 0.0794499814510345, 0.4558653235435486, 0.8186704516410828, 0.2287982851266861, -0.1438317447900772)); - target2 += mul(br2, float4x4(0.1536326855421066, 0.2366072386503220, 0.0591898001730442, -0.0547822229564190, 0.1068296432495117, -0.0342746265232563, -0.1388098448514938, 0.3756637275218964, 0.2406303733587265, 0.1881252676248550, -0.1518276780843735, 0.1872117221355438, 0.1484777033329010, 0.1825126409530640, -0.1781855672597885, 0.1650572419166565)); - target2 = max(target2, 0) + float4(-0.5060276985168457, -1.3971502780914307, 0.7606850862503052, -0.5118398666381836) * min(target2, 0); + MF4 tl1 = featureMap1.SampleLevel(sam, pos - inputPt, 0); + MF4 ml1 = featureMap1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl1 = featureMap1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc1 = featureMap1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc1 = featureMap1.SampleLevel(sam, pos, 0); + MF4 bc1 = featureMap1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr1 = featureMap1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr1 = featureMap1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br1 = featureMap1.SampleLevel(sam, pos + inputPt, 0); + + MF4 tl2 = featureMap2.SampleLevel(sam, pos - inputPt, 0); + MF4 ml2 = featureMap2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl2 = featureMap2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc2 = featureMap2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc2 = featureMap2.SampleLevel(sam, pos, 0); + MF4 bc2 = featureMap2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr2 = featureMap2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr2 = featureMap2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br2 = featureMap2.SampleLevel(sam, pos + inputPt, 0); + + MF4 target1 = MF4(-0.0031195033807307, -0.0977938771247864, 0.0337169878184795, 0.0840695425868034); + target1 = MulAdd(tl1, MF4x4(0.0028950418345630, 0.2153117954730988, -0.1120878234505653, 0.1065240651369095, -0.0902118757367134, 0.2227627933025360, -0.1268638819456100, 0.0378417931497097, 0.0262128096073866, 0.1100647151470184, -0.0224360711872578, -0.2487984448671341, 0.3278627693653107, 0.3930607438087463, -0.3361104130744934, -0.2318559885025024), target1); + target1 = MulAdd(tl2, MF4x4(-0.1388952732086182, -0.0210590325295925, -0.0107318097725511, 0.1340505480766296, -0.2403931617736816, 0.4324082732200623, -0.1229069381952286, -0.1129430904984474, -0.2194076776504517, -0.2529417872428894, 0.2493167072534561, 0.1228863969445229, -0.6289532780647278, 0.2511698901653290, -0.1145481690764427, -0.1931190490722656), target1); + target1 = MulAdd(ml1, MF4x4(0.0802633240818977, -0.2823207676410675, -0.0453533977270126, 0.2149281948804855, 0.2391699999570847, -0.3012505769729614, -0.0672336667776108, 0.1134754717350006, -0.1874532252550125, 0.2426864057779312, 0.0001024203302222, -0.2685940861701965, -0.2393693625926971, -0.0148733090609312, 0.4815890491008759, -0.5666245818138123), target1); + target1 = MulAdd(ml2, MF4x4(0.2353847920894623, 0.7481728792190552, 0.0613395981490612, -0.3136185705661774, -0.6453479528427124, 0.2987860739231110, -0.1935778856277466, -0.4407877624034882, -0.1155721992254257, -0.8143445253372192, -0.1829861551523209, 0.0808847546577454, 0.3689287006855011, -0.1318729221820831, 0.1494798213243484, -0.7250000834465027), target1); + target1 = MulAdd(bl1, MF4x4(-0.2325237691402435, -0.0383906811475754, -0.0762876123189926, 0.0158057715743780, -0.3222318589687347, -0.0946261659264565, -0.1157991588115692, 0.2080847620964050, -0.1521182358264923, 0.4038263857364655, -0.2508496940135956, 0.0620750486850739, 0.1382832378149033, -0.1788915544748306, -0.1054779291152954, -0.1079574525356293), target1); + target1 = MulAdd(bl2, MF4x4(0.0441149584949017, -0.1473216116428375, 0.1350974887609482, -0.2101743519306183, 0.4860914349555969, -0.0438372306525707, 0.1496813595294952, 0.1337997019290924, 0.2939592599868774, -0.2875731289386749, 0.3024089336395264, 0.2730985283851624, 0.3860357403755188, -0.2070714235305786, 0.0471001267433167, 0.0515876151621342), target1); + target1 = MulAdd(tc1, MF4x4(-0.0282726809382439, -0.0808876901865005, -0.1294671446084976, 0.0327957235276699, 0.2005466669797897, 0.0710152760148048, -0.2951858937740326, 0.1284866034984589, -0.3245949447154999, -0.2784474790096283, -0.0651488602161407, 0.2024642229080200, -0.1790685206651688, -0.1523464322090149, 0.0683958381414413, -0.0721997469663620), target1); + target1 = MulAdd(tc2, MF4x4(0.2084605693817139, 0.2224501073360443, 0.2990169227123260, -0.0822417438030243, 0.1661120802164078, 0.2242873460054398, 0.3008987009525299, -0.0589924007654190, 1.0089585781097412, 0.3364263474941254, 0.3114744126796722, -0.4205997586250305, 0.2149223387241364, -0.2686808407306671, 0.6869788169860840, 0.0397010855376720), target1); + target1 = MulAdd(mc1, MF4x4(-0.1719545274972916, 0.2342357635498047, -0.1108281537890434, 0.0051285717636347, -0.5348495244979858, -0.0063809715211391, -0.2947000265121460, 0.0092384787276387, 0.1788431107997894, -0.8757466077804565, -0.0199933666735888, -0.0933040529489517, -1.1017562150955200, -1.1397477388381958, -0.8490890264511108, 2.0844755172729492), target1); + target1 = MulAdd(mc2, MF4x4(-0.7517850399017334, -0.6626257300376892, -1.7181873321533203, 1.3924138545989990, 0.3148886561393738, 1.2373961210250854, 0.8413697481155396, 0.2569177746772766, 0.1905626207590103, -0.8806108832359314, 0.7340399026870728, 1.8838906288146973, -0.1782593727111816, 0.3429502546787262, -0.3488911390304565, -0.6653195619583130), target1); + target1 = MulAdd(bc1, MF4x4(0.1612574905157089, -0.0092199165374041, -0.2294603884220123, 0.2070839852094650, 0.1995067894458771, -0.1586991697549820, -0.1423671096563339, 0.1524601876735687, 0.6368640661239624, -0.1302748024463654, 0.2046667486429214, 0.4024843573570251, 0.3522947132587433, 1.0427794456481934, -0.4195784628391266, -0.7421376705169678), target1); + target1 = MulAdd(bc2, MF4x4(-0.0139375794678926, 0.0099870329722762, 0.1957603991031647, 0.2892707288265228, -0.0361699834465981, 0.5173625946044922, -0.0569337680935860, 0.1873179972171783, -1.0557887554168701, 0.2226557582616806, 0.0604930445551872, 0.3329092264175415, -0.1138753890991211, 0.3199435174465179, 0.0987524166703224, 0.1584812700748444), target1); + target1 = MulAdd(tr1, MF4x4(-0.1148171499371529, 0.0505522675812244, -0.1067250370979309, 0.0587497279047966, -0.3531772792339325, -0.0130594912916422, -0.0051763984374702, 0.0720054879784584, -0.2512235343456268, 0.5235862731933594, 0.1203625276684761, 0.0220968686044216, 0.5066124200820923, -0.2726359069347382, 0.3687904477119446, -0.3189409077167511), target1); + target1 = MulAdd(tr2, MF4x4(0.2119312435388565, -0.0852348133921623, 0.1512662768363953, 0.0316264666616917, 0.2671527862548828, 0.2980401515960693, -0.1022484675049782, -0.1188400015234947, 0.1485718190670013, 0.2684609889984131, 0.1905853003263474, -0.1392537802457809, 0.4225537180900574, 0.0611033178865910, -0.0134558668360114, -0.2009256333112717), target1); + target1 = MulAdd(mr1, MF4x4(-0.0571580827236176, -0.0214836131781340, -0.2749050855636597, 0.0790889635682106, -0.0811165198683739, -0.2081381976604462, -0.3047288656234741, 0.0027117941062897, -0.2009213417768478, -0.7622461318969727, -0.4736055433750153, 0.2779547572135925, 0.4795901477336884, 0.7571166157722473, 1.2372496128082275, -0.7046401500701904), target1); + target1 = MulAdd(mr2, MF4x4(-0.1537595987319946, 0.3831464648246765, -0.1571276187896729, -0.1867597997188568, 0.6104238033294678, 0.0229409243911505, 0.2101978808641434, -0.1350114792585373, 0.4454170167446136, 0.3683053851127625, -0.3370352983474731, -0.3944822549819946, -0.4503754675388336, -0.4261152446269989, 0.5954129695892334, 0.0479046516120434), target1); + target1 = MulAdd(br1, MF4x4(0.2166123390197754, 0.0542660057544708, 0.1477318406105042, 0.1632562726736069, 0.4279211461544037, 0.2100527286529541, -0.0213893372565508, 0.3388189971446991, 0.2154107838869095, -0.2220560610294342, -0.0808312967419624, 0.1158433631062508, -0.1609301865100861, 0.4950682818889618, 0.4356543123722076, -0.0398453846573830), target1); + target1 = MulAdd(br2, MF4x4(0.0525114983320236, 0.1031088456511497, -0.2437869310379028, -0.1797652095556259, -0.2139296382665634, -0.0230520907789469, 0.0188236199319363, 0.2274840623140335, 0.2199348062276840, -0.0977248921990395, 0.0661730542778969, -0.1618098169565201, -0.1215345263481140, -0.2456843554973602, -0.1434712260961533, -0.1226665675640106), target1); + target1 = max(target1, 0) + MF4(-0.9874631166458130, 0.2700935602188110, 1.0154639482498169, 0.7301973104476929) * min(target1, 0); + + MF4 target2 = MF4(0.0170604288578033, -0.0097856530919671, 0.0882583037018776, 0.0158541873097420); + target2 = MulAdd(tl1, MF4x4(0.1967887729406357, -0.0514578297734261, 0.0351365692913532, -0.0027727256529033, 0.0978068783879280, -0.3902593851089478, 0.0123964082449675, -0.1210777312517166, 0.3820665776729584, -0.5003674030303955, 0.0546922460198402, -0.0777103230357170, 0.9743819236755371, -0.3239430189132690, -0.2496883124113083, 0.1733392328023911), target2); + target2 = MulAdd(tl2, MF4x4(-0.1924884468317032, 0.5075340867042542, -0.0542841143906116, 0.0434595011174679, 0.1881206482648849, -0.1774751842021942, -0.1752865165472031, 0.0315622761845589, -0.4268247485160828, 0.3984751403331757, -0.0704891532659531, -0.0969574451446533, 0.1777588576078415, 0.2743588685989380, -0.0317226983606815, -0.1993683725595474), target2); + target2 = MulAdd(ml1, MF4x4(-0.1353080570697784, -0.0161478724330664, 0.0713945776224136, 0.1482806354761124, -0.0025943452492356, -0.0296892002224922, 0.1426411569118500, 0.3263220191001892, 0.3354269266128540, -0.0851829424500465, 0.1781585812568665, -0.0421005003154278, -0.5350970029830933, -1.3111218214035034, 0.2340501397848129, -0.9513134956359863), target2); + target2 = MulAdd(ml2, MF4x4(-0.0965117588639259, -0.3247327506542206, 0.1102668121457100, -0.1604842394590378, 0.4602060914039612, 0.0756718367338181, -0.3309438228607178, -0.3001569509506226, 0.2267884165048599, 0.1377216577529907, -0.0426548905670643, 0.1272846758365631, -0.1080727055668831, -0.4640344679355621, -0.3294694125652313, -0.2043451815843582), target2); + target2 = MulAdd(bl1, MF4x4(0.0720937326550484, -0.1846759617328644, -0.1708657741546631, -0.0725364983081818, 0.1260499358177185, -0.1162428930401802, -0.2501497566699982, -0.1012131050229073, 0.2760527133941650, -0.0939920768141747, 0.2875119149684906, 0.0406376719474792, 0.1008657962083817, -0.1240409687161446, -0.4121425449848175, 0.3269978761672974), target2); + target2 = MulAdd(bl2, MF4x4(0.0125542022287846, 0.2582587003707886, 0.1726561784744263, 0.0180624593049288, 0.0579377673566341, -0.0663961246609688, -0.0078865075483918, -0.0506187379360199, -0.0882500410079956, -0.0282228980213404, -0.1616529822349548, -0.1816279888153076, 0.1482390761375427, -0.3260181546211243, 0.1757252663373947, 0.1394872069358826), target2); + target2 = MulAdd(tc1, MF4x4(-0.0768914818763733, -0.1610976904630661, -0.0581125281751156, 0.1043644994497299, -0.0832794085144997, 0.1854220479726791, -0.0097211552783847, 0.2851990759372711, -0.6227292418479919, -0.2649715840816498, 0.4023403823375702, -0.2051993161439896, 0.4956052303314209, 0.8367735743522644, 0.2145122885704041, 0.3317213356494904), target2); + target2 = MulAdd(tc2, MF4x4(0.9697892665863037, -0.2038540095090866, -0.3166446983814240, -0.0504710040986538, 0.1532189846038818, 0.3050784170627594, -0.1300316900014877, -0.2059933692216873, 0.4894859492778778, -0.3882815837860107, -0.7273328304290771, 0.3463444113731384, -0.8791087865829468, -0.2762917280197144, -0.2789021134376526, -0.1907687485218048), target2); + target2 = MulAdd(mc1, MF4x4(0.1201086342334747, 0.1047629937529564, -0.3030976653099060, 0.0362001918256283, -0.2728919386863708, -0.5267004966735840, 0.1090360283851624, -0.2792145609855652, -0.3083780109882355, 0.2616442143917084, 0.4801669716835022, 0.1518263220787048, -1.6350433826446533, 0.7567611336708069, -1.9801075458526611, 1.1116229295730591), target2); + target2 = MulAdd(mc2, MF4x4(-0.6893532276153564, -0.3531652688980103, -0.1891958266496658, 1.7840391397476196, 1.4943064451217651, 0.7292221188545227, -1.7417374849319458, 0.0550648272037506, -0.1120251268148422, 0.8250336647033691, 1.1355321407318115, -1.1124770641326904, -0.2689424455165863, 0.9269363284111023, 1.9637582302093506, -0.3709079027175903), target2); + target2 = MulAdd(bc1, MF4x4(-0.0544882826507092, -0.1718381494283676, -0.1280352175235748, 0.1454906165599823, -0.1176344379782677, 0.0151336872950196, -0.2567785680294037, 0.2511477768421173, 0.3610197603702545, 0.5843607783317566, 0.0848828330636024, 0.1415835469961166, 0.4969498217105865, 0.5772764682769775, 0.5247990489006042, -0.0598939247429371), target2); + target2 = MulAdd(bc2, MF4x4(0.0529166162014008, -0.1535185724496841, -0.1523050367832184, -0.0501741841435432, -0.0633302107453346, -0.3949260413646698, 0.1972121149301529, -0.2604303061962128, -0.1828030794858932, 0.2246686667203903, -0.6004081368446350, 0.0432657450437546, -0.2086566388607025, 0.2799777686595917, 0.2933793962001801, -0.0253354366868734), target2); + target2 = MulAdd(tr1, MF4x4(-0.0009011612855829, -0.0231834072619677, -0.0538895800709724, -0.0315926298499107, -0.1181581020355225, -0.2161513417959213, -0.2679739892482758, -0.0537310577929020, 0.3116895258426666, -0.1043426766991615, -0.4458046257495880, 0.0201701205223799, -0.8122410774230957, -0.5180496573448181, -0.1277437359094620, -0.1671603769063950), target2); + target2 = MulAdd(tr2, MF4x4(0.2782520055770874, 0.2888738214969635, 0.2265798002481461, 0.0807978361845016, -0.1869603991508484, -0.2283953428268433, -0.4374879896640778, 0.2510242760181427, 0.4857149720191956, -0.1796883046627045, 0.2519723176956177, 0.1950220316648483, -0.0102280136197805, -0.4034306704998016, -0.2963733077049255, 0.4622495174407959), target2); + target2 = MulAdd(mr1, MF4x4(0.0423482730984688, -0.2923308312892914, -0.0082442639395595, 0.2373020350933075, 0.1710588335990906, 0.0392467305064201, 0.0116449045017362, 0.3267012536525726, -0.8832122683525085, 0.7230877280235291, -0.2364437282085419, -0.5563997030258179, 0.2307662665843964, -1.0188170671463013, 0.3680693507194519, -0.8103905916213989), target2); + target2 = MulAdd(mr2, MF4x4(-0.1161488518118858, -0.6775091290473938, -0.0844684988260269, -0.3587656021118164, -0.0190705843269825, -0.5557464957237244, 0.3721883893013000, 0.1322396695613861, -0.1157554760575294, -0.1545359939336777, 0.4236145615577698, -0.9936751127243042, -0.0805041715502739, -0.2628504335880280, 0.1407603323459625, -0.4183281958103180), target2); + target2 = MulAdd(br1, MF4x4(-0.0447837486863136, 0.0488513521850109, -0.1358503550291061, 0.0202376656234264, -0.0258089359849691, -0.2975459396839142, -0.1580457836389542, 0.1025377139449120, -0.0301857776939869, -0.1071514338254929, -0.0150549048557878, 0.0794499814510345, 0.4558653235435486, 0.8186704516410828, 0.2287982851266861, -0.1438317447900772), target2); + target2 = MulAdd(br2, MF4x4(0.1536326855421066, 0.2366072386503220, 0.0591898001730442, -0.0547822229564190, 0.1068296432495117, -0.0342746265232563, -0.1388098448514938, 0.3756637275218964, 0.2406303733587265, 0.1881252676248550, -0.1518276780843735, 0.1872117221355438, 0.1484777033329010, 0.1825126409530640, -0.1781855672597885, 0.1650572419166565), target2); + target2 = max(target2, 0) + MF4(-0.5060276985168457, -1.3971502780914307, 0.7606850862503052, -0.5118398666381836) * min(target2, 0); tex1[gxy] = target1; tex2[gxy] = target2; @@ -278,67 +281,67 @@ void Pass3(uint2 blockStart, uint3 threadId) { // [tl, tc, tr] // [ml, mc, mr] // [bl, bc, br] - float4 tl1 = tex1.SampleLevel(sam, pos - inputPt, 0); - float4 ml1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc1 = tex1.SampleLevel(sam, pos, 0); - float4 bc1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br1 = tex1.SampleLevel(sam, pos + inputPt, 0); - - float4 tl2 = tex2.SampleLevel(sam, pos - inputPt, 0); - float4 ml2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc2 = tex2.SampleLevel(sam, pos, 0); - float4 bc2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br2 = tex2.SampleLevel(sam, pos + inputPt, 0); - - float4 target1 = float4(0.2456959486007690, 0.1773831695318222, -0.0800321474671364, -0.1356369554996490); - target1 += mul(tl1, float4x4(0.0867862403392792, -0.0188770499080420, -0.1502736657857895, -0.1099725291132927, -0.3013791441917419, 0.0430033504962921, 0.0345034115016460, -0.0400283746421337, 0.0455053038895130, -0.0785564482212067, -0.1695308536291122, 0.0467130616307259, -0.0208115540444851, 0.0026763146743178, -0.1338489353656769, -0.1844547539949417)); - target1 += mul(tl2, float4x4(0.1620235443115234, 0.0102646192535758, -0.0163768343627453, 0.0822434723377228, 0.1546859890222549, 0.0250307265669107, 0.0373145006597042, -0.0117816952988505, 0.0517709590494633, -0.0252467226237059, -0.1536794751882553, -0.0202652048319578, -0.3231309056282043, 0.1350613087415695, 0.1932685226202011, 0.1793868690729141)); - target1 += mul(ml1, float4x4(-0.5794479846954346, 0.1861644536256790, 0.1153499931097031, 0.1069827228784561, 0.4245908558368683, 0.1373304873704910, -0.1780052036046982, 0.0466761402785778, -1.1490619182586670, 0.8257195353507996, 0.0008257642621174, 0.0699498802423477, 0.3857855796813965, 0.1599738448858261, -0.0160159282386303, -0.1072350814938545)); - target1 += mul(ml2, float4x4(-0.0448461174964905, -0.1027067080140114, 0.1546361148357391, -0.1321994811296463, 0.3319362998008728, 0.0670638754963875, -0.0980701223015785, -0.1242648735642433, 0.0487120002508163, 0.1034812107682228, -0.3318608701229095, -0.0663819089531898, -0.7049940824508667, -0.2681597173213959, 0.5434955358505249, 0.3798713982105255)); - target1 += mul(bl1, float4x4(-0.1650677323341370, -0.1774029582738876, -0.0693891644477844, 0.0644233599305153, 0.0096654882654548, 0.0590313524007797, -0.0626199543476105, -0.1101114749908447, -0.0919653624296188, -0.2198607474565506, -0.3686812222003937, -0.0030939118005335, -0.0286871381103992, -0.0267137177288532, -0.2114386558532715, -0.1006813868880272)); - target1 += mul(bl2, float4x4(-0.1621828079223633, -0.3327856957912445, -0.3447196483612061, -0.0941574051976204, 0.1980617940425873, -0.0039776018820703, 0.0631400719285011, -0.0371704883873463, 0.0801121518015862, -0.2228745818138123, -0.1361533999443054, -0.0061448244377971, -0.2867666780948639, -0.0285903755575418, -0.2431204020977020, 0.0123175233602524)); - target1 += mul(tc1, float4x4(-0.0213332772254944, 0.0521896183490753, 0.1267389953136444, 0.0592065081000328, 0.2370698899030685, -0.0788677260279655, -0.0918647274374962, 0.0576282069087029, -0.0021516692359000, -0.1609319597482681, 0.1739181131124496, 0.4105915725231171, -0.0331462696194649, 0.0667985677719116, 0.0239557847380638, 0.2053552418947220)); - target1 += mul(tc2, float4x4(0.0452669933438301, -0.0624216794967651, -0.2210896909236908, -0.2319414317607880, 0.0553048253059387, -0.0195244718343019, -0.0948461145162582, -0.1411011815071106, 0.1357870846986771, -0.0044978843070567, 0.0117386765778065, 0.2855076789855957, 0.0721756964921951, 0.0725790113210678, 0.0879172906279564, 0.2261598110198975)); - target1 += mul(mc1, float4x4(0.1625189036130905, 0.2556113302707672, 0.0096751591190696, 0.4720825850963593, 0.1724947541952133, 0.7794855833053589, -0.5798769593238831, -0.5527915954589844, -0.2930226027965546, -0.1513507068157196, -0.1671935617923737, 0.1259696036577225, -1.5636392831802368, -0.6248261332511902, -0.7778694629669189, 0.7258287668228149)); - target1 += mul(mc2, float4x4(-0.2203702777624130, -0.2413295805454254, 0.5873484611511230, 0.8019542694091797, 0.2896324992179871, -0.0073753874748945, -0.4705016911029816, -0.4037020802497864, -0.5916352272033691, 0.8720123767852783, 1.4509203433990479, -0.4347604215145111, 0.1296572685241699, -0.0382503382861614, 1.0146147012710571, 0.6662492156028748)); - target1 += mul(bc1, float4x4(0.1450282633304596, 0.1838535815477371, -0.2957614958286285, -0.1175402477383614, 0.0455737337470055, -0.1042275950312614, 0.2409446090459824, 0.2161487638950348, 0.2523523867130280, -0.1657318323850632, 0.3264083266258240, 0.0015554791316390, 0.0756938308477402, 0.6486120820045471, 0.2910028994083405, 0.0061267162673175)); - target1 += mul(bc2, float4x4(0.1902535557746887, -1.9667011499404907, 0.5702443718910217, 0.1346294432878494, -0.1391871571540833, 0.0248745214194059, -0.0059022656641901, -0.1015660688281059, -0.0244528464972973, 0.6021597385406494, 0.1408251225948334, 0.1453502923250198, 0.1481679081916809, -0.0782008916139603, 0.1340244859457016, 0.2094520628452301)); - target1 += mul(tr1, float4x4(0.0108401505276561, 0.0073569868691266, 0.0448315776884556, 0.0920599550008774, -0.0208782758563757, -0.0072226687334478, 0.1590231209993362, 0.0974729061126709, -0.0504082255065441, -0.1492372304201126, -0.0240942239761353, -0.3387995064258575, -0.0284501910209656, -0.0475567393004894, -0.1351375281810760, -0.0968868359923363)); - target1 += mul(tr2, float4x4(-0.0175144840031862, 0.0215768050402403, -0.2070538252592087, -0.1020313948392868, -0.0632536634802818, 0.0187655575573444, 0.0033090459182858, 0.0483726076781750, 0.0874270573258400, 0.0391933582723141, -0.0733725428581238, 0.0455813333392143, 0.0519542098045349, -0.0167136136442423, 0.0001770213857526, -0.0226714108139277)); - target1 += mul(mr1, float4x4(0.0230981707572937, 0.0211336743086576, -0.0202524177730083, 0.0004777485737577, -0.3133100867271423, -0.2222708314657211, -0.3225338459014893, 0.0252504348754883, -0.1565012782812119, -0.1223759651184082, -0.1677924543619156, 0.1436173915863037, -0.1002913638949394, -0.4352810978889465, -0.1214068830013275, 0.1200122535228729)); - target1 += mul(mr2, float4x4(0.2746300697326660, 0.0240563396364450, 0.2214205712080002, -0.0140676703304052, -0.1697816103696823, 0.0239461977034807, -0.2184012532234192, -0.1122284159064293, -0.0025032388512045, -0.1982196122407913, -0.0088773546740413, -0.0592936985194683, 0.0981788560748100, 0.0590783730149269, 0.1699221283197403, 0.1146017014980316)); - target1 += mul(br1, float4x4(-0.1190557554364204, 0.0139884017407894, -0.3765408396720886, -0.1967576593160629, -0.0013050300767645, 0.0838785469532013, 0.0467342510819435, 0.0197970345616341, 0.0199079178273678, 0.1127095147967339, -0.0382974669337273, -0.0808331072330475, 0.0045804185792804, 0.1423084437847137, 0.0275978501886129, 0.0051016276702285)); - target1 += mul(br2, float4x4(0.0694821104407310, -0.1185832619667053, 0.1340767890214920, -0.0096760904416442, -0.0057105780579150, -0.0358094684779644, -0.0208928529173136, -0.0422658622264862, -0.1662766784429550, 0.0397685728967190, -0.0169682707637548, 0.1427496373653412, 0.1324639916419983, 0.0579542480409145, 0.1712465286254883, 0.1062873229384422)); - target1 = max(target1, 0) + float4(0.0476732961833477, -0.0824369415640831, 1.4746414422988892, 1.6789640188217163) * min(target1, 0); - - float4 target2 = float4(-0.0375947281718254, 0.2783663868904114, 0.0855874642729759, -0.0183580406010151); - target2 += mul(tl1, float4x4(-0.3375159502029419, -0.0481248162686825, 0.0022695809602737, -0.1379150450229645, 0.2087368816137314, -0.1413425505161285, 0.0311671234667301, 0.2090687304735184, -0.1255441159009933, -0.3856352567672729, 0.0592494457960129, -0.2192105948925018, 0.0635740235447884, -0.0259831510484219, 0.1284605711698532, 0.1543060839176178)); - target2 += mul(tl2, float4x4(0.0265662875026464, 0.1603409945964813, -0.0106395082548261, 0.0252655427902937, 0.0633112043142319, 0.1634869277477264, 0.0606260225176811, -0.0386067330837250, 0.1025275588035583, -0.0086877709254622, 0.0572752207517624, 0.2958410382270813, 0.2315495908260345, -0.0511345490813255, -0.0684579163789749, 0.2366850525140762)); - target2 += mul(ml1, float4x4(-0.6637977361679077, 0.1115299314260483, 0.0334465689957142, -0.0595322623848915, 0.0194256473332644, 0.1154914125800133, -0.0093330284580588, -0.2107555270195007, 0.2593949139118195, -0.2310725152492523, -0.0191440880298615, 0.0831847414374352, 0.0869263112545013, 0.1271044909954071, -0.0199039578437805, 0.0421413294970989)); - target2 += mul(ml2, float4x4(0.1171221211552620, -0.2125719487667084, -0.0189515724778175, 0.2465390264987946, 0.1773879528045654, 0.2518055438995361, 0.0552976131439209, -0.1894477456808090, 0.1769066900014877, -0.1464872211217880, -0.0573948174715042, -0.4012156426906586, 0.2111275196075439, -0.5377770662307739, -0.2866773009300232, 0.1336809694766998)); - target2 += mul(bl1, float4x4(-0.6472494006156921, -0.0555078461766243, 0.0564644038677216, 0.0711399838328362, -0.0228650532662868, -0.0755083113908768, 0.0132119813933969, 0.1565485745668411, 0.0769101306796074, -0.4400988519191742, -0.0369989611208439, -0.0459617786109447, 0.1246264874935150, -0.2121030986309052, 0.0351070538163185, 0.1162980273365974)); - target2 += mul(bl2, float4x4(-0.0239488855004311, -0.4389697015285492, -0.0041466108523309, 0.2026203870773315, 0.0299914367496967, 0.0214463528245687, -0.0340079553425312, -0.0866646468639374, -0.1258078664541245, 0.0335666500031948, 0.0279387012124062, 0.0377361401915550, -0.0037173877935857, -0.1970001310110092, 0.0554011650383472, 0.0747631862759590)); - target2 += mul(tc1, float4x4(-0.5669959783554077, -0.0150139974430203, -0.0079386057332158, -0.1156958788633347, -0.0749717876315117, 0.1512815952301025, -0.0340143367648125, -0.1504366695880890, -0.1540268361568451, -0.0089722918346524, -0.0974140912294388, -0.4191842377185822, 0.0414282791316509, -0.0518460534512997, -0.1025082096457481, -0.1974052190780640)); - target2 += mul(tc2, float4x4(0.1328157931566238, -0.0447603911161423, 0.0625142455101013, 0.1125901266932487, 0.1033857688307762, 0.0811788439750671, 0.1270843595266342, -0.0564684942364693, 0.0797754079103470, 0.1379490494728088, -0.2863929569721222, -0.0602805763483047, 0.2699469923973083, 0.1226278319954872, -0.2505964636802673, 0.0639543756842613)); - target2 += mul(mc1, float4x4(-1.5813068151473999, 0.5872991085052490, -0.2429279834032059, -0.4303708970546722, 0.2854560911655426, 1.0167927742004395, 0.8617131114006042, 0.2191447615623474, 0.9627910852432251, 0.7867327332496643, 1.2628984451293945, 0.8908280134201050, -0.4586973786354065, -0.7981753349304199, 0.4780183732509613, -0.9264264106750488)); - target2 += mul(mc2, float4x4(0.2435170710086823, -0.0829131007194519, -0.3455205559730530, 0.4117922484874725, 0.2749316394329071, 0.1895177811384201, 0.4110289216041565, -0.1298204958438873, 0.1637304723262787, 0.8604004383087158, 1.0940867662429810, -0.3959148228168488, 0.3289682567119598, -0.0633709058165550, -2.0705056190490723, 0.1684481352567673)); - target2 += mul(bc1, float4x4(-0.8055392503738403, 0.2874773740768433, -0.1400482803583145, -0.1834644526243210, 0.0150187248364091, 0.0192099008709192, -0.0783268958330154, -0.2944276928901672, 0.0451190918684006, 0.1181604787707329, 0.1095703318715096, -0.2282790690660477, 0.1960140317678452, 0.3371279239654541, 0.0243086088448763, -0.0463834926486015)); - target2 += mul(bc2, float4x4(0.2196981906890869, -0.0534196794033051, -0.0839012116193771, 0.2049407809972763, 0.0194450635462999, -0.0593264624476433, 0.1640597432851791, 0.0274629276245832, -0.1243807971477509, 0.0611803941428661, -0.1799024045467377, -0.1864561140537262, 0.2465235143899918, -0.0211831126362085, -0.2282803803682327, -0.1430586874485016)); - target2 += mul(tr1, float4x4(-0.3611976802349091, 0.0288475938141346, -0.0297703798860312, -0.0418547466397285, -0.3251218497753143, -0.0134126413613558, -0.0686949566006660, -0.0233805924654007, -0.2749838531017303, -0.2486374378204346, 0.0724888965487480, 0.1193816959857941, -0.2721751034259796, -0.2033173292875290, 0.0248280912637711, 0.0589503161609173)); - target2 += mul(tr2, float4x4(0.1689156740903854, 0.0712056383490562, 0.1930764019489288, 0.0722641199827194, 0.0640723854303360, 0.0566449724137783, 0.0815568938851357, -0.0213705692440271, -0.1826065927743912, 0.0393006950616837, -0.1493768393993378, 0.0386883616447449, -0.0130320172756910, -0.0327960774302483, -0.0204591657966375, 0.0134796360507607)); - target2 += mul(mr1, float4x4(-0.5736998319625854, -0.0392777882516384, 0.1370634734630585, -0.0484432727098465, 0.1308025121688843, -0.2323654592037201, -0.2625242173671722, -0.2956316471099854, -0.1103305667638779, -0.0551420338451862, 0.0006514643318951, 0.0022458140738308, 0.2859890162944794, -0.0839410424232483, 0.5223253369331360, 0.0280438754707575)); - target2 += mul(mr2, float4x4(0.0556896403431892, 0.0735942423343658, -0.2387326955795288, 0.1338670998811722, 0.0996377170085907, 0.0365633517503738, 0.3044275343418121, -0.0164738632738590, 0.1139278411865234, 0.1249758303165436, 0.2395293861627579, -0.0708516016602516, 0.1228865459561348, 0.0634353235363960, -0.3463226258754730, -0.0362484715878963)); - target2 += mul(br1, float4x4(-0.4082182049751282, -0.1144043654203415, 0.0233679264783859, 0.0130491442978382, 0.0237790253013372, 0.0709472149610519, 0.1275831013917923, -0.0888639837503433, -0.0140889342874289, -0.1301848441362381, -0.1709596514701843, 0.0314525589346886, 0.0293366052210331, 0.0934117212891579, 0.0720594301819801, 0.0094668027013540)); - target2 += mul(br2, float4x4(0.1782542318105698, -0.1532294601202011, -0.2839424610137939, 0.0435897931456566, 0.0621095262467861, -0.0348550342023373, 0.0461588650941849, 0.0183234252035618, 0.2204841077327728, 0.1267120093107224, 0.1979495882987976, -0.2149147540330887, 0.2110942006111145, 0.0718472301959991, -0.1063910648226738, -0.0493422709405422)); - target2 = max(target2, 0) + float4(-0.0162308197468519, 0.4942881166934967, 0.1156802847981453, 1.4069133996963501) * min(target2, 0); + MF4 tl1 = tex1.SampleLevel(sam, pos - inputPt, 0); + MF4 ml1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc1 = tex1.SampleLevel(sam, pos, 0); + MF4 bc1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + MF4 tl2 = tex2.SampleLevel(sam, pos - inputPt, 0); + MF4 ml2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc2 = tex2.SampleLevel(sam, pos, 0); + MF4 bc2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + MF4 target1 = MF4(0.2456959486007690, 0.1773831695318222, -0.0800321474671364, -0.1356369554996490); + target1 = MulAdd(tl1, MF4x4(0.0867862403392792, -0.0188770499080420, -0.1502736657857895, -0.1099725291132927, -0.3013791441917419, 0.0430033504962921, 0.0345034115016460, -0.0400283746421337, 0.0455053038895130, -0.0785564482212067, -0.1695308536291122, 0.0467130616307259, -0.0208115540444851, 0.0026763146743178, -0.1338489353656769, -0.1844547539949417), target1); + target1 = MulAdd(tl2, MF4x4(0.1620235443115234, 0.0102646192535758, -0.0163768343627453, 0.0822434723377228, 0.1546859890222549, 0.0250307265669107, 0.0373145006597042, -0.0117816952988505, 0.0517709590494633, -0.0252467226237059, -0.1536794751882553, -0.0202652048319578, -0.3231309056282043, 0.1350613087415695, 0.1932685226202011, 0.1793868690729141), target1); + target1 = MulAdd(ml1, MF4x4(-0.5794479846954346, 0.1861644536256790, 0.1153499931097031, 0.1069827228784561, 0.4245908558368683, 0.1373304873704910, -0.1780052036046982, 0.0466761402785778, -1.1490619182586670, 0.8257195353507996, 0.0008257642621174, 0.0699498802423477, 0.3857855796813965, 0.1599738448858261, -0.0160159282386303, -0.1072350814938545), target1); + target1 = MulAdd(ml2, MF4x4(-0.0448461174964905, -0.1027067080140114, 0.1546361148357391, -0.1321994811296463, 0.3319362998008728, 0.0670638754963875, -0.0980701223015785, -0.1242648735642433, 0.0487120002508163, 0.1034812107682228, -0.3318608701229095, -0.0663819089531898, -0.7049940824508667, -0.2681597173213959, 0.5434955358505249, 0.3798713982105255), target1); + target1 = MulAdd(bl1, MF4x4(-0.1650677323341370, -0.1774029582738876, -0.0693891644477844, 0.0644233599305153, 0.0096654882654548, 0.0590313524007797, -0.0626199543476105, -0.1101114749908447, -0.0919653624296188, -0.2198607474565506, -0.3686812222003937, -0.0030939118005335, -0.0286871381103992, -0.0267137177288532, -0.2114386558532715, -0.1006813868880272), target1); + target1 = MulAdd(bl2, MF4x4(-0.1621828079223633, -0.3327856957912445, -0.3447196483612061, -0.0941574051976204, 0.1980617940425873, -0.0039776018820703, 0.0631400719285011, -0.0371704883873463, 0.0801121518015862, -0.2228745818138123, -0.1361533999443054, -0.0061448244377971, -0.2867666780948639, -0.0285903755575418, -0.2431204020977020, 0.0123175233602524), target1); + target1 = MulAdd(tc1, MF4x4(-0.0213332772254944, 0.0521896183490753, 0.1267389953136444, 0.0592065081000328, 0.2370698899030685, -0.0788677260279655, -0.0918647274374962, 0.0576282069087029, -0.0021516692359000, -0.1609319597482681, 0.1739181131124496, 0.4105915725231171, -0.0331462696194649, 0.0667985677719116, 0.0239557847380638, 0.2053552418947220), target1); + target1 = MulAdd(tc2, MF4x4(0.0452669933438301, -0.0624216794967651, -0.2210896909236908, -0.2319414317607880, 0.0553048253059387, -0.0195244718343019, -0.0948461145162582, -0.1411011815071106, 0.1357870846986771, -0.0044978843070567, 0.0117386765778065, 0.2855076789855957, 0.0721756964921951, 0.0725790113210678, 0.0879172906279564, 0.2261598110198975), target1); + target1 = MulAdd(mc1, MF4x4(0.1625189036130905, 0.2556113302707672, 0.0096751591190696, 0.4720825850963593, 0.1724947541952133, 0.7794855833053589, -0.5798769593238831, -0.5527915954589844, -0.2930226027965546, -0.1513507068157196, -0.1671935617923737, 0.1259696036577225, -1.5636392831802368, -0.6248261332511902, -0.7778694629669189, 0.7258287668228149), target1); + target1 = MulAdd(mc2, MF4x4(-0.2203702777624130, -0.2413295805454254, 0.5873484611511230, 0.8019542694091797, 0.2896324992179871, -0.0073753874748945, -0.4705016911029816, -0.4037020802497864, -0.5916352272033691, 0.8720123767852783, 1.4509203433990479, -0.4347604215145111, 0.1296572685241699, -0.0382503382861614, 1.0146147012710571, 0.6662492156028748), target1); + target1 = MulAdd(bc1, MF4x4(0.1450282633304596, 0.1838535815477371, -0.2957614958286285, -0.1175402477383614, 0.0455737337470055, -0.1042275950312614, 0.2409446090459824, 0.2161487638950348, 0.2523523867130280, -0.1657318323850632, 0.3264083266258240, 0.0015554791316390, 0.0756938308477402, 0.6486120820045471, 0.2910028994083405, 0.0061267162673175), target1); + target1 = MulAdd(bc2, MF4x4(0.1902535557746887, -1.9667011499404907, 0.5702443718910217, 0.1346294432878494, -0.1391871571540833, 0.0248745214194059, -0.0059022656641901, -0.1015660688281059, -0.0244528464972973, 0.6021597385406494, 0.1408251225948334, 0.1453502923250198, 0.1481679081916809, -0.0782008916139603, 0.1340244859457016, 0.2094520628452301), target1); + target1 = MulAdd(tr1, MF4x4(0.0108401505276561, 0.0073569868691266, 0.0448315776884556, 0.0920599550008774, -0.0208782758563757, -0.0072226687334478, 0.1590231209993362, 0.0974729061126709, -0.0504082255065441, -0.1492372304201126, -0.0240942239761353, -0.3387995064258575, -0.0284501910209656, -0.0475567393004894, -0.1351375281810760, -0.0968868359923363), target1); + target1 = MulAdd(tr2, MF4x4(-0.0175144840031862, 0.0215768050402403, -0.2070538252592087, -0.1020313948392868, -0.0632536634802818, 0.0187655575573444, 0.0033090459182858, 0.0483726076781750, 0.0874270573258400, 0.0391933582723141, -0.0733725428581238, 0.0455813333392143, 0.0519542098045349, -0.0167136136442423, 0.0001770213857526, -0.0226714108139277), target1); + target1 = MulAdd(mr1, MF4x4(0.0230981707572937, 0.0211336743086576, -0.0202524177730083, 0.0004777485737577, -0.3133100867271423, -0.2222708314657211, -0.3225338459014893, 0.0252504348754883, -0.1565012782812119, -0.1223759651184082, -0.1677924543619156, 0.1436173915863037, -0.1002913638949394, -0.4352810978889465, -0.1214068830013275, 0.1200122535228729), target1); + target1 = MulAdd(mr2, MF4x4(0.2746300697326660, 0.0240563396364450, 0.2214205712080002, -0.0140676703304052, -0.1697816103696823, 0.0239461977034807, -0.2184012532234192, -0.1122284159064293, -0.0025032388512045, -0.1982196122407913, -0.0088773546740413, -0.0592936985194683, 0.0981788560748100, 0.0590783730149269, 0.1699221283197403, 0.1146017014980316), target1); + target1 = MulAdd(br1, MF4x4(-0.1190557554364204, 0.0139884017407894, -0.3765408396720886, -0.1967576593160629, -0.0013050300767645, 0.0838785469532013, 0.0467342510819435, 0.0197970345616341, 0.0199079178273678, 0.1127095147967339, -0.0382974669337273, -0.0808331072330475, 0.0045804185792804, 0.1423084437847137, 0.0275978501886129, 0.0051016276702285), target1); + target1 = MulAdd(br2, MF4x4(0.0694821104407310, -0.1185832619667053, 0.1340767890214920, -0.0096760904416442, -0.0057105780579150, -0.0358094684779644, -0.0208928529173136, -0.0422658622264862, -0.1662766784429550, 0.0397685728967190, -0.0169682707637548, 0.1427496373653412, 0.1324639916419983, 0.0579542480409145, 0.1712465286254883, 0.1062873229384422), target1); + target1 = max(target1, 0) + MF4(0.0476732961833477, -0.0824369415640831, 1.4746414422988892, 1.6789640188217163) * min(target1, 0); + + MF4 target2 = MF4(-0.0375947281718254, 0.2783663868904114, 0.0855874642729759, -0.0183580406010151); + target2 = MulAdd(tl1, MF4x4(-0.3375159502029419, -0.0481248162686825, 0.0022695809602737, -0.1379150450229645, 0.2087368816137314, -0.1413425505161285, 0.0311671234667301, 0.2090687304735184, -0.1255441159009933, -0.3856352567672729, 0.0592494457960129, -0.2192105948925018, 0.0635740235447884, -0.0259831510484219, 0.1284605711698532, 0.1543060839176178), target2); + target2 = MulAdd(tl2, MF4x4(0.0265662875026464, 0.1603409945964813, -0.0106395082548261, 0.0252655427902937, 0.0633112043142319, 0.1634869277477264, 0.0606260225176811, -0.0386067330837250, 0.1025275588035583, -0.0086877709254622, 0.0572752207517624, 0.2958410382270813, 0.2315495908260345, -0.0511345490813255, -0.0684579163789749, 0.2366850525140762), target2); + target2 = MulAdd(ml1, MF4x4(-0.6637977361679077, 0.1115299314260483, 0.0334465689957142, -0.0595322623848915, 0.0194256473332644, 0.1154914125800133, -0.0093330284580588, -0.2107555270195007, 0.2593949139118195, -0.2310725152492523, -0.0191440880298615, 0.0831847414374352, 0.0869263112545013, 0.1271044909954071, -0.0199039578437805, 0.0421413294970989), target2); + target2 = MulAdd(ml2, MF4x4(0.1171221211552620, -0.2125719487667084, -0.0189515724778175, 0.2465390264987946, 0.1773879528045654, 0.2518055438995361, 0.0552976131439209, -0.1894477456808090, 0.1769066900014877, -0.1464872211217880, -0.0573948174715042, -0.4012156426906586, 0.2111275196075439, -0.5377770662307739, -0.2866773009300232, 0.1336809694766998), target2); + target2 = MulAdd(bl1, MF4x4(-0.6472494006156921, -0.0555078461766243, 0.0564644038677216, 0.0711399838328362, -0.0228650532662868, -0.0755083113908768, 0.0132119813933969, 0.1565485745668411, 0.0769101306796074, -0.4400988519191742, -0.0369989611208439, -0.0459617786109447, 0.1246264874935150, -0.2121030986309052, 0.0351070538163185, 0.1162980273365974), target2); + target2 = MulAdd(bl2, MF4x4(-0.0239488855004311, -0.4389697015285492, -0.0041466108523309, 0.2026203870773315, 0.0299914367496967, 0.0214463528245687, -0.0340079553425312, -0.0866646468639374, -0.1258078664541245, 0.0335666500031948, 0.0279387012124062, 0.0377361401915550, -0.0037173877935857, -0.1970001310110092, 0.0554011650383472, 0.0747631862759590), target2); + target2 = MulAdd(tc1, MF4x4(-0.5669959783554077, -0.0150139974430203, -0.0079386057332158, -0.1156958788633347, -0.0749717876315117, 0.1512815952301025, -0.0340143367648125, -0.1504366695880890, -0.1540268361568451, -0.0089722918346524, -0.0974140912294388, -0.4191842377185822, 0.0414282791316509, -0.0518460534512997, -0.1025082096457481, -0.1974052190780640), target2); + target2 = MulAdd(tc2, MF4x4(0.1328157931566238, -0.0447603911161423, 0.0625142455101013, 0.1125901266932487, 0.1033857688307762, 0.0811788439750671, 0.1270843595266342, -0.0564684942364693, 0.0797754079103470, 0.1379490494728088, -0.2863929569721222, -0.0602805763483047, 0.2699469923973083, 0.1226278319954872, -0.2505964636802673, 0.0639543756842613), target2); + target2 = MulAdd(mc1, MF4x4(-1.5813068151473999, 0.5872991085052490, -0.2429279834032059, -0.4303708970546722, 0.2854560911655426, 1.0167927742004395, 0.8617131114006042, 0.2191447615623474, 0.9627910852432251, 0.7867327332496643, 1.2628984451293945, 0.8908280134201050, -0.4586973786354065, -0.7981753349304199, 0.4780183732509613, -0.9264264106750488), target2); + target2 = MulAdd(mc2, MF4x4(0.2435170710086823, -0.0829131007194519, -0.3455205559730530, 0.4117922484874725, 0.2749316394329071, 0.1895177811384201, 0.4110289216041565, -0.1298204958438873, 0.1637304723262787, 0.8604004383087158, 1.0940867662429810, -0.3959148228168488, 0.3289682567119598, -0.0633709058165550, -2.0705056190490723, 0.1684481352567673), target2); + target2 = MulAdd(bc1, MF4x4(-0.8055392503738403, 0.2874773740768433, -0.1400482803583145, -0.1834644526243210, 0.0150187248364091, 0.0192099008709192, -0.0783268958330154, -0.2944276928901672, 0.0451190918684006, 0.1181604787707329, 0.1095703318715096, -0.2282790690660477, 0.1960140317678452, 0.3371279239654541, 0.0243086088448763, -0.0463834926486015), target2); + target2 = MulAdd(bc2, MF4x4(0.2196981906890869, -0.0534196794033051, -0.0839012116193771, 0.2049407809972763, 0.0194450635462999, -0.0593264624476433, 0.1640597432851791, 0.0274629276245832, -0.1243807971477509, 0.0611803941428661, -0.1799024045467377, -0.1864561140537262, 0.2465235143899918, -0.0211831126362085, -0.2282803803682327, -0.1430586874485016), target2); + target2 = MulAdd(tr1, MF4x4(-0.3611976802349091, 0.0288475938141346, -0.0297703798860312, -0.0418547466397285, -0.3251218497753143, -0.0134126413613558, -0.0686949566006660, -0.0233805924654007, -0.2749838531017303, -0.2486374378204346, 0.0724888965487480, 0.1193816959857941, -0.2721751034259796, -0.2033173292875290, 0.0248280912637711, 0.0589503161609173), target2); + target2 = MulAdd(tr2, MF4x4(0.1689156740903854, 0.0712056383490562, 0.1930764019489288, 0.0722641199827194, 0.0640723854303360, 0.0566449724137783, 0.0815568938851357, -0.0213705692440271, -0.1826065927743912, 0.0393006950616837, -0.1493768393993378, 0.0386883616447449, -0.0130320172756910, -0.0327960774302483, -0.0204591657966375, 0.0134796360507607), target2); + target2 = MulAdd(mr1, MF4x4(-0.5736998319625854, -0.0392777882516384, 0.1370634734630585, -0.0484432727098465, 0.1308025121688843, -0.2323654592037201, -0.2625242173671722, -0.2956316471099854, -0.1103305667638779, -0.0551420338451862, 0.0006514643318951, 0.0022458140738308, 0.2859890162944794, -0.0839410424232483, 0.5223253369331360, 0.0280438754707575), target2); + target2 = MulAdd(mr2, MF4x4(0.0556896403431892, 0.0735942423343658, -0.2387326955795288, 0.1338670998811722, 0.0996377170085907, 0.0365633517503738, 0.3044275343418121, -0.0164738632738590, 0.1139278411865234, 0.1249758303165436, 0.2395293861627579, -0.0708516016602516, 0.1228865459561348, 0.0634353235363960, -0.3463226258754730, -0.0362484715878963), target2); + target2 = MulAdd(br1, MF4x4(-0.4082182049751282, -0.1144043654203415, 0.0233679264783859, 0.0130491442978382, 0.0237790253013372, 0.0709472149610519, 0.1275831013917923, -0.0888639837503433, -0.0140889342874289, -0.1301848441362381, -0.1709596514701843, 0.0314525589346886, 0.0293366052210331, 0.0934117212891579, 0.0720594301819801, 0.0094668027013540), target2); + target2 = MulAdd(br2, MF4x4(0.1782542318105698, -0.1532294601202011, -0.2839424610137939, 0.0435897931456566, 0.0621095262467861, -0.0348550342023373, 0.0461588650941849, 0.0183234252035618, 0.2204841077327728, 0.1267120093107224, 0.1979495882987976, -0.2149147540330887, 0.2110942006111145, 0.0718472301959991, -0.1063910648226738, -0.0493422709405422), target2); + target2 = max(target2, 0) + MF4(-0.0162308197468519, 0.4942881166934967, 0.1156802847981453, 1.4069133996963501) * min(target2, 0); tex3[gxy] = target1; tex4[gxy] = target2; @@ -365,67 +368,67 @@ void Pass4(uint2 blockStart, uint3 threadId) { // [tl, tc, tr] // [ml, mc, mr] // [bl, bc, br] - float4 tl1 = tex3.SampleLevel(sam, pos - inputPt, 0); - float4 ml1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc1 = tex3.SampleLevel(sam, pos, 0); - float4 bc1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br1 = tex3.SampleLevel(sam, pos + inputPt, 0); - - float4 tl2 = tex4.SampleLevel(sam, pos - inputPt, 0); - float4 ml2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc2 = tex4.SampleLevel(sam, pos, 0); - float4 bc2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br2 = tex4.SampleLevel(sam, pos + inputPt, 0); - - float4 target1 = float4(0.1563357263803482, -0.0187121629714966, -0.0126413907855749, 0.1197946891188622); - target1 += mul(tl1, float4x4(0.0248779058456421, 0.0970748737454414, -0.0710923895239830, -0.0502988137304783, -0.1513994187116623, 0.2118151038885117, -0.1060286536812782, -0.0017144688172266, -0.0556356199085712, -0.0478693507611752, 0.1046469956636429, 0.0019442490302026, -0.1099060326814651, 0.0196554642170668, -0.0393092185258865, -0.0131395589560270)); - target1 += mul(tl2, float4x4(0.4478245079517365, -0.0910880789160728, -0.2786923944950104, -0.0162817053496838, 0.0112394141033292, -0.1194593086838722, 0.0425493344664574, -0.0044937161728740, -0.1628813296556473, -0.2678257226943970, 0.2309084981679916, 0.0530976355075836, -0.1750707328319550, -0.0064609451219440, -0.0241944380104542, 0.0247293189167976)); - target1 += mul(ml1, float4x4(-0.0999718829989433, 0.2529200315475464, -0.2410994917154312, -0.0778252258896828, -0.1284751892089844, 0.0736103430390358, -0.0542359724640846, -0.0292810052633286, 0.0842959657311440, 0.1324738860130310, -0.1200775727629662, 0.0923810601234436, -0.3018241524696350, -0.0444923602044582, -0.2015913128852844, -0.0204973872750998)); - target1 += mul(ml2, float4x4(-0.3240330517292023, -0.5515946745872498, -0.0859212949872017, 0.0462087914347649, -0.0053406502120197, -0.0506816878914833, 0.1402591317892075, 0.0343847945332527, -0.1610218435525894, 0.7799831628799438, -0.4482840299606323, 0.4723005294799805, -0.2119023054838181, 0.1128389388322830, -0.1832690685987473, -0.0362148694694042)); - target1 += mul(bl1, float4x4(-0.0930702313780785, -0.0642902702093124, -0.0367766097187996, -0.0020038220100105, 0.0453414916992188, 0.0075245667248964, -0.0119760576635599, 0.0035134663339704, 0.1123304143548012, 0.1128236204385757, 0.0063381437212229, -0.0171416625380516, -0.1588087528944016, -0.2571538090705872, 0.2774460315704346, 0.0171892605721951)); - target1 += mul(bl2, float4x4(-0.0579603239893913, 0.3495539724826813, -0.4717904627323151, -0.0042169657535851, -0.0763773098587990, -0.1721021682024002, 0.1467801630496979, -0.0172323398292065, -0.0802723765373230, -0.0116639221087098, 0.0178409330546856, -0.0121179902926087, -0.0710472315549850, -0.1480658054351807, 0.1912731230258942, 0.0008336952887475)); - target1 += mul(tc1, float4x4(0.0445428267121315, -0.1481070518493652, 0.1304957717657089, -0.1157935336232185, -0.1166803613305092, -0.3920064568519592, 0.8788923621177673, 0.3384790122509003, -0.0278418567031622, -0.1011456921696663, -0.0513025075197220, -0.0748791247606277, -0.0502855703234673, -0.1696091145277023, -0.0049756630323827, -0.0209914986044168)); - target1 += mul(tc2, float4x4(-0.1725586950778961, 0.1313660293817520, -0.3432282805442810, -0.0246748421341181, -0.0593904331326485, 0.1949618458747864, -0.2589366734027863, 0.0379127524793148, -0.0550928115844727, -0.0913493037223816, 0.1150950565934181, -0.1235521435737610, -0.0625528916716576, -0.3131158649921417, 0.4109992682933807, 0.0410865694284439)); - target1 += mul(mc1, float4x4(0.8893174529075623, -0.0700975209474564, 0.7708584070205688, -0.2465052455663681, -0.1121069490909576, -0.5598245263099670, 0.7997139692306519, -0.5694547295570374, -0.1351616084575653, 0.1108073145151138, 0.7269443273544312, 0.1369582563638687, 0.6071134805679321, 0.8817817568778992, 0.0194139964878559, -0.2244683355093002)); - target1 += mul(mc2, float4x4(-0.2949840426445007, -0.3180212676525116, -0.5362266302108765, 0.0656562000513077, -0.6400785446166992, -0.4476518630981445, -0.6344851851463318, 0.7187259793281555, -0.3846258223056793, 1.0990517139434814, -0.7282652258872986, -0.6530264616012573, 0.8294114470481873, 0.6079595088958740, 0.3271140158176422, 0.4062923491001129)); - target1 += mul(bc1, float4x4(-0.0373790934681892, -0.1651912927627563, 0.0589407421648502, 0.0622759014368057, 0.0095487469807267, 0.0824478641152382, -0.0216544214636087, 0.1070290282368660, -0.0805450007319450, -0.0367405600845814, 0.0055392896756530, 0.0046677836216986, 0.1806629896163940, 0.2809534966945648, 0.0341635458171368, 0.1274557113647461)); - target1 += mul(bc2, float4x4(0.3259792327880859, -0.3150677680969238, -0.2272015213966370, 0.0287732314318419, 0.0530966222286224, 0.3310768604278564, -0.2079527378082275, -0.1340134441852570, 0.0769909769296646, -0.2229669988155365, 0.1012685745954514, 0.0622584670782089, 0.1539722383022308, 0.2163516432046890, -0.1021269038319588, -0.0561319366097450)); - target1 += mul(tr1, float4x4(-0.1077229678630829, -0.2074016332626343, 0.0913541764020920, 0.0391069389879704, 0.0848263725638390, -0.0416730083525181, 0.0603712275624275, 0.0457836911082268, 0.0035252417437732, 0.0004963557003066, 0.0027605029754341, 0.0254582706838846, -0.0146415829658508, 0.0273043140769005, 0.0692857503890991, 0.0091926595196128)); - target1 += mul(tr2, float4x4(0.0692942291498184, -0.4098799824714661, 0.3745719194412231, -0.0331038050353527, -0.0513759665191174, 0.0989063531160355, -0.1431623697280884, -0.0274865441024303, 0.0244991369545460, -0.0112041812390089, 0.0523535087704659, 0.0222812052816153, -0.0314176008105278, 0.2347036451101303, -0.0928338095545769, -0.0338262394070625)); - target1 += mul(mr1, float4x4(0.3805117309093475, -0.1917886883020401, 0.2292910665273666, 0.3065188527107239, -0.2231798321008682, 0.2646720707416534, -0.1371945887804031, -0.0272636637091637, 0.1435333937406540, -0.0137438504025340, 0.0088603384792805, -0.0633594989776611, -0.1662645787000656, 0.2498313635587692, -0.2899549007415771, 0.0460192002356052)); - target1 += mul(mr2, float4x4(0.1833423078060150, 0.0624732412397861, -0.3103306889533997, -0.0102488445118070, 0.0073305973783135, -0.2617286443710327, 0.2580088973045349, -0.0416168905794621, 0.1506632268428802, -0.0574487410485744, 0.0778761878609657, 0.1702914088964462, -0.0307608898729086, 0.0848424360156059, -0.1303439885377884, -0.0837477520108223)); - target1 += mul(br1, float4x4(0.0605936460196972, -0.0835580825805664, 0.0067690783180296, 0.0539834238588810, 0.0881687626242638, -0.0001589829771547, -0.0706917122006416, 0.0060382266528904, 0.1218314692378044, 0.0132934488356113, 0.0503435060381889, -0.0386124141514301, -0.1492055207490921, -0.0103553524240851, -0.0697906538844109, -0.0208332743495703)); - target1 += mul(br2, float4x4(-0.2907077968120575, -0.1428615152835846, -0.1178332567214966, 0.0093302968889475, -0.0501379445195198, 0.1940260678529739, -0.0139665808528662, 0.0440400391817093, 0.0546711236238480, -0.0606320053339005, 0.0891899466514587, -0.0187927689403296, -0.0581561885774136, -0.0785671249032021, -0.0746953785419464, -0.0350385755300522)); - target1 = max(target1, 0) + float4(0.0636819079518318, -0.0394099690020084, 0.0154740391299129, 1.4728027582168579) * min(target1, 0); - - float4 target2 = float4(0.0120743932202458, -0.0392544493079185, 0.0073779639787972, 0.0674902275204659); - target2 += mul(tl1, float4x4(-0.0253207311034203, -0.0178817976266146, -0.0941111445426941, -0.0096205184236169, -0.0948953703045845, -0.1085971817374229, -0.1137845888733864, -0.1022860705852509, 0.0362259782850742, 0.0741802081465721, -0.0426849052309990, 0.1004608497023582, -0.0553506910800934, -0.0631089508533478, 0.0144856451079249, -0.0129664530977607)); - target2 += mul(tl2, float4x4(0.1779767572879791, 0.0777176544070244, 0.3302779793739319, -0.0630711168050766, 0.0130759663879871, -0.0583435148000717, 0.0534219592809677, -0.0205510091036558, -0.1502479761838913, 0.0436260215938091, -0.3180699944496155, 0.1497740298509598, -0.0714024156332016, -0.0304171387106180, -0.1271478682756424, -0.0160594787448645)); - target2 += mul(ml1, float4x4(-0.2060592919588089, 0.0177838709205389, 0.2679423391819000, 0.0484818480908871, -0.1020416766405106, -0.0875749215483665, 0.2993223369121552, 0.0260893367230892, -0.0320936217904091, -0.0193585660308599, 0.1074631884694099, -0.0031519578769803, -0.1419622153043747, -0.0621272362768650, -0.2517412602901459, -0.1112222597002983)); - target2 += mul(ml2, float4x4(0.0975706353783607, -0.1846135258674622, -0.2201799452304840, -0.0123737258836627, 0.0554487742483616, -0.0255174264311790, -0.2444359511137009, -0.1069484427571297, -0.0487980805337429, -0.0570272766053677, 0.1149747893214226, -0.0176141038537025, -0.1059966161847115, 0.1263166964054108, 0.1091895326972008, 0.0400139950215816)); - target2 += mul(bl1, float4x4(-0.0971131697297096, 0.1365687996149063, -0.1780374944210052, 0.2879253029823303, -0.0652871504426003, -0.0537611208856106, -0.0763697773218155, 0.0455291420221329, 0.0246813204139471, -0.0074042826890945, 0.2309278100728989, 0.0046464367769659, -0.0692639946937561, 0.0042336005717516, -0.2525716722011566, 0.3263924717903137)); - target2 += mul(bl2, float4x4(-0.0798230320215225, -0.1135407239198685, -0.4427868127822876, 0.0395730547606945, 0.0537165030837059, 0.0225568320602179, -0.1189213171601295, -0.0707803219556808, -0.0074193109758198, -0.0493272021412849, 0.1401828378438950, 0.1580671072006226, -0.0574450828135014, 0.0058684512041509, -0.1626979410648346, 0.0857749953866005)); - target2 += mul(tc1, float4x4(-0.0715018808841705, 0.0310761369764805, -0.3861580789089203, 0.0770959705114365, 0.1908793896436691, 0.2067244797945023, 0.1176377162337303, 0.0705406218767166, -0.0944501385092735, 0.1110353469848633, -0.2772715091705322, -0.0079436022788286, 0.1045550853013992, 0.0076957782730460, 0.0220303647220135, 0.0434708297252655)); - target2 += mul(tc2, float4x4(-0.4399432241916656, -0.0885980203747749, 0.2042984664440155, 0.0499991811811924, 0.0443918742239475, -0.0322260186076164, 0.0960535407066345, 0.0173596814274788, 0.0851852819323540, -0.0549903102219105, -0.2807548046112061, -0.1112457811832428, 0.0906120762228966, 0.1066406965255737, -0.3857226073741913, 0.1345559209585190)); - target2 += mul(mc1, float4x4(0.2723454833030701, -0.1252564936876297, 0.3694194555282593, 0.0895726680755615, 0.2200681418180466, 0.3019879162311554, 0.4471587240695953, 0.2883224189281464, 0.0264542233198881, 0.3020884990692139, 0.2432236075401306, 0.5683830380439758, -0.0914180725812912, -0.1473430246114731, -0.5914288163185120, -0.1922498643398285)); - target2 += mul(mc2, float4x4(0.2161763310432434, -0.0415927544236183, -0.0378856658935547, -0.0317508913576603, -0.2287719398736954, -0.4885228574275970, -0.2818722724914551, -0.3797133862972260, 0.4456195533275604, 0.7929218411445618, -0.1307591795921326, 0.2016224861145020, 0.2801168859004974, -0.0006753758061677, 0.5686879754066467, 0.0415142513811588)); - target2 += mul(bc1, float4x4(0.0530648417770863, -0.2444190829992294, 0.0235249921679497, 0.0224611610174179, 0.0651976913213730, -0.0449720136821270, 0.1208736971020699, -0.0743656828999519, 0.1318923383951187, 0.1823218315839767, 0.5197241306304932, 0.1862808614969254, 0.2317387014627457, -0.2857755720615387, 0.1650039553642273, -0.1755792349576950)); - target2 += mul(bc2, float4x4(-0.0835669562220573, 0.0129750147461891, -0.4473843872547150, -0.5028023719787598, -0.0481940247118473, -0.0905050709843636, -0.6921447515487671, -0.2693449556827545, 0.2342379540205002, 0.0392520241439342, 0.4797120690345764, 0.1215118318796158, 0.1369755119085312, -0.1010836884379387, -0.0070533878169954, -0.2589581906795502)); - target2 += mul(tr1, float4x4(0.1088275387883186, 0.0839678123593330, -0.3048903048038483, -0.0084876483306289, 0.3669581115245819, 0.0472131110727787, -0.1243446245789528, -0.1012610718607903, 0.1622449755668640, -0.1317851245403290, -0.0711368247866631, -0.1593778431415558, -0.0104977218434215, -0.0608197152614594, 0.0286014154553413, 0.0388568006455898)); - target2 += mul(tr2, float4x4(0.0885753333568573, 0.1340429484844208, -0.0027331225574017, -0.0736069232225418, -0.1520483642816544, 0.1104429140686989, 0.1728315353393555, 0.1210049912333488, 0.0688045620918274, -0.1218316256999969, -0.0629790797829628, -0.1348981261253357, 0.0943875387310982, -0.0730865821242332, -0.2342475503683090, -0.0808216184377670)); - target2 += mul(mr1, float4x4(-0.1263358592987061, -0.7069915533065796, 0.1411920040845871, -0.2682386934757233, 0.2957956194877625, 0.1127238497138023, 0.3032427430152893, 0.2759581208229065, 0.8320354819297791, -0.0136295817792416, 0.0530097521841526, -0.0727380812168121, 0.0614950619637966, 0.0339637212455273, -0.0386842861771584, -0.0550391897559166)); - target2 += mul(mr2, float4x4(-0.0998953506350517, -0.2231116443872452, 0.0948988571763039, 0.1258799731731415, -0.6855500936508179, -0.4546283185482025, -0.3335786461830139, 0.0718025788664818, 0.6456025242805481, -0.2023779749870300, 0.1325027197599411, -0.1078727394342422, 0.3024467229843140, 0.1703380942344666, 0.2321108430624008, 0.2143797874450684)); - target2 += mul(br1, float4x4(0.0542521663010120, -0.2265717238187790, -0.0289179943501949, 0.0697252005338669, -0.1518151611089706, 0.0225123148411512, 0.0370684377849102, -0.1546901017427444, 0.0753403753042221, -0.0465561784803867, 0.1635994315147400, 0.1127668544650078, 0.0738654434680939, 0.1077028661966324, -0.1282461881637573, -0.0510208979249001)); - target2 += mul(br2, float4x4(-0.1740311384201050, 0.0542572811245918, 0.0551791004836559, 0.1728909015655518, -0.0078740902245045, 0.0999085083603859, -0.0136023676022887, 0.0501077920198441, 0.0529310964047909, -0.0859082415699959, -0.0285708475857973, -0.0186515673995018, -0.0793913751840591, 0.0688859447836876, -0.1684362143278122, 0.0473327860236168)); - target2 = max(target2, 0) + float4(-0.1339675635099411, 0.3599768280982971, -0.1313954293727875, 0.8648772835731506) * min(target2, 0); + MF4 tl1 = tex3.SampleLevel(sam, pos - inputPt, 0); + MF4 ml1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc1 = tex3.SampleLevel(sam, pos, 0); + MF4 bc1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br1 = tex3.SampleLevel(sam, pos + inputPt, 0); + + MF4 tl2 = tex4.SampleLevel(sam, pos - inputPt, 0); + MF4 ml2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc2 = tex4.SampleLevel(sam, pos, 0); + MF4 bc2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br2 = tex4.SampleLevel(sam, pos + inputPt, 0); + + MF4 target1 = MF4(0.1563357263803482, -0.0187121629714966, -0.0126413907855749, 0.1197946891188622); + target1 = MulAdd(tl1, MF4x4(0.0248779058456421, 0.0970748737454414, -0.0710923895239830, -0.0502988137304783, -0.1513994187116623, 0.2118151038885117, -0.1060286536812782, -0.0017144688172266, -0.0556356199085712, -0.0478693507611752, 0.1046469956636429, 0.0019442490302026, -0.1099060326814651, 0.0196554642170668, -0.0393092185258865, -0.0131395589560270), target1); + target1 = MulAdd(tl2, MF4x4(0.4478245079517365, -0.0910880789160728, -0.2786923944950104, -0.0162817053496838, 0.0112394141033292, -0.1194593086838722, 0.0425493344664574, -0.0044937161728740, -0.1628813296556473, -0.2678257226943970, 0.2309084981679916, 0.0530976355075836, -0.1750707328319550, -0.0064609451219440, -0.0241944380104542, 0.0247293189167976), target1); + target1 = MulAdd(ml1, MF4x4(-0.0999718829989433, 0.2529200315475464, -0.2410994917154312, -0.0778252258896828, -0.1284751892089844, 0.0736103430390358, -0.0542359724640846, -0.0292810052633286, 0.0842959657311440, 0.1324738860130310, -0.1200775727629662, 0.0923810601234436, -0.3018241524696350, -0.0444923602044582, -0.2015913128852844, -0.0204973872750998), target1); + target1 = MulAdd(ml2, MF4x4(-0.3240330517292023, -0.5515946745872498, -0.0859212949872017, 0.0462087914347649, -0.0053406502120197, -0.0506816878914833, 0.1402591317892075, 0.0343847945332527, -0.1610218435525894, 0.7799831628799438, -0.4482840299606323, 0.4723005294799805, -0.2119023054838181, 0.1128389388322830, -0.1832690685987473, -0.0362148694694042), target1); + target1 = MulAdd(bl1, MF4x4(-0.0930702313780785, -0.0642902702093124, -0.0367766097187996, -0.0020038220100105, 0.0453414916992188, 0.0075245667248964, -0.0119760576635599, 0.0035134663339704, 0.1123304143548012, 0.1128236204385757, 0.0063381437212229, -0.0171416625380516, -0.1588087528944016, -0.2571538090705872, 0.2774460315704346, 0.0171892605721951), target1); + target1 = MulAdd(bl2, MF4x4(-0.0579603239893913, 0.3495539724826813, -0.4717904627323151, -0.0042169657535851, -0.0763773098587990, -0.1721021682024002, 0.1467801630496979, -0.0172323398292065, -0.0802723765373230, -0.0116639221087098, 0.0178409330546856, -0.0121179902926087, -0.0710472315549850, -0.1480658054351807, 0.1912731230258942, 0.0008336952887475), target1); + target1 = MulAdd(tc1, MF4x4(0.0445428267121315, -0.1481070518493652, 0.1304957717657089, -0.1157935336232185, -0.1166803613305092, -0.3920064568519592, 0.8788923621177673, 0.3384790122509003, -0.0278418567031622, -0.1011456921696663, -0.0513025075197220, -0.0748791247606277, -0.0502855703234673, -0.1696091145277023, -0.0049756630323827, -0.0209914986044168), target1); + target1 = MulAdd(tc2, MF4x4(-0.1725586950778961, 0.1313660293817520, -0.3432282805442810, -0.0246748421341181, -0.0593904331326485, 0.1949618458747864, -0.2589366734027863, 0.0379127524793148, -0.0550928115844727, -0.0913493037223816, 0.1150950565934181, -0.1235521435737610, -0.0625528916716576, -0.3131158649921417, 0.4109992682933807, 0.0410865694284439), target1); + target1 = MulAdd(mc1, MF4x4(0.8893174529075623, -0.0700975209474564, 0.7708584070205688, -0.2465052455663681, -0.1121069490909576, -0.5598245263099670, 0.7997139692306519, -0.5694547295570374, -0.1351616084575653, 0.1108073145151138, 0.7269443273544312, 0.1369582563638687, 0.6071134805679321, 0.8817817568778992, 0.0194139964878559, -0.2244683355093002), target1); + target1 = MulAdd(mc2, MF4x4(-0.2949840426445007, -0.3180212676525116, -0.5362266302108765, 0.0656562000513077, -0.6400785446166992, -0.4476518630981445, -0.6344851851463318, 0.7187259793281555, -0.3846258223056793, 1.0990517139434814, -0.7282652258872986, -0.6530264616012573, 0.8294114470481873, 0.6079595088958740, 0.3271140158176422, 0.4062923491001129), target1); + target1 = MulAdd(bc1, MF4x4(-0.0373790934681892, -0.1651912927627563, 0.0589407421648502, 0.0622759014368057, 0.0095487469807267, 0.0824478641152382, -0.0216544214636087, 0.1070290282368660, -0.0805450007319450, -0.0367405600845814, 0.0055392896756530, 0.0046677836216986, 0.1806629896163940, 0.2809534966945648, 0.0341635458171368, 0.1274557113647461), target1); + target1 = MulAdd(bc2, MF4x4(0.3259792327880859, -0.3150677680969238, -0.2272015213966370, 0.0287732314318419, 0.0530966222286224, 0.3310768604278564, -0.2079527378082275, -0.1340134441852570, 0.0769909769296646, -0.2229669988155365, 0.1012685745954514, 0.0622584670782089, 0.1539722383022308, 0.2163516432046890, -0.1021269038319588, -0.0561319366097450), target1); + target1 = MulAdd(tr1, MF4x4(-0.1077229678630829, -0.2074016332626343, 0.0913541764020920, 0.0391069389879704, 0.0848263725638390, -0.0416730083525181, 0.0603712275624275, 0.0457836911082268, 0.0035252417437732, 0.0004963557003066, 0.0027605029754341, 0.0254582706838846, -0.0146415829658508, 0.0273043140769005, 0.0692857503890991, 0.0091926595196128), target1); + target1 = MulAdd(tr2, MF4x4(0.0692942291498184, -0.4098799824714661, 0.3745719194412231, -0.0331038050353527, -0.0513759665191174, 0.0989063531160355, -0.1431623697280884, -0.0274865441024303, 0.0244991369545460, -0.0112041812390089, 0.0523535087704659, 0.0222812052816153, -0.0314176008105278, 0.2347036451101303, -0.0928338095545769, -0.0338262394070625), target1); + target1 = MulAdd(mr1, MF4x4(0.3805117309093475, -0.1917886883020401, 0.2292910665273666, 0.3065188527107239, -0.2231798321008682, 0.2646720707416534, -0.1371945887804031, -0.0272636637091637, 0.1435333937406540, -0.0137438504025340, 0.0088603384792805, -0.0633594989776611, -0.1662645787000656, 0.2498313635587692, -0.2899549007415771, 0.0460192002356052), target1); + target1 = MulAdd(mr2, MF4x4(0.1833423078060150, 0.0624732412397861, -0.3103306889533997, -0.0102488445118070, 0.0073305973783135, -0.2617286443710327, 0.2580088973045349, -0.0416168905794621, 0.1506632268428802, -0.0574487410485744, 0.0778761878609657, 0.1702914088964462, -0.0307608898729086, 0.0848424360156059, -0.1303439885377884, -0.0837477520108223), target1); + target1 = MulAdd(br1, MF4x4(0.0605936460196972, -0.0835580825805664, 0.0067690783180296, 0.0539834238588810, 0.0881687626242638, -0.0001589829771547, -0.0706917122006416, 0.0060382266528904, 0.1218314692378044, 0.0132934488356113, 0.0503435060381889, -0.0386124141514301, -0.1492055207490921, -0.0103553524240851, -0.0697906538844109, -0.0208332743495703), target1); + target1 = MulAdd(br2, MF4x4(-0.2907077968120575, -0.1428615152835846, -0.1178332567214966, 0.0093302968889475, -0.0501379445195198, 0.1940260678529739, -0.0139665808528662, 0.0440400391817093, 0.0546711236238480, -0.0606320053339005, 0.0891899466514587, -0.0187927689403296, -0.0581561885774136, -0.0785671249032021, -0.0746953785419464, -0.0350385755300522), target1); + target1 = max(target1, 0) + MF4(0.0636819079518318, -0.0394099690020084, 0.0154740391299129, 1.4728027582168579) * min(target1, 0); + + MF4 target2 = MF4(0.0120743932202458, -0.0392544493079185, 0.0073779639787972, 0.0674902275204659); + target2 = MulAdd(tl1, MF4x4(-0.0253207311034203, -0.0178817976266146, -0.0941111445426941, -0.0096205184236169, -0.0948953703045845, -0.1085971817374229, -0.1137845888733864, -0.1022860705852509, 0.0362259782850742, 0.0741802081465721, -0.0426849052309990, 0.1004608497023582, -0.0553506910800934, -0.0631089508533478, 0.0144856451079249, -0.0129664530977607), target2); + target2 = MulAdd(tl2, MF4x4(0.1779767572879791, 0.0777176544070244, 0.3302779793739319, -0.0630711168050766, 0.0130759663879871, -0.0583435148000717, 0.0534219592809677, -0.0205510091036558, -0.1502479761838913, 0.0436260215938091, -0.3180699944496155, 0.1497740298509598, -0.0714024156332016, -0.0304171387106180, -0.1271478682756424, -0.0160594787448645), target2); + target2 = MulAdd(ml1, MF4x4(-0.2060592919588089, 0.0177838709205389, 0.2679423391819000, 0.0484818480908871, -0.1020416766405106, -0.0875749215483665, 0.2993223369121552, 0.0260893367230892, -0.0320936217904091, -0.0193585660308599, 0.1074631884694099, -0.0031519578769803, -0.1419622153043747, -0.0621272362768650, -0.2517412602901459, -0.1112222597002983), target2); + target2 = MulAdd(ml2, MF4x4(0.0975706353783607, -0.1846135258674622, -0.2201799452304840, -0.0123737258836627, 0.0554487742483616, -0.0255174264311790, -0.2444359511137009, -0.1069484427571297, -0.0487980805337429, -0.0570272766053677, 0.1149747893214226, -0.0176141038537025, -0.1059966161847115, 0.1263166964054108, 0.1091895326972008, 0.0400139950215816), target2); + target2 = MulAdd(bl1, MF4x4(-0.0971131697297096, 0.1365687996149063, -0.1780374944210052, 0.2879253029823303, -0.0652871504426003, -0.0537611208856106, -0.0763697773218155, 0.0455291420221329, 0.0246813204139471, -0.0074042826890945, 0.2309278100728989, 0.0046464367769659, -0.0692639946937561, 0.0042336005717516, -0.2525716722011566, 0.3263924717903137), target2); + target2 = MulAdd(bl2, MF4x4(-0.0798230320215225, -0.1135407239198685, -0.4427868127822876, 0.0395730547606945, 0.0537165030837059, 0.0225568320602179, -0.1189213171601295, -0.0707803219556808, -0.0074193109758198, -0.0493272021412849, 0.1401828378438950, 0.1580671072006226, -0.0574450828135014, 0.0058684512041509, -0.1626979410648346, 0.0857749953866005), target2); + target2 = MulAdd(tc1, MF4x4(-0.0715018808841705, 0.0310761369764805, -0.3861580789089203, 0.0770959705114365, 0.1908793896436691, 0.2067244797945023, 0.1176377162337303, 0.0705406218767166, -0.0944501385092735, 0.1110353469848633, -0.2772715091705322, -0.0079436022788286, 0.1045550853013992, 0.0076957782730460, 0.0220303647220135, 0.0434708297252655), target2); + target2 = MulAdd(tc2, MF4x4(-0.4399432241916656, -0.0885980203747749, 0.2042984664440155, 0.0499991811811924, 0.0443918742239475, -0.0322260186076164, 0.0960535407066345, 0.0173596814274788, 0.0851852819323540, -0.0549903102219105, -0.2807548046112061, -0.1112457811832428, 0.0906120762228966, 0.1066406965255737, -0.3857226073741913, 0.1345559209585190), target2); + target2 = MulAdd(mc1, MF4x4(0.2723454833030701, -0.1252564936876297, 0.3694194555282593, 0.0895726680755615, 0.2200681418180466, 0.3019879162311554, 0.4471587240695953, 0.2883224189281464, 0.0264542233198881, 0.3020884990692139, 0.2432236075401306, 0.5683830380439758, -0.0914180725812912, -0.1473430246114731, -0.5914288163185120, -0.1922498643398285), target2); + target2 = MulAdd(mc2, MF4x4(0.2161763310432434, -0.0415927544236183, -0.0378856658935547, -0.0317508913576603, -0.2287719398736954, -0.4885228574275970, -0.2818722724914551, -0.3797133862972260, 0.4456195533275604, 0.7929218411445618, -0.1307591795921326, 0.2016224861145020, 0.2801168859004974, -0.0006753758061677, 0.5686879754066467, 0.0415142513811588), target2); + target2 = MulAdd(bc1, MF4x4(0.0530648417770863, -0.2444190829992294, 0.0235249921679497, 0.0224611610174179, 0.0651976913213730, -0.0449720136821270, 0.1208736971020699, -0.0743656828999519, 0.1318923383951187, 0.1823218315839767, 0.5197241306304932, 0.1862808614969254, 0.2317387014627457, -0.2857755720615387, 0.1650039553642273, -0.1755792349576950), target2); + target2 = MulAdd(bc2, MF4x4(-0.0835669562220573, 0.0129750147461891, -0.4473843872547150, -0.5028023719787598, -0.0481940247118473, -0.0905050709843636, -0.6921447515487671, -0.2693449556827545, 0.2342379540205002, 0.0392520241439342, 0.4797120690345764, 0.1215118318796158, 0.1369755119085312, -0.1010836884379387, -0.0070533878169954, -0.2589581906795502), target2); + target2 = MulAdd(tr1, MF4x4(0.1088275387883186, 0.0839678123593330, -0.3048903048038483, -0.0084876483306289, 0.3669581115245819, 0.0472131110727787, -0.1243446245789528, -0.1012610718607903, 0.1622449755668640, -0.1317851245403290, -0.0711368247866631, -0.1593778431415558, -0.0104977218434215, -0.0608197152614594, 0.0286014154553413, 0.0388568006455898), target2); + target2 = MulAdd(tr2, MF4x4(0.0885753333568573, 0.1340429484844208, -0.0027331225574017, -0.0736069232225418, -0.1520483642816544, 0.1104429140686989, 0.1728315353393555, 0.1210049912333488, 0.0688045620918274, -0.1218316256999969, -0.0629790797829628, -0.1348981261253357, 0.0943875387310982, -0.0730865821242332, -0.2342475503683090, -0.0808216184377670), target2); + target2 = MulAdd(mr1, MF4x4(-0.1263358592987061, -0.7069915533065796, 0.1411920040845871, -0.2682386934757233, 0.2957956194877625, 0.1127238497138023, 0.3032427430152893, 0.2759581208229065, 0.8320354819297791, -0.0136295817792416, 0.0530097521841526, -0.0727380812168121, 0.0614950619637966, 0.0339637212455273, -0.0386842861771584, -0.0550391897559166), target2); + target2 = MulAdd(mr2, MF4x4(-0.0998953506350517, -0.2231116443872452, 0.0948988571763039, 0.1258799731731415, -0.6855500936508179, -0.4546283185482025, -0.3335786461830139, 0.0718025788664818, 0.6456025242805481, -0.2023779749870300, 0.1325027197599411, -0.1078727394342422, 0.3024467229843140, 0.1703380942344666, 0.2321108430624008, 0.2143797874450684), target2); + target2 = MulAdd(br1, MF4x4(0.0542521663010120, -0.2265717238187790, -0.0289179943501949, 0.0697252005338669, -0.1518151611089706, 0.0225123148411512, 0.0370684377849102, -0.1546901017427444, 0.0753403753042221, -0.0465561784803867, 0.1635994315147400, 0.1127668544650078, 0.0738654434680939, 0.1077028661966324, -0.1282461881637573, -0.0510208979249001), target2); + target2 = MulAdd(br2, MF4x4(-0.1740311384201050, 0.0542572811245918, 0.0551791004836559, 0.1728909015655518, -0.0078740902245045, 0.0999085083603859, -0.0136023676022887, 0.0501077920198441, 0.0529310964047909, -0.0859082415699959, -0.0285708475857973, -0.0186515673995018, -0.0793913751840591, 0.0688859447836876, -0.1684362143278122, 0.0473327860236168), target2); + target2 = max(target2, 0) + MF4(-0.1339675635099411, 0.3599768280982971, -0.1313954293727875, 0.8648772835731506) * min(target2, 0); tex1[gxy] = target1; tex2[gxy] = target2; @@ -452,79 +455,79 @@ void Pass5(uint2 blockStart, uint3 threadId) { // [tl, tc, tr] // [ml, mc, mr] // [bl, bc, br] - float4 tl1 = tex1.SampleLevel(sam, pos - inputPt, 0); - float4 ml1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc1 = tex1.SampleLevel(sam, pos, 0); - float4 bc1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br1 = tex1.SampleLevel(sam, pos + inputPt, 0); - - float4 tl2 = tex2.SampleLevel(sam, pos - inputPt, 0); - float4 ml2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc2 = tex2.SampleLevel(sam, pos, 0); - float4 bc2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br2 = tex2.SampleLevel(sam, pos + inputPt, 0); - - float4 c1 = { -0.0466146245598793,0.0514914207160473,-0.1174036413431168,0.1775186359882355 }; - c1 += mul(tl1, float4x4(0.0403013713657856, -0.0201274380087852, -0.0814156234264374, -0.0156530365347862, -0.1377502679824829, -0.0417905971407890, 0.0382207930088043, -0.1200272291898727, 0.0445257574319839, -0.0034040021710098, -0.1461200863122940, -0.0280020851641893, -0.0284179765731096, -0.0257030930370092, -0.0306571796536446, 0.0027001383714378)); - c1 += mul(tl2, float4x4(0.0474935993552208, -0.1903701126575470, -0.5459222793579102, 0.0117317456752062, 0.1298255324363708, -0.1852943897247314, 0.0912317335605621, 0.0597700774669647, 0.0291008763015270, -0.0133294332772493, 0.2249753177165985, -0.1039957106113434, -0.0360252261161804, 0.0713268965482712, -0.1684121936559677, 0.0038015090394765)); - c1 += mul(ml1, float4x4(0.1707874685525894, -0.0190297029912472, 0.0620337612926960, 0.1826018989086151, -0.0537132881581783, 0.0105489455163479, 0.0954312533140182, -0.0787296965718269, 0.1069839373230934, 0.1045138239860535, 0.1288910657167435, 0.0561389364302158, -0.0446831695735455, 0.0814650580286980, 0.0968160405755043, -0.0053927232511342)); - c1 += mul(ml2, float4x4(-0.4036456644535065, 0.5173328518867493, 0.2669098377227783, 0.1405677497386932, 0.7028214335441589, 0.4912839531898499, 0.6029286384582520, 0.5650771260261536, 0.0271350406110287, 0.0700132623314857, -0.0723611563444138, -0.1151952818036079, -0.1905510127544403, -0.1715065985918045, 0.2840125858783722, -0.0591109022498131)); - c1 += mul(bl1, float4x4(-0.0454136617481709, 0.0026271974202245, 0.0717174336314201, -0.0211183167994022, -0.0145919620990753, -0.0671255514025688, -0.0502023696899414, 0.0087185343727469, -0.0288616847246885, -0.0113478675484657, -0.2463355064392090, 0.0117223775014281, -0.0308947954326868, -0.0319634601473808, 0.0732206851243973, -0.0482302159070969)); - c1 += mul(bl2, float4x4(0.1823509633541107, -0.0050594508647919, -0.3849277794361115, 0.0906973257660866, -0.2269264608621597, -0.1660439521074295, -0.0572436749935150, -0.2200210094451904, -0.0326488390564919, -0.0223822314292192, 0.2504203319549561, -0.0020041887182742, 0.0842133983969688, 0.0320363529026508, -0.2134571075439453, 0.0419851355254650)); - c1 += mul(tc1, float4x4(-0.1438133865594864, 0.0348416790366173, 0.0934710800647736, 0.0492126196622849, -0.2420855760574341, 0.0501662716269493, 0.1352647244930267, -0.0790266394615173, 0.2533731162548065, 0.0078659672290087, -0.6405819058418274, 0.0266417451202869, 0.1385341882705688, 0.2193202823400497, -0.0242960918694735, 0.0509849824011326)); - c1 += mul(tc2, float4x4(-0.1021846532821655, 0.0409118719398975, -0.0411306917667389, -0.0381408408284187, 0.3855873942375183, 0.0598353408277035, -0.0579320192337036, 0.0582632422447205, 0.1651727408170700, 0.1139431521296501, 0.3897903859615326, -0.2790665924549103, -0.2033874839544296, 0.0795733034610748, 0.1354800611734390, 0.0271498821675777)); - c1 += mul(mc1, float4x4(0.5974506139755249, 0.2842084765434265, 0.2237064242362976, -0.1201776340603828, 0.4645690321922302, -0.4258180558681488, 0.2686293423175812, -0.5262981653213501, -0.6491079330444336, -0.1924646943807602, 0.5328685045242310, -0.2459655404090881, -0.0878667980432510, -0.5910828113555908, 0.5153566598892212, 0.1743167340755463)); - c1 += mul(mc2, float4x4(-0.0390859283506870, 0.1246269494295120, 0.0820790305733681, -0.0892064496874809, 0.2791964113712311, -0.0395625308156013, 0.8821132779121399, 0.0838626548647881, 0.0933722704648972, -0.3405517935752869, -0.6993819475173950, -0.2844510078430176, 0.0422397889196873, -0.0712213888764381, 0.4871867001056671, 0.1554806381464005)); - c1 += mul(bc1, float4x4(-0.2699566185474396, 0.0006965834181756, 0.2727784812450409, -0.0620054267346859, -0.3147658407688141, -0.0038526458665729, 0.3236006200313568, -0.0877058431506157, 0.2106281071901321, 0.0453009121119976, -0.5439859628677368, 0.0184146761894226, -0.0296773612499237, -0.0038715677801520, 0.1077574864029884, 0.0138774076476693)); - c1 += mul(bc2, float4x4(-0.0886235535144806, -0.0839715227484703, -0.1154380440711975, -0.0073053352534771, 0.0023921213578433, -0.0275820419192314, 0.0455723628401756, 0.0129091050475836, -0.0887900739908218, -0.0148513885214925, 0.0876928195357323, -0.0784894824028015, -0.0062762177549303, -0.0526834838092327, 0.0037576633039862, 0.0319633670151234)); - c1 += mul(tr1, float4x4(-0.0464110672473907, 0.0236346330493689, 0.0669583231210709, -0.0472575165331364, 0.0208626259118319, -0.0271043106913567, -0.0606599785387516, -0.0139442197978497, -0.0014815550530329, 0.0122182741761208, 0.0004507199628279, 0.0158279109746218, 0.0385761559009552, 0.0296174921095371, -0.0291853323578835, 0.0165338683873415)); - c1 += mul(tr2, float4x4(-0.0233153514564037, 0.0219305511564016, -0.0743728205561638, -0.0093567697331309, 0.0286262184381485, 0.0774424001574516, 0.1148394867777824, -0.0301702339202166, 0.0024869549088180, -0.0374004244804382, 0.4192572534084320, -0.0790813118219376, -0.0493366643786430, -0.0104152699932456, -0.0490021072328091, -0.0024766430724412)); - c1 += mul(mr1, float4x4(0.0144469039514661, -0.1078102141618729, 0.1059413179755211, -0.1089596152305603, 0.0470409952104092, 0.0500787831842899, -0.0514846295118332, 0.0626327991485596, -0.0882258489727974, 0.0321751609444618, 0.2398201376199722, -0.2443147897720337, 0.0259658545255661, -0.0396602302789688, 0.2174025326967239, 0.0715249925851822)); - c1 += mul(mr2, float4x4(-0.0154824554920197, 0.0291374288499355, 0.0039887567982078, -0.0431501194834709, 0.0593536123633385, -0.0845754146575928, 0.2000104188919067, 0.0186260938644409, 0.0547599084675312, 0.0526885949075222, -0.1233010515570641, -0.0432526804506779, 0.0357321202754974, 0.0021650493144989, 0.0814491733908653, 0.0971980616450310)); - c1 += mul(br1, float4x4(0.0287933535873890, 0.0494442507624626, 0.0210838094353676, -0.0483787320554256, -0.0759167149662971, -0.0253081526607275, 0.1219362914562225, -0.0418672412633896, -0.0189503412693739, 0.0143473483622074, -0.1790502667427063, -0.0662427768111229, 0.0324281528592110, 0.0148597611114383, 0.0266743116080761, 0.0377049185335636)); - c1 += mul(br2, float4x4(-0.0054656565189362, -0.0150196319445968, -0.0463149808347225, -0.0104772448539734, 0.0347928367555141, 0.0091987038031220, 0.0037015024572611, 0.0577751062810421, 0.0232732165604830, 0.0034828644711524, 0.0977631732821465, -0.0051266341470182, -0.0189268663525581, -0.0049852686934173, 0.0438088737428188, -0.0025434335693717)); - c1 = max(c1, 0) + float4(0.2930726408958435, -0.7832366824150085, 0.0082256151363254, 1.0583437681198120) * min(c1, 0); - - float4 c2 = { -0.0836324766278267,0.0299216359853745,0.0159619841724634,-0.1379968672990799 }; - c2 += mul(tl1, float4x4(-0.0837135538458824, 0.0536015741527081, -0.0739900916814804, 0.0180259179323912, 0.0306078922003508, 0.0638481751084328, -0.0674207285046577, -0.0308991391211748, -0.0134472101926804, -0.0455930270254612, 0.0009395828237757, -0.0776428431272507, -0.0028933393768966, -0.0612038075923920, -0.0162172410637140, -0.0327735245227814)); - c2 += mul(tl2, float4x4(-0.1481152623891830, 0.1160185635089874, -0.1463897079229355, 0.0038319902960211, -0.1815536171197891, 0.0099428771063685, -0.1647379845380783, 0.0335076004266739, -0.0302220620214939, -0.1180571690201759, -0.0232424903661013, -0.0164348836988211, -0.0015188503311947, 0.1787684559822083, 0.0413909815251827, 0.0699580833315849)); - c2 += mul(ml1, float4x4(0.0381213910877705, -1.8747351169586182, 0.0916024073958397, -0.1057635396718979, -0.1335459649562836, 0.0378836020827293, -0.0848037749528885, 0.1720509082078934, 0.1115766093134880, 0.0519676357507706, 0.1311796754598618, -0.2338305413722992, -0.0886595770716667, 0.1390771120786667, 0.0120587171986699, 0.0929709225893021)); - c2 += mul(ml2, float4x4(-0.0403309725224972, 0.1023108437657356, -0.0249778237193823, -0.2056589871644974, 0.0864044427871704, 0.7677633166313171, 0.6112527251243591, -0.0935023576021194, 0.0358289442956448, -0.0510838404297829, 0.0531301461160183, -0.1200713515281677, -0.0281702410429716, 0.3054289221763611, -0.1970508396625519, 0.1440129280090332)); - c2 += mul(bl1, float4x4(0.0137230604887009, 0.1833357512950897, -0.0056075016036630, -0.1050542071461678, 0.0367035493254662, 0.0896537080407143, -0.0109558179974556, -0.0221142154186964, -0.0462382063269615, -0.1151964291930199, -0.0042086942121387, 0.0297981910407543, 0.0043998458422720, 0.0687817037105560, -0.0601253211498260, 0.0031949516851455)); - c2 += mul(bl2, float4x4(0.0760864540934563, 0.1863034367561340, 0.0503818355500698, -0.0258647575974464, -0.0760487392544746, 0.2433954179286957, -0.1973436474800110, 0.0079258847981691, -0.0294476337730885, -0.0404389686882496, -0.0294238775968552, 0.0358795709908009, 0.0298653114587069, 0.0783578902482986, 0.0419599078595638, 0.0248970054090023)); - c2 += mul(tc1, float4x4(0.0907182395458221, 0.0576495565474033, 0.0530257523059845, 0.0549531430006027, 0.0915074944496155, -0.0465312339365482, -0.0230909585952759, -0.1178105399012566, -0.1995413154363632, -0.0433083362877369, -0.0418573357164860, 0.0866744294762611, -0.0793146342039108, 0.0074148247949779, 0.1731810569763184, 0.0715740397572517)); - c2 += mul(tc2, float4x4(0.0023458630312234, 0.0404974594712257, -0.1356777399778366, 0.0418198816478252, -0.0675975754857063, 0.0050714882090688, 0.1104314029216766, -0.1206769123673439, -0.1913090348243713, 0.0773992761969566, 0.0560133233666420, -0.2490582764148712, 0.0971352458000183, 0.0728188008069992, 0.0192280132323503, 0.0784228071570396)); - c2 += mul(mc1, float4x4(-0.0261659007519484, 0.1157309040427208, -0.3534074723720551, 0.5200188755989075, 0.1345363408327103, -0.1973183751106262, 0.1199645772576332, -1.1136766672134399, 0.1412540972232819, 0.1534357517957687, 0.2593606412410736, 0.3824510574340820, 0.5013928413391113, -0.1928857117891312, -0.2875523269176483, -0.0354673676192760)); - c2 += mul(mc2, float4x4(0.1194906458258629, 0.1256935596466064, 0.0932049900293350, 0.1464174836874008, 0.0670514181256294, -0.1400509625673294, 0.1003381684422493, -0.1169824004173279, 0.2452844530344009, -0.0348181650042534, -0.3607256710529327, -0.3338264226913452, -0.0456272326409817, -0.0939910858869553, -0.1304696053266525, 0.0402086712419987)); - c2 += mul(bc1, float4x4(-0.0869473740458488, -0.1211445480585098, 0.0065225088037550, 0.0068075512535870, -0.1304764598608017, -0.0536689385771751, -0.1490984708070755, -0.0136555638164282, 0.1024399474263191, 0.0315260104835033, 0.0369606800377369, -0.0167442485690117, 0.0501606240868568, -0.0476666353642941, 0.0291139576584101, -0.0997947081923485)); - c2 += mul(bc2, float4x4(-0.0373449698090553, -0.0382255539298058, -0.1446493864059448, 0.0054460307583213, 0.0428361445665359, -0.2411493360996246, 0.0281034875661135, -0.1959404051303864, -0.0147660700604320, -0.0115446811541915, -0.0435077212750912, -0.0445765219628811, 0.0025869212113321, -0.0454641655087471, -0.0552970357239246, 0.0636689588427544)); - c2 += mul(tr1, float4x4(0.0051982915028930, -0.0825250744819641, 0.0167135465890169, -0.1018612906336784, -0.0686964690685272, 0.0087886471301317, -0.0206265803426504, -0.0206072553992271, 0.0498707666993141, -0.0364030301570892, 0.0309015773236752, 0.0090340757742524, -0.0129618886858225, 0.0051486417651176, 0.0260841641575098, 0.0167939160019159)); - c2 += mul(tr2, float4x4(0.0010780893499032, -0.0352349840104580, -0.0192162413150072, -0.0381371527910233, 0.0282760411500931, -0.0559629201889038, 0.0619673281908035, -0.0514238551259041, 0.0122259482741356, 0.0608348101377487, -0.0754647627472878, -0.1400517821311951, -0.0136043848469853, 0.0550616234540939, -0.0093750739470124, 0.0393888689577579)); - c2 += mul(mr1, float4x4(-0.1028572022914886, -0.0257119275629520, -0.0871436968445778, -0.2802977561950684, 0.1279940754175186, 0.0694741085171700, 0.0366430617868900, 0.1782210469245911, -0.0931140556931496, -0.1249292492866516, -0.0775476619601250, -0.3643486201763153, -0.1307956129312515, -0.0392269045114517, -0.0212084632366896, 0.0024634231813252)); - c2 += mul(mr2, float4x4(-0.0036887160968035, -0.0489760562777519, -0.0022769547067583, -0.0936355590820312, -0.0981694832444191, -0.0402673967182636, -0.0764046013355255, 0.0067043504677713, 0.0426195561885834, -0.0056512621231377, 0.0814872384071350, -0.0765113532543182, 0.0027868365868926, 0.0623648613691330, 0.0598746836185455, 0.1226531565189362)); - c2 += mul(br1, float4x4(0.0350537523627281, 0.0229270569980145, 0.0465267412364483, -0.0452729463577271, 0.0256041418761015, 0.0215708781033754, -0.0193957649171352, -0.0173931997269392, 0.0231500957161188, -0.0499401167035103, 0.0173991154879332, -0.0804103761911392, -0.0232445765286684, -0.0107213268056512, 0.0450597628951073, 0.0691299363970757)); - c2 += mul(br2, float4x4(0.0023379696067423, -0.0239015202969313, -0.0137971211224794, -0.0529763884842396, 0.0394022278487682, 0.0184435173869133, 0.0855478867888451, 0.0777183994650841, -0.0052624838426709, -0.0031146518886089, 0.0387042500078678, 0.0015482418239117, -0.0088084554299712, 0.0233839545398951, -0.0151950558647513, 0.0070519605651498)); - c2 = max(c2, 0) + float4(0.7931001186370850, -0.0506631620228291, 0.3833878636360168, 0.3061273992061615) * min(c2, 0); - - float4 target1 = float4(0.0313877351582050, -0.0325053185224533, -0.0413495972752571, 0.0357267409563065); - target1 += mul(c1, float4x4(0.1461677402257919, -0.1208343803882599, 0.0125428512692451, -0.1319324076175690, -0.3387282788753510, -0.3393035829067230, -0.2043240815401077, 0.0048940703272820, -0.0343158058822155, 0.0995195582509041, -0.0571033284068108, -0.0356303341686726, 0.1097832918167114, 0.1462953090667725, 0.0575199872255325, -0.1682354062795639)); - target1 += mul(c2, float4x4(0.1110563054680824, 0.0068465564399958, -0.0098551185801625, -0.1380221396684647, -0.3268660008907318, -0.2625139057636261, 0.5479852557182312, -0.0188483651727438, -0.7557058334350586, -0.6716431379318237, 0.0680231377482414, 1.0786534547805786, -0.0519768036901951, -0.0483648441731930, 0.1052823588252068, 0.0570317767560482)); + MF4 tl1 = tex1.SampleLevel(sam, pos - inputPt, 0); + MF4 ml1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc1 = tex1.SampleLevel(sam, pos, 0); + MF4 bc1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + MF4 tl2 = tex2.SampleLevel(sam, pos - inputPt, 0); + MF4 ml2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc2 = tex2.SampleLevel(sam, pos, 0); + MF4 bc2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + MF4 c1 = { -0.0466146245598793,0.0514914207160473,-0.1174036413431168,0.1775186359882355 }; + c1 = MulAdd(tl1, MF4x4(0.0403013713657856, -0.0201274380087852, -0.0814156234264374, -0.0156530365347862, -0.1377502679824829, -0.0417905971407890, 0.0382207930088043, -0.1200272291898727, 0.0445257574319839, -0.0034040021710098, -0.1461200863122940, -0.0280020851641893, -0.0284179765731096, -0.0257030930370092, -0.0306571796536446, 0.0027001383714378), c1); + c1 = MulAdd(tl2, MF4x4(0.0474935993552208, -0.1903701126575470, -0.5459222793579102, 0.0117317456752062, 0.1298255324363708, -0.1852943897247314, 0.0912317335605621, 0.0597700774669647, 0.0291008763015270, -0.0133294332772493, 0.2249753177165985, -0.1039957106113434, -0.0360252261161804, 0.0713268965482712, -0.1684121936559677, 0.0038015090394765), c1); + c1 = MulAdd(ml1, MF4x4(0.1707874685525894, -0.0190297029912472, 0.0620337612926960, 0.1826018989086151, -0.0537132881581783, 0.0105489455163479, 0.0954312533140182, -0.0787296965718269, 0.1069839373230934, 0.1045138239860535, 0.1288910657167435, 0.0561389364302158, -0.0446831695735455, 0.0814650580286980, 0.0968160405755043, -0.0053927232511342), c1); + c1 = MulAdd(ml2, MF4x4(-0.4036456644535065, 0.5173328518867493, 0.2669098377227783, 0.1405677497386932, 0.7028214335441589, 0.4912839531898499, 0.6029286384582520, 0.5650771260261536, 0.0271350406110287, 0.0700132623314857, -0.0723611563444138, -0.1151952818036079, -0.1905510127544403, -0.1715065985918045, 0.2840125858783722, -0.0591109022498131), c1); + c1 = MulAdd(bl1, MF4x4(-0.0454136617481709, 0.0026271974202245, 0.0717174336314201, -0.0211183167994022, -0.0145919620990753, -0.0671255514025688, -0.0502023696899414, 0.0087185343727469, -0.0288616847246885, -0.0113478675484657, -0.2463355064392090, 0.0117223775014281, -0.0308947954326868, -0.0319634601473808, 0.0732206851243973, -0.0482302159070969), c1); + c1 = MulAdd(bl2, MF4x4(0.1823509633541107, -0.0050594508647919, -0.3849277794361115, 0.0906973257660866, -0.2269264608621597, -0.1660439521074295, -0.0572436749935150, -0.2200210094451904, -0.0326488390564919, -0.0223822314292192, 0.2504203319549561, -0.0020041887182742, 0.0842133983969688, 0.0320363529026508, -0.2134571075439453, 0.0419851355254650), c1); + c1 = MulAdd(tc1, MF4x4(-0.1438133865594864, 0.0348416790366173, 0.0934710800647736, 0.0492126196622849, -0.2420855760574341, 0.0501662716269493, 0.1352647244930267, -0.0790266394615173, 0.2533731162548065, 0.0078659672290087, -0.6405819058418274, 0.0266417451202869, 0.1385341882705688, 0.2193202823400497, -0.0242960918694735, 0.0509849824011326), c1); + c1 = MulAdd(tc2, MF4x4(-0.1021846532821655, 0.0409118719398975, -0.0411306917667389, -0.0381408408284187, 0.3855873942375183, 0.0598353408277035, -0.0579320192337036, 0.0582632422447205, 0.1651727408170700, 0.1139431521296501, 0.3897903859615326, -0.2790665924549103, -0.2033874839544296, 0.0795733034610748, 0.1354800611734390, 0.0271498821675777), c1); + c1 = MulAdd(mc1, MF4x4(0.5974506139755249, 0.2842084765434265, 0.2237064242362976, -0.1201776340603828, 0.4645690321922302, -0.4258180558681488, 0.2686293423175812, -0.5262981653213501, -0.6491079330444336, -0.1924646943807602, 0.5328685045242310, -0.2459655404090881, -0.0878667980432510, -0.5910828113555908, 0.5153566598892212, 0.1743167340755463), c1); + c1 = MulAdd(mc2, MF4x4(-0.0390859283506870, 0.1246269494295120, 0.0820790305733681, -0.0892064496874809, 0.2791964113712311, -0.0395625308156013, 0.8821132779121399, 0.0838626548647881, 0.0933722704648972, -0.3405517935752869, -0.6993819475173950, -0.2844510078430176, 0.0422397889196873, -0.0712213888764381, 0.4871867001056671, 0.1554806381464005), c1); + c1 = MulAdd(bc1, MF4x4(-0.2699566185474396, 0.0006965834181756, 0.2727784812450409, -0.0620054267346859, -0.3147658407688141, -0.0038526458665729, 0.3236006200313568, -0.0877058431506157, 0.2106281071901321, 0.0453009121119976, -0.5439859628677368, 0.0184146761894226, -0.0296773612499237, -0.0038715677801520, 0.1077574864029884, 0.0138774076476693), c1); + c1 = MulAdd(bc2, MF4x4(-0.0886235535144806, -0.0839715227484703, -0.1154380440711975, -0.0073053352534771, 0.0023921213578433, -0.0275820419192314, 0.0455723628401756, 0.0129091050475836, -0.0887900739908218, -0.0148513885214925, 0.0876928195357323, -0.0784894824028015, -0.0062762177549303, -0.0526834838092327, 0.0037576633039862, 0.0319633670151234), c1); + c1 = MulAdd(tr1, MF4x4(-0.0464110672473907, 0.0236346330493689, 0.0669583231210709, -0.0472575165331364, 0.0208626259118319, -0.0271043106913567, -0.0606599785387516, -0.0139442197978497, -0.0014815550530329, 0.0122182741761208, 0.0004507199628279, 0.0158279109746218, 0.0385761559009552, 0.0296174921095371, -0.0291853323578835, 0.0165338683873415), c1); + c1 = MulAdd(tr2, MF4x4(-0.0233153514564037, 0.0219305511564016, -0.0743728205561638, -0.0093567697331309, 0.0286262184381485, 0.0774424001574516, 0.1148394867777824, -0.0301702339202166, 0.0024869549088180, -0.0374004244804382, 0.4192572534084320, -0.0790813118219376, -0.0493366643786430, -0.0104152699932456, -0.0490021072328091, -0.0024766430724412), c1); + c1 = MulAdd(mr1, MF4x4(0.0144469039514661, -0.1078102141618729, 0.1059413179755211, -0.1089596152305603, 0.0470409952104092, 0.0500787831842899, -0.0514846295118332, 0.0626327991485596, -0.0882258489727974, 0.0321751609444618, 0.2398201376199722, -0.2443147897720337, 0.0259658545255661, -0.0396602302789688, 0.2174025326967239, 0.0715249925851822), c1); + c1 = MulAdd(mr2, MF4x4(-0.0154824554920197, 0.0291374288499355, 0.0039887567982078, -0.0431501194834709, 0.0593536123633385, -0.0845754146575928, 0.2000104188919067, 0.0186260938644409, 0.0547599084675312, 0.0526885949075222, -0.1233010515570641, -0.0432526804506779, 0.0357321202754974, 0.0021650493144989, 0.0814491733908653, 0.0971980616450310), c1); + c1 = MulAdd(br1, MF4x4(0.0287933535873890, 0.0494442507624626, 0.0210838094353676, -0.0483787320554256, -0.0759167149662971, -0.0253081526607275, 0.1219362914562225, -0.0418672412633896, -0.0189503412693739, 0.0143473483622074, -0.1790502667427063, -0.0662427768111229, 0.0324281528592110, 0.0148597611114383, 0.0266743116080761, 0.0377049185335636), c1); + c1 = MulAdd(br2, MF4x4(-0.0054656565189362, -0.0150196319445968, -0.0463149808347225, -0.0104772448539734, 0.0347928367555141, 0.0091987038031220, 0.0037015024572611, 0.0577751062810421, 0.0232732165604830, 0.0034828644711524, 0.0977631732821465, -0.0051266341470182, -0.0189268663525581, -0.0049852686934173, 0.0438088737428188, -0.0025434335693717), c1); + c1 = max(c1, 0) + MF4(0.2930726408958435, -0.7832366824150085, 0.0082256151363254, 1.0583437681198120) * min(c1, 0); + + MF4 c2 = { -0.0836324766278267,0.0299216359853745,0.0159619841724634,-0.1379968672990799 }; + c2 = MulAdd(tl1, MF4x4(-0.0837135538458824, 0.0536015741527081, -0.0739900916814804, 0.0180259179323912, 0.0306078922003508, 0.0638481751084328, -0.0674207285046577, -0.0308991391211748, -0.0134472101926804, -0.0455930270254612, 0.0009395828237757, -0.0776428431272507, -0.0028933393768966, -0.0612038075923920, -0.0162172410637140, -0.0327735245227814), c2); + c2 = MulAdd(tl2, MF4x4(-0.1481152623891830, 0.1160185635089874, -0.1463897079229355, 0.0038319902960211, -0.1815536171197891, 0.0099428771063685, -0.1647379845380783, 0.0335076004266739, -0.0302220620214939, -0.1180571690201759, -0.0232424903661013, -0.0164348836988211, -0.0015188503311947, 0.1787684559822083, 0.0413909815251827, 0.0699580833315849), c2); + c2 = MulAdd(ml1, MF4x4(0.0381213910877705, -1.8747351169586182, 0.0916024073958397, -0.1057635396718979, -0.1335459649562836, 0.0378836020827293, -0.0848037749528885, 0.1720509082078934, 0.1115766093134880, 0.0519676357507706, 0.1311796754598618, -0.2338305413722992, -0.0886595770716667, 0.1390771120786667, 0.0120587171986699, 0.0929709225893021), c2); + c2 = MulAdd(ml2, MF4x4(-0.0403309725224972, 0.1023108437657356, -0.0249778237193823, -0.2056589871644974, 0.0864044427871704, 0.7677633166313171, 0.6112527251243591, -0.0935023576021194, 0.0358289442956448, -0.0510838404297829, 0.0531301461160183, -0.1200713515281677, -0.0281702410429716, 0.3054289221763611, -0.1970508396625519, 0.1440129280090332), c2); + c2 = MulAdd(bl1, MF4x4(0.0137230604887009, 0.1833357512950897, -0.0056075016036630, -0.1050542071461678, 0.0367035493254662, 0.0896537080407143, -0.0109558179974556, -0.0221142154186964, -0.0462382063269615, -0.1151964291930199, -0.0042086942121387, 0.0297981910407543, 0.0043998458422720, 0.0687817037105560, -0.0601253211498260, 0.0031949516851455), c2); + c2 = MulAdd(bl2, MF4x4(0.0760864540934563, 0.1863034367561340, 0.0503818355500698, -0.0258647575974464, -0.0760487392544746, 0.2433954179286957, -0.1973436474800110, 0.0079258847981691, -0.0294476337730885, -0.0404389686882496, -0.0294238775968552, 0.0358795709908009, 0.0298653114587069, 0.0783578902482986, 0.0419599078595638, 0.0248970054090023), c2); + c2 = MulAdd(tc1, MF4x4(0.0907182395458221, 0.0576495565474033, 0.0530257523059845, 0.0549531430006027, 0.0915074944496155, -0.0465312339365482, -0.0230909585952759, -0.1178105399012566, -0.1995413154363632, -0.0433083362877369, -0.0418573357164860, 0.0866744294762611, -0.0793146342039108, 0.0074148247949779, 0.1731810569763184, 0.0715740397572517), c2); + c2 = MulAdd(tc2, MF4x4(0.0023458630312234, 0.0404974594712257, -0.1356777399778366, 0.0418198816478252, -0.0675975754857063, 0.0050714882090688, 0.1104314029216766, -0.1206769123673439, -0.1913090348243713, 0.0773992761969566, 0.0560133233666420, -0.2490582764148712, 0.0971352458000183, 0.0728188008069992, 0.0192280132323503, 0.0784228071570396), c2); + c2 = MulAdd(mc1, MF4x4(-0.0261659007519484, 0.1157309040427208, -0.3534074723720551, 0.5200188755989075, 0.1345363408327103, -0.1973183751106262, 0.1199645772576332, -1.1136766672134399, 0.1412540972232819, 0.1534357517957687, 0.2593606412410736, 0.3824510574340820, 0.5013928413391113, -0.1928857117891312, -0.2875523269176483, -0.0354673676192760), c2); + c2 = MulAdd(mc2, MF4x4(0.1194906458258629, 0.1256935596466064, 0.0932049900293350, 0.1464174836874008, 0.0670514181256294, -0.1400509625673294, 0.1003381684422493, -0.1169824004173279, 0.2452844530344009, -0.0348181650042534, -0.3607256710529327, -0.3338264226913452, -0.0456272326409817, -0.0939910858869553, -0.1304696053266525, 0.0402086712419987), c2); + c2 = MulAdd(bc1, MF4x4(-0.0869473740458488, -0.1211445480585098, 0.0065225088037550, 0.0068075512535870, -0.1304764598608017, -0.0536689385771751, -0.1490984708070755, -0.0136555638164282, 0.1024399474263191, 0.0315260104835033, 0.0369606800377369, -0.0167442485690117, 0.0501606240868568, -0.0476666353642941, 0.0291139576584101, -0.0997947081923485), c2); + c2 = MulAdd(bc2, MF4x4(-0.0373449698090553, -0.0382255539298058, -0.1446493864059448, 0.0054460307583213, 0.0428361445665359, -0.2411493360996246, 0.0281034875661135, -0.1959404051303864, -0.0147660700604320, -0.0115446811541915, -0.0435077212750912, -0.0445765219628811, 0.0025869212113321, -0.0454641655087471, -0.0552970357239246, 0.0636689588427544), c2); + c2 = MulAdd(tr1, MF4x4(0.0051982915028930, -0.0825250744819641, 0.0167135465890169, -0.1018612906336784, -0.0686964690685272, 0.0087886471301317, -0.0206265803426504, -0.0206072553992271, 0.0498707666993141, -0.0364030301570892, 0.0309015773236752, 0.0090340757742524, -0.0129618886858225, 0.0051486417651176, 0.0260841641575098, 0.0167939160019159), c2); + c2 = MulAdd(tr2, MF4x4(0.0010780893499032, -0.0352349840104580, -0.0192162413150072, -0.0381371527910233, 0.0282760411500931, -0.0559629201889038, 0.0619673281908035, -0.0514238551259041, 0.0122259482741356, 0.0608348101377487, -0.0754647627472878, -0.1400517821311951, -0.0136043848469853, 0.0550616234540939, -0.0093750739470124, 0.0393888689577579), c2); + c2 = MulAdd(mr1, MF4x4(-0.1028572022914886, -0.0257119275629520, -0.0871436968445778, -0.2802977561950684, 0.1279940754175186, 0.0694741085171700, 0.0366430617868900, 0.1782210469245911, -0.0931140556931496, -0.1249292492866516, -0.0775476619601250, -0.3643486201763153, -0.1307956129312515, -0.0392269045114517, -0.0212084632366896, 0.0024634231813252), c2); + c2 = MulAdd(mr2, MF4x4(-0.0036887160968035, -0.0489760562777519, -0.0022769547067583, -0.0936355590820312, -0.0981694832444191, -0.0402673967182636, -0.0764046013355255, 0.0067043504677713, 0.0426195561885834, -0.0056512621231377, 0.0814872384071350, -0.0765113532543182, 0.0027868365868926, 0.0623648613691330, 0.0598746836185455, 0.1226531565189362), c2); + c2 = MulAdd(br1, MF4x4(0.0350537523627281, 0.0229270569980145, 0.0465267412364483, -0.0452729463577271, 0.0256041418761015, 0.0215708781033754, -0.0193957649171352, -0.0173931997269392, 0.0231500957161188, -0.0499401167035103, 0.0173991154879332, -0.0804103761911392, -0.0232445765286684, -0.0107213268056512, 0.0450597628951073, 0.0691299363970757), c2); + c2 = MulAdd(br2, MF4x4(0.0023379696067423, -0.0239015202969313, -0.0137971211224794, -0.0529763884842396, 0.0394022278487682, 0.0184435173869133, 0.0855478867888451, 0.0777183994650841, -0.0052624838426709, -0.0031146518886089, 0.0387042500078678, 0.0015482418239117, -0.0088084554299712, 0.0233839545398951, -0.0151950558647513, 0.0070519605651498), c2); + c2 = max(c2, 0) + MF4(0.7931001186370850, -0.0506631620228291, 0.3833878636360168, 0.3061273992061615) * min(c2, 0); + + MF4 target1 = MF4(0.0313877351582050, -0.0325053185224533, -0.0413495972752571, 0.0357267409563065); + target1 = MulAdd(c1, MF4x4(0.1461677402257919, -0.1208343803882599, 0.0125428512692451, -0.1319324076175690, -0.3387282788753510, -0.3393035829067230, -0.2043240815401077, 0.0048940703272820, -0.0343158058822155, 0.0995195582509041, -0.0571033284068108, -0.0356303341686726, 0.1097832918167114, 0.1462953090667725, 0.0575199872255325, -0.1682354062795639), target1); + target1 = MulAdd(c2, MF4x4(0.1110563054680824, 0.0068465564399958, -0.0098551185801625, -0.1380221396684647, -0.3268660008907318, -0.2625139057636261, 0.5479852557182312, -0.0188483651727438, -0.7557058334350586, -0.6716431379318237, 0.0680231377482414, 1.0786534547805786, -0.0519768036901951, -0.0483648441731930, 0.1052823588252068, 0.0570317767560482), target1); target1 += featureMap1.SampleLevel(sam, pos, 0); - target1 = max(target1, 0) + float4(0.9962985515594482, 0.9851159453392029, 0.2272046357393265, -0.1116774082183838) * min(target1, 0); + target1 = max(target1, 0) + MF4(0.9962985515594482, 0.9851159453392029, 0.2272046357393265, -0.1116774082183838) * min(target1, 0); - float4 target2 = float4(-0.0820835754275322, -0.0049459170550108, -0.1635029017925262, 0.0367167443037033); - target2 += mul(c1, float4x4(-0.1730685681104660, -0.1758024245500565, -0.0673282966017723, -0.7883995175361633, -0.0092403469607234, 0.1237237676978111, -0.0802010595798492, 0.1532886922359467, -0.0256615914404392, 0.0783618539571762, -0.0216845069080591, 0.4360575079917908, 0.2488089799880981, -0.0421040952205658, 0.4070311486721039, 0.1112201139330864)); - target2 += mul(c2, float4x4(-0.1360913068056107, -0.0307455379515886, -0.1545475125312805, -0.0465389303863049, -0.1308580189943314, 0.2625028789043427, -0.2567890584468842, 0.3363034725189209, 0.1148972064256668, 0.3114618360996246, -0.1600875705480576, 0.6157666444778442, 0.0422471873462200, 0.0509155690670013, -1.1255714893341064, 0.0219085998833179)); + MF4 target2 = MF4(-0.0820835754275322, -0.0049459170550108, -0.1635029017925262, 0.0367167443037033); + target2 = MulAdd(c1, MF4x4(-0.1730685681104660, -0.1758024245500565, -0.0673282966017723, -0.7883995175361633, -0.0092403469607234, 0.1237237676978111, -0.0802010595798492, 0.1532886922359467, -0.0256615914404392, 0.0783618539571762, -0.0216845069080591, 0.4360575079917908, 0.2488089799880981, -0.0421040952205658, 0.4070311486721039, 0.1112201139330864), target2); + target2 = MulAdd(c2, MF4x4(-0.1360913068056107, -0.0307455379515886, -0.1545475125312805, -0.0465389303863049, -0.1308580189943314, 0.2625028789043427, -0.2567890584468842, 0.3363034725189209, 0.1148972064256668, 0.3114618360996246, -0.1600875705480576, 0.6157666444778442, 0.0422471873462200, 0.0509155690670013, -1.1255714893341064, 0.0219085998833179), target2); target2 += featureMap2.SampleLevel(sam, pos, 0); - target2 = max(target2, 0) + float4(-0.3776825070381165, 1.2568452358245850, 0.3147132694721222, 1.0953333377838135) * min(target2, 0); + target2 = max(target2, 0) + MF4(-0.3776825070381165, 1.2568452358245850, 0.3147132694721222, 1.0953333377838135) * min(target2, 0); tex3[gxy] = target1; tex4[gxy] = target2; @@ -538,12 +541,12 @@ void Pass5(uint2 blockStart, uint3 threadId) { //!BLOCK_SIZE 16 //!NUM_THREADS 64 -const static float2x3 rgb2uv = { +const static MF2x3 rgb2uv = { -0.169, -0.331, 0.5, 0.5, -0.419, -0.081 }; -const static float3x3 yuv2rgb = { +const static MF3x3 yuv2rgb = { 1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099 @@ -565,45 +568,45 @@ void Pass6(uint2 blockStart, uint3 threadId) { // [tl, tc, tr] // [ml, mc, mr] // [bl, bc, br] - float4 tl1 = tex3.SampleLevel(sam, pos - inputPt, 0); - float4 ml1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc1 = tex3.SampleLevel(sam, pos, 0); - float4 bc1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br1 = tex3.SampleLevel(sam, pos + inputPt, 0); - - float4 tl2 = tex4.SampleLevel(sam, pos - inputPt, 0); - float4 ml2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); - float4 bl2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); - float4 tc2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); - float4 mc2 = tex4.SampleLevel(sam, pos, 0); - float4 bc2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); - float4 tr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); - float4 mr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); - float4 br2 = tex4.SampleLevel(sam, pos + inputPt, 0); - - float4 result = { 0.2160381823778152,0.2298326790332794,0.2062894254922867,0.2233859002590179 }; - result += mul(tl1, float4x4(-0.0031023439951241, 0.0059619112871587, 0.0058020050637424, 0.0062482208013535, 0.0052765505388379, -0.0022552218288183, -0.0065842187032104, -0.0008604917675257, 0.0003460258303676, -0.0022195784840733, -0.0074996319599450, -0.0015739878872409, 0.0056171459145844, 0.0002592361997813, 0.0019835520070046, 0.0018105609342456)); - result += mul(tl2, float4x4(0.0275507327169180, 0.0051669259555638, -0.0139658711850643, 0.0030883529689163, 0.0089544747024775, 0.0085759535431862, -0.0002981633588206, -0.0054096584208310, -0.0125233745202422, 0.0065056309103966, 0.0073427790775895, 0.0003864165919367, -0.0041021117940545, 0.0030372787732631, 0.0006185144884512, 0.0062267151661217)); - result += mul(ml1, float4x4(0.0105074141174555, -0.0004502697847784, -0.0009122507181019, 0.0026426143012941, 0.0040101194754243, -0.0023395312018692, 0.0017362632788718, 0.0091658774763346, 0.0205013062804937, -0.0020239499863237, -0.0114130824804306, -0.0075353132560849, 0.0225470513105392, -0.0366680622100830, -0.0000535918843525, 0.0050146644935012)); - result += mul(ml2, float4x4(0.0625989288091660, 0.1151928976178169, -0.0027104590553790, -0.0123298475518823, -0.0099824573844671, 0.0108926137909293, 0.0334197729825974, 0.0193741749972105, 0.0490560680627823, 0.0152345551177859, 0.0405332818627357, 0.0399475656449795, 0.0105373272672296, -0.0020778691396117, -0.0097913276404142, 0.0004653275245801)); - result += mul(bl1, float4x4(-0.0080901645123959, -0.0073888520710170, -0.0015905323671177, 0.0006022278103046, -0.0082205599173903, -0.0010853469138965, 0.0055166282691061, 0.0084898881614208, 0.0071090045385063, 0.0003273489419371, -0.0063129402697086, -0.0121429329738021, -0.0069875395856798, -0.0045646210201085, 0.0015905641485006, 0.0037552448920906)); - result += mul(bl2, float4x4(-0.0046724923886359, 0.0016644159331918, 0.0062663913704455, 0.0014710600953549, -0.0338839665055275, -0.0083738788962364, 0.0060393707826734, 0.0241303537040949, -0.0034021178726107, 0.0056042689830065, -0.0083823651075363, 0.0026392592117190, 0.0026657863054425, 0.0020144139416516, -0.0042312326841056, -0.0035609540063888)); - result += mul(tc1, float4x4(0.0009882192825899, 0.0002152582601411, -0.0123004913330078, -0.0005484037101269, -0.0111524788662791, 0.0119459852576256, -0.0154546750709414, 0.0188625976443291, 0.0204733721911907, -0.0079483250156045, -0.0007576426141895, -0.0042714662849903, 0.0307108033448458, -0.0069440440274775, 0.0000003838358680, 0.0070015545934439)); - result += mul(tc2, float4x4(-0.0063984976150095, -0.0027606852818280, -0.0355033427476883, 0.0163108259439468, -0.0317453853785992, 0.0353556163609028, -0.0016268522012979, 0.0312290452420712, 0.0149499354884028, -0.0139254443347454, 0.0109228380024433, 0.0234404578804970, 0.0088780215010047, 0.0083913588896394, 0.0070422240532935, 0.0086190626025200)); - result += mul(mc1, float4x4(0.4668360948562622, 0.5502970218658447, 0.5616708993911743, 0.4827409684658051, -0.4825374484062195, -0.4473012387752533, -0.3305214643478394, -0.5241096019744873, -0.1466400325298309, 0.0804405659437180, 0.0349484048783779, 0.0052239256910980, -0.1175492331385612, 0.0954673886299133, 0.1326161473989487, -0.1377900093793869)); - result += mul(mc2, float4x4(0.0407393611967564, 0.0987918972969055, -0.0807525441050529, -0.1235820129513741, -0.0658119991421700, 0.0621846020221710, -0.0816140249371529, 0.0406704805791378, 0.3287288248538971, 0.3512411415576935, -0.3979234397411346, -0.3275865316390991, 0.2961717247962952, 0.0169802401214838, 0.2695185840129852, 0.0150923738256097)); - result += mul(bc1, float4x4(-0.0050105354748666, -0.0008035619393922, -0.0079824347048998, 0.0376648232340813, -0.0048509594053030, -0.0067090289667249, -0.0259696990251541, 0.0086074192076921, 0.0126011213287711, -0.0174850821495056, 0.0156697910279036, 0.0028463243506849, 0.0099161649122834, -0.0114374589174986, -0.0169337950646877, 0.0257211104035378)); - result += mul(bc2, float4x4(0.0043578865006566, -0.0106186466291547, 0.0165964700281620, 0.0014091627672315, 0.0037815889809281, 0.0364324115216732, -0.0395258180797100, 0.0625703483819962, -0.0144113656133413, 0.0321297869086266, 0.0208382532000542, -0.0383783876895905, -0.0258011315017939, -0.0143258580937982, -0.0092717679217458, -0.0178859047591686)); - result += mul(tr1, float4x4(-0.0050332350656390, -0.0001488960406277, -0.0115145305171609, 0.0045622875913978, -0.0021374952048063, 0.0015796425286680, -0.0077072884887457, 0.0015828146133572, 0.0171351470053196, 0.0024297721683979, -0.0374940223991871, 0.0058337682858109, -0.0027352231554687, 0.0007051698048599, -0.0002820930676535, -0.0036236173473299)); - result += mul(tr2, float4x4(0.0016052227001637, 0.0049667325802147, -0.0081164520233870, -0.0035548578016460, -0.0131295239552855, 0.0158293209969997, -0.0340857952833176, 0.0222462061792612, -0.0019779701251537, -0.0012918257853016, 0.0048964750021696, 0.0049310824833810, 0.0011332486756146, -0.0004788591759279, -0.0017202866729349, 0.0012215448077768)); - result += mul(mr1, float4x4(-0.0057867411524057, -0.0133802210912108, 0.0158019792288542, 0.0344623439013958, -0.0040443707257509, -0.0037790425121784, 0.0045541841536760, 0.0198037009686232, 0.1110823750495911, 0.0822209641337395, -0.0382958762347698, -0.1459431499242783, 0.0010219293180853, -0.0161874033510685, -0.0285891294479370, 0.0300961043685675)); - result += mul(mr2, float4x4(-0.0089390166103840, -0.0105021111667156, -0.0130960196256638, -0.0257005076855421, 0.0099271116778255, -0.0267942640930414, 0.0677764937281609, 0.0239320658147335, -0.0469516664743423, -0.0230019800364971, -0.0446235798299313, -0.0338921397924423, -0.0061601125635207, 0.0139691382646561, -0.0029954034835100, -0.0108477231115103)); - result += mul(br1, float4x4(-0.0002513871586416, 0.0017334159929305, -0.0072467559948564, -0.0149013847112656, -0.0091025009751320, -0.0054195052944124, -0.0053499941714108, 0.0118143679574132, -0.0134334927424788, 0.0106377983465791, -0.0072384304367006, 0.0483081750571728, -0.0011459409724921, -0.0010465533705428, 0.0040362793952227, -0.0004637696838472)); - result += mul(br2, float4x4(0.0037248437292874, 0.0041816406883299, -0.0139264371246099, 0.0035959482192993, -0.0390684641897678, -0.0257617402821779, -0.0298785082995892, -0.0066674682311714, -0.0151000469923019, -0.0176323894411325, 0.0045121158473194, 0.0077007445506752, 0.0051746177487075, 0.0075236861594021, 0.0008582049049437, 0.0010294843232259)); + MF4 tl1 = tex3.SampleLevel(sam, pos - inputPt, 0); + MF4 ml1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl1 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc1 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc1 = tex3.SampleLevel(sam, pos, 0); + MF4 bc1 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr1 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br1 = tex3.SampleLevel(sam, pos + inputPt, 0); + + MF4 tl2 = tex4.SampleLevel(sam, pos - inputPt, 0); + MF4 ml2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + MF4 bl2 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + MF4 tc2 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + MF4 mc2 = tex4.SampleLevel(sam, pos, 0); + MF4 bc2 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + MF4 tr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + MF4 mr2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + MF4 br2 = tex4.SampleLevel(sam, pos + inputPt, 0); + + MF4 result = { 0.2160381823778152,0.2298326790332794,0.2062894254922867,0.2233859002590179 }; + result = MulAdd(tl1, MF4x4(-0.0031023439951241, 0.0059619112871587, 0.0058020050637424, 0.0062482208013535, 0.0052765505388379, -0.0022552218288183, -0.0065842187032104, -0.0008604917675257, 0.0003460258303676, -0.0022195784840733, -0.0074996319599450, -0.0015739878872409, 0.0056171459145844, 0.0002592361997813, 0.0019835520070046, 0.0018105609342456), result); + result = MulAdd(tl2, MF4x4(0.0275507327169180, 0.0051669259555638, -0.0139658711850643, 0.0030883529689163, 0.0089544747024775, 0.0085759535431862, -0.0002981633588206, -0.0054096584208310, -0.0125233745202422, 0.0065056309103966, 0.0073427790775895, 0.0003864165919367, -0.0041021117940545, 0.0030372787732631, 0.0006185144884512, 0.0062267151661217), result); + result = MulAdd(ml1, MF4x4(0.0105074141174555, -0.0004502697847784, -0.0009122507181019, 0.0026426143012941, 0.0040101194754243, -0.0023395312018692, 0.0017362632788718, 0.0091658774763346, 0.0205013062804937, -0.0020239499863237, -0.0114130824804306, -0.0075353132560849, 0.0225470513105392, -0.0366680622100830, -0.0000535918843525, 0.0050146644935012), result); + result = MulAdd(ml2, MF4x4(0.0625989288091660, 0.1151928976178169, -0.0027104590553790, -0.0123298475518823, -0.0099824573844671, 0.0108926137909293, 0.0334197729825974, 0.0193741749972105, 0.0490560680627823, 0.0152345551177859, 0.0405332818627357, 0.0399475656449795, 0.0105373272672296, -0.0020778691396117, -0.0097913276404142, 0.0004653275245801), result); + result = MulAdd(bl1, MF4x4(-0.0080901645123959, -0.0073888520710170, -0.0015905323671177, 0.0006022278103046, -0.0082205599173903, -0.0010853469138965, 0.0055166282691061, 0.0084898881614208, 0.0071090045385063, 0.0003273489419371, -0.0063129402697086, -0.0121429329738021, -0.0069875395856798, -0.0045646210201085, 0.0015905641485006, 0.0037552448920906), result); + result = MulAdd(bl2, MF4x4(-0.0046724923886359, 0.0016644159331918, 0.0062663913704455, 0.0014710600953549, -0.0338839665055275, -0.0083738788962364, 0.0060393707826734, 0.0241303537040949, -0.0034021178726107, 0.0056042689830065, -0.0083823651075363, 0.0026392592117190, 0.0026657863054425, 0.0020144139416516, -0.0042312326841056, -0.0035609540063888), result); + result = MulAdd(tc1, MF4x4(0.0009882192825899, 0.0002152582601411, -0.0123004913330078, -0.0005484037101269, -0.0111524788662791, 0.0119459852576256, -0.0154546750709414, 0.0188625976443291, 0.0204733721911907, -0.0079483250156045, -0.0007576426141895, -0.0042714662849903, 0.0307108033448458, -0.0069440440274775, 0.0000003838358680, 0.0070015545934439), result); + result = MulAdd(tc2, MF4x4(-0.0063984976150095, -0.0027606852818280, -0.0355033427476883, 0.0163108259439468, -0.0317453853785992, 0.0353556163609028, -0.0016268522012979, 0.0312290452420712, 0.0149499354884028, -0.0139254443347454, 0.0109228380024433, 0.0234404578804970, 0.0088780215010047, 0.0083913588896394, 0.0070422240532935, 0.0086190626025200), result); + result = MulAdd(mc1, MF4x4(0.4668360948562622, 0.5502970218658447, 0.5616708993911743, 0.4827409684658051, -0.4825374484062195, -0.4473012387752533, -0.3305214643478394, -0.5241096019744873, -0.1466400325298309, 0.0804405659437180, 0.0349484048783779, 0.0052239256910980, -0.1175492331385612, 0.0954673886299133, 0.1326161473989487, -0.1377900093793869), result); + result = MulAdd(mc2, MF4x4(0.0407393611967564, 0.0987918972969055, -0.0807525441050529, -0.1235820129513741, -0.0658119991421700, 0.0621846020221710, -0.0816140249371529, 0.0406704805791378, 0.3287288248538971, 0.3512411415576935, -0.3979234397411346, -0.3275865316390991, 0.2961717247962952, 0.0169802401214838, 0.2695185840129852, 0.0150923738256097), result); + result = MulAdd(bc1, MF4x4(-0.0050105354748666, -0.0008035619393922, -0.0079824347048998, 0.0376648232340813, -0.0048509594053030, -0.0067090289667249, -0.0259696990251541, 0.0086074192076921, 0.0126011213287711, -0.0174850821495056, 0.0156697910279036, 0.0028463243506849, 0.0099161649122834, -0.0114374589174986, -0.0169337950646877, 0.0257211104035378), result); + result = MulAdd(bc2, MF4x4(0.0043578865006566, -0.0106186466291547, 0.0165964700281620, 0.0014091627672315, 0.0037815889809281, 0.0364324115216732, -0.0395258180797100, 0.0625703483819962, -0.0144113656133413, 0.0321297869086266, 0.0208382532000542, -0.0383783876895905, -0.0258011315017939, -0.0143258580937982, -0.0092717679217458, -0.0178859047591686), result); + result = MulAdd(tr1, MF4x4(-0.0050332350656390, -0.0001488960406277, -0.0115145305171609, 0.0045622875913978, -0.0021374952048063, 0.0015796425286680, -0.0077072884887457, 0.0015828146133572, 0.0171351470053196, 0.0024297721683979, -0.0374940223991871, 0.0058337682858109, -0.0027352231554687, 0.0007051698048599, -0.0002820930676535, -0.0036236173473299), result); + result = MulAdd(tr2, MF4x4(0.0016052227001637, 0.0049667325802147, -0.0081164520233870, -0.0035548578016460, -0.0131295239552855, 0.0158293209969997, -0.0340857952833176, 0.0222462061792612, -0.0019779701251537, -0.0012918257853016, 0.0048964750021696, 0.0049310824833810, 0.0011332486756146, -0.0004788591759279, -0.0017202866729349, 0.0012215448077768), result); + result = MulAdd(mr1, MF4x4(-0.0057867411524057, -0.0133802210912108, 0.0158019792288542, 0.0344623439013958, -0.0040443707257509, -0.0037790425121784, 0.0045541841536760, 0.0198037009686232, 0.1110823750495911, 0.0822209641337395, -0.0382958762347698, -0.1459431499242783, 0.0010219293180853, -0.0161874033510685, -0.0285891294479370, 0.0300961043685675), result); + result = MulAdd(mr2, MF4x4(-0.0089390166103840, -0.0105021111667156, -0.0130960196256638, -0.0257005076855421, 0.0099271116778255, -0.0267942640930414, 0.0677764937281609, 0.0239320658147335, -0.0469516664743423, -0.0230019800364971, -0.0446235798299313, -0.0338921397924423, -0.0061601125635207, 0.0139691382646561, -0.0029954034835100, -0.0108477231115103), result); + result = MulAdd(br1, MF4x4(-0.0002513871586416, 0.0017334159929305, -0.0072467559948564, -0.0149013847112656, -0.0091025009751320, -0.0054195052944124, -0.0053499941714108, 0.0118143679574132, -0.0134334927424788, 0.0106377983465791, -0.0072384304367006, 0.0483081750571728, -0.0011459409724921, -0.0010465533705428, 0.0040362793952227, -0.0004637696838472), result); + result = MulAdd(br2, MF4x4(0.0037248437292874, 0.0041816406883299, -0.0139264371246099, 0.0035959482192993, -0.0390684641897678, -0.0257617402821779, -0.0298785082995892, -0.0066674682311714, -0.0151000469923019, -0.0176323894411325, 0.0045121158473194, 0.0077007445506752, 0.0051746177487075, 0.0075236861594021, 0.0008582049049437, 0.0010294843232259), result); [unroll] for (uint i = 0; i <= 1; ++i) { @@ -611,9 +614,9 @@ void Pass6(uint2 blockStart, uint3 threadId) { for (uint j = 0; j <= 1; ++j) { const uint2 destPos = gxy + uint2(i, j); - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); + MF2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); const uint index = i * 2 + j; - OUTPUT[destPos] = float4(mul(yuv2rgb, float3(result[index], originUV)), 1); + OUTPUT[destPos] = MF4(mul(yuv2rgb, MF3(result[index], originUV)), 1); } } } diff --git a/src/Magpie/AppSettings.cpp b/src/Magpie/AppSettings.cpp index 0fc0a5abc..18fdaa20d 100644 --- a/src/Magpie/AppSettings.cpp +++ b/src/Magpie/AppSettings.cpp @@ -373,12 +373,14 @@ void AppSettings::IsDeveloperMode(bool value) noexcept { if (!value) { // 关闭开发者模式则禁用所有开发者选项 _isDebugMode = false; + _isBenchmarkMode = false; _isEffectCacheDisabled = false; _isFontCacheDisabled = false; _isSaveEffectSources = false; _isWarningsAreErrors = false; _duplicateFrameDetectionMode = DuplicateFrameDetectionMode::Dynamic; _isStatisticsForDynamicDetectionEnabled = false; + _isFP16Disabled = false; } SaveAsync();