From 94c34a462b17462d2311eabb79b1e3f62feef915 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 11 Nov 2023 18:19:21 -0800 Subject: [PATCH 1/3] Initial work on cleaning up shaders --- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 1 + .../pfMetalPipeline/ShaderSrc/Water.h | 14 +++++ .../pfMetalPipeline/ShaderSrc/Water.metal | 24 +++++++++ .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 52 ++++++------------- 4 files changed, 56 insertions(+), 35 deletions(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.metal diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index 2fc73c1de7..8fd88dc876 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -69,6 +69,7 @@ set(pfMetalPipeline_SHADERS ShaderSrc/Grass.metal ShaderSrc/WaveDecEnv.metal ShaderSrc/Avatar.metal + ShaderSrc/Water.metal ShaderSrc/WaveDec1Lay_7.metal ShaderSrc/WaveRip.metal ShaderSrc/Clear.metal diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.h new file mode 100644 index 0000000000..f246688192 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.h @@ -0,0 +1,14 @@ +// +// Water.h +// Plasma +// +// Created by Colin Cornaby on 12/29/22. +// + +#ifndef Water_h +#define Water_h +#include + +float3 CalcDepthFilter(const float4 depthOffset, const float4 depthScale, const float4 wPos, const float4 minAtten); + +#endif /* Water_h */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.metal new file mode 100644 index 0000000000..357cd1173f --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Water.metal @@ -0,0 +1,24 @@ +// +// Water.metal +// plClient +// +// Created by Colin Cornaby on 12/29/22. +// + +#include +#include "Water.h" +using namespace metal; + +// Depth filter channels control: +// dFilter.x => overall opacity +// dFilter.y => reflection strength +// dFilter.z => wave height +float3 CalcDepthFilter(const float4 depthOffset, const float4 depthScale, const float4 wPos, const float4 minAtten) { + float3 dFilter = float3(depthOffset.xyz) - wPos.zzz; + + dFilter *= float3(depthScale.xyz); + dFilter += minAtten.xyz; + dFilter = clamp(dFilter, 0, 1); + + return dFilter; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index d0efefcce6..db3cacaf7e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -44,6 +44,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com using namespace metal; #include "ShaderVertex.h" +#include "Water.h" typedef struct { @@ -65,9 +66,7 @@ typedef struct float4 WindRot; float4 EnvAdjust; float4 EnvTint; - float4 LocalToWorldRow1; - float4 LocalToWorldRow2; - float4 LocalToWorldRow3; + float3x4 LocalToWorld; float4 Lengths; float4 WaterLevel; float4 DepthFalloff; @@ -101,18 +100,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st vs_WaveFixedFin7InOut out; // Store our input position in world space in r6 - float3 column1 = float3(uniforms.LocalToWorldRow1[0], uniforms.LocalToWorldRow2[0], uniforms.LocalToWorldRow3[0]); - float3 column2 = float3(uniforms.LocalToWorldRow1[1], uniforms.LocalToWorldRow2[1], uniforms.LocalToWorldRow3[1]); - float3 column3 = float3(uniforms.LocalToWorldRow1[2], uniforms.LocalToWorldRow2[2], uniforms.LocalToWorldRow3[2]); - float3 column4 = float3(uniforms.LocalToWorldRow1[3], uniforms.LocalToWorldRow2[3], uniforms.LocalToWorldRow3[3]); - - matrix_float4x3 localToWorld; - localToWorld[0] = column1; - localToWorld[1] = column2; - localToWorld[2] = column3; - localToWorld[3] = column4; - - float4 worldPosition = float4(localToWorld * float4(in.position, 1.0), 1.0); + float4 worldPosition = float4(transpose(uniforms.LocalToWorld) * float4(in.position, 1.0), 1.0); // @@ -168,20 +156,19 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); - distance = distance * uniforms.Frequency; - distance = distance + uniforms.Phase; + distance *= uniforms.Frequency; + distance += uniforms.Phase; // // // Now we need dist mod'd into range [-Pi..Pi] // dist *= rcp(kTwoPi); - float4 piRecip = 1.0f / uniforms.PiConsts.wwww; - distance = distance + uniforms.PiConsts.zzzz; - distance *= piRecip; + distance += M_PI_F; + distance /= (M_PI_F * 2.0f); // dist = frac(dist); distance = fract(distance); // dist *= kTwoPi; - distance *= uniforms.PiConsts.wwww; + distance *= (M_PI_F * 2.0f); // dist += -kPi; - distance -= uniforms.PiConsts.zzzz; + distance -= M_PI_F; //Metals pow function does not like negative bases //Doing the same thing as the DX assembly until I know more about why @@ -207,11 +194,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // Calc our depth based filtering here into r4 (because we don't use it again // after here, and we need our filtering shortly). - float4 depth = uniforms.WaterLevel - worldPosition.zzzz; - depth *= uniforms.DepthFalloff; - depth += uniforms.MinAtten; - // Clamp .xyz to range [0..1] - depth = clamp(depth, 0, 1); + float3 depthFilter = CalcDepthFilter(uniforms.WaterLevel, uniforms.DepthFalloff, worldPosition, uniforms.MinAtten); // Calc our filter (see above). float4 inColor = float4(in.color) / 255.0f; @@ -232,7 +215,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // accumPos.z += height; (but accumPos.z is currently 0). float4 accumPos = 0; accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); - accumPos.y = accumPos.x * depth.z; + accumPos.y = accumPos.x * depthFilter.z; accumPos.z = accumPos.y + uniforms.WaterLevel.w; worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP // r8.x == wave height relative to 0 @@ -360,9 +343,9 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. r0.xyz = normalize(r0.xyz); - r1.w = -r0.x; - r2.w = -r0.y; - r3.w = -r0.z; + r1.w = r0.x; + r2.w = r0.y; + r3.w = r0.z; r0.zw = uniforms.NumericConsts.xz; @@ -430,7 +413,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st r1.w *= uniforms.NumericConsts.y; // No need to clamp, since the destination register (in the pixel shader) // will saturate [0..1] anyway. - r1 *= depth.yyyx; // HACKTESTCOLOR + r1 *= depthFilter.yyyx; // HACKTESTCOLOR //R in the in color is the alpha value, but remember it's encoded ARGB r1.w *= inColor.g; r1.w *= uniforms.WaterTint.w; @@ -472,10 +455,9 @@ fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]], float3 N = float3(u, v, w); float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w); - //float3 coord = reflect(E, N); - float3 coord = 2*(dot(N, E) / dot(N, N))*N - E; + float3 reflectCoord = reflect(E, N); - float4 out = float4(environmentMap.sample(colorSampler, coord)); + float4 out = float4(environmentMap.sample(colorSampler, reflectCoord)); out = (out * in.c1) + in.c2; out.a = in.c1.a; return out; From c90286cfe2adc51b2e7c1ff9a77c34fab6ff4eb6 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 7 Jan 2023 15:22:11 -0800 Subject: [PATCH 2/3] Moving to Metal fast sin/cos in WaveSet7 --- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 52 +++++-------------- 1 file changed, 14 insertions(+), 38 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index db3cacaf7e..d63252d389 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -156,40 +156,17 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); - distance *= uniforms.Frequency; - distance += uniforms.Phase; - // - // // Now we need dist mod'd into range [-Pi..Pi] - // dist *= rcp(kTwoPi); - distance += M_PI_F; - distance /= (M_PI_F * 2.0f); - // dist = frac(dist); - distance = fract(distance); - // dist *= kTwoPi; - distance *= (M_PI_F * 2.0f); - // dist += -kPi; - distance -= M_PI_F; - - //Metals pow function does not like negative bases - //Doing the same thing as the DX assembly until I know more about why - - float4 pow2 = distance * distance; // r0^2 - float4 pow3 = pow2 * distance; // r0^3 - probably stall - float4 pow4 = pow2 * pow2; // r0^4 - float4 pow5 = pow2 * pow3; // r0^5 - float4 pow7 = pow2 * pow5; // r0^7 - - // - // sincos(dist, sinDist, cosDist); - // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z - // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z - //r1 - float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; - //r2 - float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; - - cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; - sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + /* + Metal note: This section of the shader originally implemented a fast sin/cos + algorithm in HLSL - including the GPU Gems Ch 1 version. Metal has a built in + fast cos/sin algorithm. When porting this shader to a different shading language, + make sure fast math or a fast algorithm is available for best performance. Fast + math is on for the MSL compiler, but I'm making the fast version explicit here. + */ + float4 cosDist = fast::cos(distance); + float4 sinDist = fast::sin(distance); // Calc our depth based filtering here into r4 (because we don't use it again @@ -197,10 +174,9 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st float3 depthFilter = CalcDepthFilter(uniforms.WaterLevel, uniforms.DepthFalloff, worldPosition, uniforms.MinAtten); // Calc our filter (see above). - float4 inColor = float4(in.color) / 255.0f; + const float4 inColor = float4(in.color) / 255.0f; float4 filter = inColor.wwww * uniforms.Lengths; - filter = max(filter, uniforms.NumericConsts.xxxx); - filter = min(filter, uniforms.NumericConsts.zzzz); + filter = clamp(filter, 0.0f, 1.0f); //mov r2, r1; // r2 == sinDist @@ -214,7 +190,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // height = dp4(sinDist, kOne); // accumPos.z += height; (but accumPos.z is currently 0). float4 accumPos = 0; - accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.x = dot(sinDist, float4(1.0f)); accumPos.y = accumPos.x * depthFilter.z; accumPos.z = accumPos.y + uniforms.WaterLevel.w; worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP From 2bd8f9dcc045e1cba57e3df4b409c4b5a1c7dc21 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 12 Nov 2023 18:19:51 -0800 Subject: [PATCH 3/3] WIP --- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 158 ++++++++++-------- 1 file changed, 84 insertions(+), 74 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index d63252d389..6fe33e92cf 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -94,9 +94,74 @@ typedef struct float fog; } vs_WaveFixedFin7InOut; -vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], - constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +void CalcEyeRayAndBumpAttenuation(const float4 wPos, + const float4 cameraPos, + const float4 specAtten, + thread float3 &cam2Vtx, + thread float &pertAtten) { + cam2Vtx = wPos.xyz - cameraPos.xyz; + pertAtten = length(cam2Vtx); + cam2Vtx /= pertAtten; + + // Calculate our specular attenuation from and into r5.w. + // r5.w starts off the distance from vtx to camera. + // Once we've turned it into an attenuation factor, we + // scale the x and y of our normal map (through the transform bases) + // so that in the distance, the normal map is flat. Note that the + // geometry in the distance isn't necessarily flat. We want to apply + // this scale to the normal read from the normal map before it is + // transformed into surface space. + pertAtten += specAtten.x; + pertAtten *= specAtten.y; + pertAtten = clamp(pertAtten, 0.f, 1.f); + pertAtten *= pertAtten; // Square it to account for perspective + pertAtten *= specAtten.z; +} + +float3 FinitizeEyeRay(const float3 cam2Vtx, const float4 envAdjust) +{ + // So, our "finitized" eyeray is: + // camPos + D * t - envCenter = D * t - (envCenter - camPos) + // with + // D = (pos - camPos) / |pos - camPos| // normalized usual eyeray + // and + // t = D dot F + sqrt( (D dot F)^2 - G ) + // with + // F = (envCenter - camPos) => c19.xyz + // G = F^2 - R^2 => c19.w + // R = environment radius. => unused + // + // This all derives from the positive root of equation + // (camPos + (pos - camPos) * t - envCenter)^2 = R^2, + // In other words, where on a sphere of radius R centered about envCenter + // does the ray from the real camera position through this point hit. + // + // Note that F, G, and R are all constants (one point, two scalars). + // + // So first we calculate D into r0, + // then D dot F into r10.x, + // then (D dot F)^2 - G into r10.y + // then rsq( (D dot F)^2 - G ) into r9.x; + // then t = r10.z = r10.x + r10.y * r9.x; + // and + // r0 = D * t - (envCenter - camPos) + // = r0 * r10.zzzz - F; + // + //https://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch01.html + + const float3 F = envAdjust.xyz; + const float G = envAdjust.w; + // METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt + const float3 t = dot(cam2Vtx, F.xyz) + sqrt(abs(pow(abs(dot(cam2Vtx, F.xyz)), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) + const float3 eyeRay = (cam2Vtx * t) - F; // r0.xyz = D * t - (envCenter - camPos) + + // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. + return normalize(eyeRay); +} + +vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], + constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_WaveFixedFin7InOut out; // Store our input position in world space in r6 @@ -232,9 +297,12 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // // But we want the transpose of that to go into r1-r3 + // CalcFinalPosition + worldPosition.x += dot(cosDist, uniforms.DirXK); worldPosition.y += dot(cosDist, uniforms.DirYK); + // CalcTangentBasis float4 r1, r2, r3 = 0; r1.x = dot(sinDist, -uniforms.DirXSqKW); @@ -252,96 +320,37 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st r3.z = dot(sinDist, -uniforms.WK); r3.z = r3.z + uniforms.NumericConsts.z; - // Calculate our normalized vector from camera to vtx. - // We'll use that a couple of times coming up. - float4 r5 = worldPosition - uniforms.CameraPos; - float4 r10; - r10.x = rsqrt(dot(r5.xyz, r5.xyz)); - r5 = r5 * r10.xxxx; - r5.w = 1.0 / r10.x; - - // Calculate our specular attenuation from and into r5.w. - // r5.w starts off the distance from vtx to camera. - // Once we've turned it into an attenuation factor, we - // scale the x and y of our normal map (through the transform bases) - // so that in the distance, the normal map is flat. Note that the - // geometry in the distance isn't necessarily flat. We want to apply - // this scale to the normal read from the normal map before it is - // transformed into surface space. - r5.w += uniforms.SpecAtten.x; - r5.w *= uniforms.SpecAtten.y; - r5.w = min(r5.w, uniforms.NumericConsts.z); - r5.w = max(r5.w, uniforms.NumericConsts.x); - r5.w *= r5.w; // Square it to account for perspective - r5.w *= uniforms.SpecAtten.z; + float3 cam2Vtx; + float pertAtten; + + CalcEyeRayAndBumpAttenuation(worldPosition, uniforms.CameraPos, uniforms.SpecAtten, cam2Vtx, pertAtten); - // So, our "finitized" eyeray is: - // camPos + D * t - envCenter = D * t - (envCenter - camPos) - // with - // D = (pos - camPos) / |pos - camPos| // normalized usual eyeray - // and - // t = D dot F + sqrt( (D dot F)^2 - G ) - // with - // F = (envCenter - camPos) => c19.xyz - // G = F^2 - R^2 => c19.w - // R = environment radius. => unused - // - // This all derives from the positive root of equation - // (camPos + (pos - camPos) * t - envCenter)^2 = R^2, - // In other words, where on a sphere of radius R centered about envCenter - // does the ray from the real camera position through this point hit. - // - // Note that F, G, and R are all constants (one point, two scalars). - // - // So first we calculate D into r0, - // then D dot F into r10.x, - // then (D dot F)^2 - G into r10.y - // then rsq( (D dot F)^2 - G ) into r9.x; - // then t = r10.z = r10.x + r10.y * r9.x; - // and - // r0 = D * t - (envCenter - camPos) - // = r0 * r10.zzzz - F; - // - //https://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch01.html + float3 eyeRay = FinitizeEyeRay(cam2Vtx, uniforms.EnvAdjust); + r1.w = -eyeRay.x; + r2.w = -eyeRay.y; + r3.w = -eyeRay.z; float4 r0 = float4(0); - - { - float3 D = r5.xyz; - float3 F = uniforms.EnvAdjust.xyz; - float G = uniforms.EnvAdjust.w; - // METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt - float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(abs(dot(D.xyz, F.xyz)), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) - r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) - } - - // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. - r0.xyz = normalize(r0.xyz); - - r1.w = r0.x; - r2.w = r0.y; - r3.w = r0.z; - r0.zw = uniforms.NumericConsts.xz; float4 r11 = float4(0); r0.x = dot(r1.xyz, r1.xyz); r0.xy = rsqrt(r0.x); - r0.x *= r5.w; + r0.x *= pertAtten; out.texCoord1 = r1 * r0.xxyw; r11.x = r1.z * r0.y; r0.x = dot(r2.xyz, r2.xyz); r0.xy = rsqrt(r0.x); - r0.x *= r5.w; + r0.x *= pertAtten; out.texCoord3 = r2 * r0.xxyw; r11.y = r2.z * r0.y; r0.x = dot(r3.xyz, r3.xyz); r0.xy = rsqrt(r0.x); - r0.x *= r5.w; + r0.x *= pertAtten; out.texCoord2 = r3 * r0.xxyw; r11.z = r3.z * r0.y; @@ -369,7 +378,8 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // // Transform position to screen // // - float4 r9; + // CalcScreenPosAndFog + float4 r9, r10; r9 = worldPosition * uniforms.WorldToNDC; r10.x = r9.w + uniforms.FogSet.x; out.fog = r10.x * uniforms.FogSet.y; @@ -382,7 +392,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[st // Questionble attenuation follows // vector from this point to camera and normalize stashed in r5 // Dot that with the computed normal - r1.x = dot(-r5, r11); + r1.x = dot(-cam2Vtx.xyz, r11.xyz); r1.x = r1.x * inColor.z; r1.xyzw = uniforms.NumericConsts.z - r1.x; r1.w += uniforms.NumericConsts.z;